test: Remove redundant comments from test files
Applies the same doc comment cleanup to test files:

- Removes test module headers (`//!` lines)
- Removes obvious test function comments
- Retains comments explaining non-obvious test scenarios

Test names should be descriptive enough to convey intent without additional comments. Complex test setup or assertions that need explanation retain their comments.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,8 @@
|
||||
//! Integration tests for embedding storage and vector search.
|
||||
//!
|
||||
//! These tests create an in-memory SQLite database with sqlite-vec loaded,
|
||||
//! apply all migrations through 010 (chunk config), and verify KNN search
|
||||
//! and metadata operations.
|
||||
|
||||
use lore::core::db::create_connection;
|
||||
use rusqlite::Connection;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Create a test DB on disk (required for sqlite-vec which needs the extension loaded).
|
||||
/// Uses create_connection to get the sqlite-vec extension registered.
|
||||
fn create_test_db() -> (TempDir, Connection) {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let db_path = tmp.path().join("test.db");
|
||||
@@ -35,7 +27,6 @@ fn create_test_db() -> (TempDir, Connection) {
|
||||
.unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
|
||||
}
|
||||
|
||||
// Seed a project
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
|
||||
[],
|
||||
@@ -54,7 +45,6 @@ fn insert_document(conn: &Connection, id: i64, title: &str, content: &str) {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Create a 768-dim vector with a specific dimension set to 1.0 (unit vector along axis).
|
||||
fn axis_vector(dim: usize) -> Vec<f32> {
|
||||
let mut v = vec![0.0f32; 768];
|
||||
v[dim] = 1.0;
|
||||
@@ -89,12 +79,10 @@ fn knn_search_returns_nearest_neighbors() {
|
||||
insert_document(&conn, 2, "Doc B", "Content about database optimization.");
|
||||
insert_document(&conn, 3, "Doc C", "Content about logging infrastructure.");
|
||||
|
||||
// Doc 1: axis 0, Doc 2: axis 1, Doc 3: axis 2
|
||||
insert_embedding(&conn, 1, 0, &axis_vector(0));
|
||||
insert_embedding(&conn, 2, 0, &axis_vector(1));
|
||||
insert_embedding(&conn, 3, 0, &axis_vector(2));
|
||||
|
||||
// Query vector close to axis 0 (should match doc 1)
|
||||
let mut query = vec![0.0f32; 768];
|
||||
query[0] = 0.9;
|
||||
query[1] = 0.1;
|
||||
@@ -132,7 +120,6 @@ fn knn_search_deduplicates_chunks() {
|
||||
"Very long content that was chunked.",
|
||||
);
|
||||
|
||||
// Same document, two chunks, both similar to query
|
||||
let mut v1 = vec![0.0f32; 768];
|
||||
v1[0] = 1.0;
|
||||
let mut v2 = vec![0.0f32; 768];
|
||||
@@ -144,7 +131,6 @@ fn knn_search_deduplicates_chunks() {
|
||||
|
||||
let results = lore::search::search_vector(&conn, &axis_vector(0), 10).unwrap();
|
||||
|
||||
// Should deduplicate: same document_id appears at most once
|
||||
let unique_docs: std::collections::HashSet<i64> =
|
||||
results.iter().map(|r| r.document_id).collect();
|
||||
assert_eq!(
|
||||
@@ -161,7 +147,6 @@ fn orphan_trigger_deletes_embeddings_on_document_delete() {
|
||||
insert_document(&conn, 1, "Will be deleted", "Content.");
|
||||
insert_embedding(&conn, 1, 0, &axis_vector(0));
|
||||
|
||||
// Verify embedding exists
|
||||
let count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
|
||||
@@ -171,11 +156,9 @@ fn orphan_trigger_deletes_embeddings_on_document_delete() {
|
||||
.unwrap();
|
||||
assert_eq!(count, 1, "Embedding should exist before delete");
|
||||
|
||||
// Delete the document
|
||||
conn.execute("DELETE FROM documents WHERE id = 1", [])
|
||||
.unwrap();
|
||||
|
||||
// Verify embedding was cascade-deleted via trigger
|
||||
let count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
|
||||
@@ -188,7 +171,6 @@ fn orphan_trigger_deletes_embeddings_on_document_delete() {
|
||||
"Trigger should delete embeddings when document is deleted"
|
||||
);
|
||||
|
||||
// Verify metadata was cascade-deleted via FK
|
||||
let meta_count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM embedding_metadata WHERE document_id = 1",
|
||||
@@ -207,19 +189,12 @@ fn empty_database_returns_no_results() {
|
||||
assert!(results.is_empty(), "Empty DB should return no results");
|
||||
}
|
||||
|
||||
// --- Bug-fix regression tests ---
|
||||
|
||||
#[test]
|
||||
fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
|
||||
// Bug 2: Documents skipped for chunk overflow must record a sentinel error
|
||||
// in embedding_metadata so they are not re-detected as pending on subsequent
|
||||
// pipeline runs (which would cause an infinite re-processing loop).
|
||||
let (_tmp, conn) = create_test_db();
|
||||
|
||||
insert_document(&conn, 1, "Overflow doc", "Some content");
|
||||
|
||||
// Simulate what the pipeline does when a document exceeds CHUNK_ROWID_MULTIPLIER:
|
||||
// it records an error sentinel at chunk_index=0.
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
conn.execute(
|
||||
"INSERT INTO embedding_metadata
|
||||
@@ -230,7 +205,6 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Now find_pending_documents should NOT return this document
|
||||
let pending =
|
||||
lore::embedding::find_pending_documents(&conn, 100, 0, "nomic-embed-text").unwrap();
|
||||
assert!(
|
||||
@@ -239,7 +213,6 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
|
||||
pending.len()
|
||||
);
|
||||
|
||||
// count_pending_documents should also return 0
|
||||
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
|
||||
assert_eq!(
|
||||
count, 0,
|
||||
@@ -249,11 +222,8 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
|
||||
|
||||
#[test]
|
||||
fn count_and_find_pending_agree() {
|
||||
// Bug 1: count_pending_documents and find_pending_documents must use
|
||||
// logically equivalent WHERE clauses to produce consistent results.
|
||||
let (_tmp, conn) = create_test_db();
|
||||
|
||||
// Case 1: No documents at all
|
||||
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
|
||||
let found =
|
||||
lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
|
||||
@@ -263,7 +233,6 @@ fn count_and_find_pending_agree() {
|
||||
"Empty DB: count and find should agree"
|
||||
);
|
||||
|
||||
// Case 2: New document (no metadata)
|
||||
insert_document(&conn, 1, "New doc", "Content");
|
||||
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
|
||||
let found =
|
||||
@@ -275,7 +244,6 @@ fn count_and_find_pending_agree() {
|
||||
);
|
||||
assert_eq!(count, 1);
|
||||
|
||||
// Case 3: Document with matching metadata (not pending)
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
conn.execute(
|
||||
"INSERT INTO embedding_metadata
|
||||
@@ -295,7 +263,6 @@ fn count_and_find_pending_agree() {
|
||||
);
|
||||
assert_eq!(count, 0);
|
||||
|
||||
// Case 4: Config drift (chunk_max_bytes mismatch)
|
||||
conn.execute(
|
||||
"UPDATE embedding_metadata SET chunk_max_bytes = 999 WHERE document_id = 1",
|
||||
[],
|
||||
@@ -314,14 +281,11 @@ fn count_and_find_pending_agree() {
|
||||
|
||||
#[test]
|
||||
fn full_embed_delete_is_atomic() {
|
||||
// Bug 7: The --full flag's two DELETE statements should be atomic.
|
||||
// This test verifies that both tables are cleared together.
|
||||
let (_tmp, conn) = create_test_db();
|
||||
|
||||
insert_document(&conn, 1, "Doc", "Content");
|
||||
insert_embedding(&conn, 1, 0, &axis_vector(0));
|
||||
|
||||
// Verify data exists
|
||||
let meta_count: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM embedding_metadata", [], |r| r.get(0))
|
||||
.unwrap();
|
||||
@@ -331,7 +295,6 @@ fn full_embed_delete_is_atomic() {
|
||||
assert_eq!(meta_count, 1);
|
||||
assert_eq!(embed_count, 1);
|
||||
|
||||
// Execute the atomic delete (same as embed.rs --full)
|
||||
conn.execute_batch(
|
||||
"BEGIN;
|
||||
DELETE FROM embedding_metadata;
|
||||
|
||||
Reference in New Issue
Block a user