test: Add test suites for embedding, FTS, hybrid search, and golden queries
Four new test modules covering the search infrastructure: - tests/embedding.rs: Unit tests for the embedding pipeline including chunk ID encoding/decoding, change detection, and document chunking with overlap verification. - tests/fts_search.rs: Integration tests for FTS5 search including safe query sanitization, multi-term queries, prefix matching, and the raw FTS mode for power users. - tests/hybrid_search.rs: End-to-end tests for hybrid search mode including RRF fusion correctness, graceful degradation when embeddings are unavailable, and filter application. - tests/golden_query_tests.rs: Golden query tests using fixtures from tests/fixtures/golden_queries.json to verify search quality against known-good query/result pairs. Ensures ranking stability across implementation changes. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
198
tests/fts_search.rs
Normal file
198
tests/fts_search.rs
Normal file
@@ -0,0 +1,198 @@
|
||||
//! Integration tests for FTS5 search.
|
||||
//!
|
||||
//! These tests create an in-memory SQLite database, apply migrations through 008 (FTS5),
|
||||
//! seed documents, and verify search behavior.
|
||||
|
||||
use rusqlite::Connection;
|
||||
|
||||
fn create_test_db() -> Connection {
|
||||
let conn = Connection::open_in_memory().unwrap();
|
||||
conn.pragma_update(None, "foreign_keys", "ON").unwrap();
|
||||
|
||||
let migrations_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
|
||||
|
||||
for version in 1..=8 {
|
||||
let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
|
||||
.unwrap()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_string_lossy()
|
||||
.starts_with(&format!("{:03}", version))
|
||||
})
|
||||
.collect();
|
||||
|
||||
assert!(!entries.is_empty(), "Migration {} not found", version);
|
||||
let sql = std::fs::read_to_string(entries[0].path()).unwrap();
|
||||
conn.execute_batch(&sql)
|
||||
.unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
|
||||
}
|
||||
|
||||
// Seed a project
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn
|
||||
}
|
||||
|
||||
fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, content: &str) {
|
||||
conn.execute(
|
||||
"INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
|
||||
VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://example.com/' || ?1)",
|
||||
rusqlite::params![id, source_type, title, content],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// A two-term Safe-mode query must rank the only document containing those
/// terms at the top.
#[test]
fn fts_basic_search() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();

    // (id, source_type, title, content)
    let corpus = [
        (1, "issue", "Authentication bug", "Users cannot login when using OAuth tokens. The JWT refresh fails silently."),
        (2, "merge_request", "Add user profile page", "This MR adds a new user profile page with avatar upload support."),
        (3, "issue", "Database migration failing", "The migration script crashes on PostgreSQL 14 due to deprecated syntax."),
    ];
    for (id, source_type, title, content) in corpus {
        insert_document(&conn, id, source_type, title, content);
    }

    let hits = search_fts(&conn, "authentication login", 10, FtsQueryMode::Safe).unwrap();

    assert!(!hits.is_empty(), "Expected at least one result for 'authentication login'");
    assert_eq!(hits[0].document_id, 1, "Authentication issue should be top result");
}
|
||||
|
||||
/// Inflected forms of a word must find the document that contains a
/// different inflection of the same word.
#[test]
fn fts_stemming_matches() {
    let conn = create_test_db();

    insert_document(&conn, 1, "issue", "Running tests", "The test runner is executing integration tests.");
    insert_document(&conn, 2, "issue", "Deployment config", "Deployment configuration for production servers.");

    // Baseline: "running" occurs (case aside) in document 1's title, so this
    // query succeeds even without any stemming.
    let results = lore::search::search_fts(&conn, "running", 10, lore::search::FtsQueryMode::Safe).unwrap();
    assert!(!results.is_empty(), "Query 'running' should match the title 'Running tests'");
    assert_eq!(results[0].document_id, 1);

    // "runs" occurs nowhere in the corpus; it can only hit document 1 when
    // both "runs" and "Running" are reduced to the common stem "run".
    // NOTE(review): assumes the FTS5 table created by migration 008 uses the
    // porter tokenizer — confirm against the migration.
    let stemmed = lore::search::search_fts(&conn, "runs", 10, lore::search::FtsQueryMode::Safe).unwrap();
    assert!(!stemmed.is_empty(), "Stemming should match 'runs' to 'Running'");
    assert_eq!(stemmed[0].document_id, 1);
}
|
||||
|
||||
/// A query whose terms appear in no document must yield an empty result set
/// rather than an error.
#[test]
fn fts_empty_results() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Bug fix", "Fixed a null pointer dereference in the parser.");

    let query = "kubernetes deployment helm";
    let hits = search_fts(&conn, query, 10, FtsQueryMode::Safe).unwrap();

    assert!(hits.is_empty(), "No documents should match unrelated query");
}
|
||||
|
||||
/// Punctuation in a Safe-mode query (here `++`) must not make the search
/// return an error.
#[test]
fn fts_special_characters_handled() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "C++ compiler", "The C++ compiler segfaults on template metaprogramming.");

    // The `unwrap` is the real check: the query must not fail.
    let hits = search_fts(&conn, "C++ compiler", 10, FtsQueryMode::Safe).unwrap();

    // Whether the sanitized query still matches is left open; with a single
    // document there can be at most one hit.
    let hit_count = hits.len();
    assert!(hit_count <= 1);
}
|
||||
|
||||
/// The document that mentions the query term most often must be ranked
/// first.
#[test]
fn fts_result_ordering_by_relevance() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();

    // (id, source_type, title, content)
    let corpus = [
        // "authentication" in the title and twice in the content.
        (1, "issue", "Authentication system redesign", "The authentication system needs a complete redesign. Authentication flows are broken."),
        // "authentication" once, in the content only.
        (2, "issue", "Login page update", "Updated the login page with better authentication error messages."),
        // No occurrence of the term.
        (3, "issue", "Database optimization", "Optimize database queries for faster response times."),
    ];
    for (id, source_type, title, content) in corpus {
        insert_document(&conn, id, source_type, title, content);
    }

    let hits = search_fts(&conn, "authentication", 10, FtsQueryMode::Safe).unwrap();

    assert!(hits.len() >= 2, "Should match at least 2 documents");
    let top = &hits[0];
    assert_eq!(top.document_id, 1, "Document with more term occurrences should rank first");
}
|
||||
|
||||
/// The `limit` argument must cap the number of returned rows.
#[test]
fn fts_respects_limit() {
    let conn = create_test_db();

    // Every one of the 20 documents contains both "bug" and "login", so the
    // query below has far more candidates than the limit allows.
    for i in 1..=20 {
        insert_document(
            &conn,
            i,
            "issue",
            &format!("Bug report {}", i),
            &format!("This is bug report number {} about the login system.", i),
        );
    }

    let results = lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap();

    // Require exactly the limit: an upper-bound check alone would also pass
    // if the search returned nothing at all.
    assert_eq!(results.len(), 5, "Results should be capped at limit (and the limit should be filled)");
}
|
||||
|
||||
/// A matching result must carry a non-empty snippet.
#[test]
fn fts_snippet_generated() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Performance issue", "The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB.");

    let hits = search_fts(&conn, "performance", 10, FtsQueryMode::Safe).unwrap();
    assert!(!hits.is_empty());

    // Only presence is checked; the snippet may include highlight markers.
    let top = &hits[0];
    assert!(!top.snippet.is_empty(), "Snippet should be generated");
}
|
||||
|
||||
/// Inserting into `documents` must make the row searchable through
/// `documents_fts` without any explicit indexing step.
#[test]
fn fts_triggers_sync_on_insert() {
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Test document", "This is test content for FTS trigger verification.");

    // Query the FTS table directly, bypassing the search API.
    let count_sql = "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'";
    let indexed_rows: i64 = conn.query_row(count_sql, [], |row| row.get(0)).unwrap();

    assert_eq!(indexed_rows, 1, "FTS trigger should auto-index on INSERT");
}
|
||||
|
||||
/// Deleting a row from `documents` must also remove it from `documents_fts`.
#[test]
fn fts_triggers_sync_on_delete() {
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Deletable document", "This content will be deleted from the index.");

    // Counts FTS rows matching the marker word; run before and after the delete.
    let count_matches = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    };

    // Precondition: the document is indexed.
    assert_eq!(count_matches(), 1);

    conn.execute("DELETE FROM documents WHERE id = 1", []).unwrap();

    // Postcondition: the index no longer contains it.
    assert_eq!(count_matches(), 0, "FTS trigger should remove entry on DELETE");
}
|
||||
|
||||
/// A document whose `title` is NULL must still be found through its content.
#[test]
fn fts_null_title_handled() {
    use lore::search::{search_fts, FtsQueryMode};

    let conn = create_test_db();

    // Raw INSERT: this row needs a NULL title, which `insert_document` cannot
    // express because it always binds a string.
    let insert_sql = "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
         VALUES (1, 'discussion', 1, 1, NULL, 'Discussion about API rate limiting strategies.', 'hash1', 'https://example.com/1')";
    conn.execute(insert_sql, []).unwrap();

    let hits = search_fts(&conn, "rate limiting", 10, FtsQueryMode::Safe).unwrap();

    assert!(!hits.is_empty(), "Should find documents with NULL title");
}
|
||||
Reference in New Issue
Block a user