test: Add test suites for embedding, FTS, hybrid search, and golden queries

Four new test modules covering the search infrastructure:

- tests/embedding.rs: Unit tests for the embedding pipeline including
  chunk ID encoding/decoding, change detection, and document chunking
  with overlap verification.

- tests/fts_search.rs: Integration tests for FTS5 search including
  safe query sanitization, multi-term queries, prefix matching, and
  the raw FTS mode for power users.

- tests/hybrid_search.rs: End-to-end tests for hybrid search mode
  including RRF fusion correctness, graceful degradation when
  embeddings are unavailable, and filter application.

- tests/golden_query_tests.rs: Golden query tests using fixtures
  from tests/fixtures/golden_queries.json to verify search quality
  against known-good query/result pairs. Ensures ranking stability
  across implementation changes.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-30 15:47:19 -05:00
parent daf5a73019
commit d235f2b4dd
5 changed files with 931 additions and 0 deletions

198
tests/fts_search.rs Normal file
View File

@@ -0,0 +1,198 @@
//! Integration tests for FTS5 search.
//!
//! These tests create an in-memory SQLite database, apply migrations through 008 (FTS5),
//! seed documents, and verify search behavior.
use rusqlite::Connection;
/// Builds a fresh in-memory SQLite database for a single test.
///
/// Turns on foreign-key enforcement, applies migrations `001` through `008` from the
/// crate's `migrations/` directory in ascending order, and seeds one project row
/// (`id = 1`) for test documents to reference.
///
/// # Panics
///
/// Panics if the database cannot be opened, the migrations directory cannot be read,
/// a migration in the range is missing or fails to apply, or the seed insert fails.
fn create_test_db() -> Connection {
    let conn = Connection::open_in_memory().unwrap();
    conn.pragma_update(None, "foreign_keys", "ON").unwrap();

    let migrations_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");

    // List the directory once and sort it: `read_dir` yields entries in a
    // platform-dependent order, so without sorting the file chosen for a given
    // version could differ between machines when several files share a prefix.
    let mut migration_files: Vec<std::path::PathBuf> = std::fs::read_dir(&migrations_dir)
        .unwrap()
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .collect();
    migration_files.sort();

    for version in 1..=8 {
        let prefix = format!("{:03}", version);
        let path = migration_files
            .iter()
            .find(|p| {
                p.file_name()
                    .map_or(false, |name| name.to_string_lossy().starts_with(&prefix))
            })
            .unwrap_or_else(|| panic!("Migration {} not found", version));
        let sql = std::fs::read_to_string(path).unwrap();
        conn.execute_batch(&sql)
            .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
    }

    // Seed a project so inserted documents can use `project_id = 1`.
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
        [],
    )
    .unwrap();
    conn
}
/// Inserts one row into `documents`, attached to project 1.
///
/// `id` is also used as `source_id`, and is appended to `'hash_'` and
/// `'https://example.com/'` to build the `content_hash` and `url` columns.
///
/// # Panics
///
/// Panics if the insert fails.
fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, content: &str) {
    const INSERT_SQL: &str = "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://example.com/' || ?1)";

    let outcome = conn.execute(INSERT_SQL, rusqlite::params![id, source_type, title, content]);
    outcome.unwrap();
}
/// A two-term safe-mode query ranks the document containing both terms first.
#[test]
fn fts_basic_search() {
    let db = create_test_db();

    // Only document 1 mentions both authentication and login.
    let seed = [
        (1, "issue", "Authentication bug", "Users cannot login when using OAuth tokens. The JWT refresh fails silently."),
        (2, "merge_request", "Add user profile page", "This MR adds a new user profile page with avatar upload support."),
        (3, "issue", "Database migration failing", "The migration script crashes on PostgreSQL 14 due to deprecated syntax."),
    ];
    for (id, source_type, title, content) in seed {
        insert_document(&db, id, source_type, title, content);
    }

    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "authentication login", 10, mode).unwrap();

    assert!(!hits.is_empty(), "Expected at least one result for 'authentication login'");
    let top = &hits[0];
    assert_eq!(top.document_id, 1, "Authentication issue should be top result");
}
/// Searching for "running" returns the document about running tests first.
#[test]
fn fts_stemming_matches() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "Running tests", "The test runner is executing integration tests.");
    insert_document(&db, 2, "issue", "Deployment config", "Deployment configuration for production servers.");

    // Document 1 contains the query word itself in its title ("Running") and the
    // related form "runner" in its body; document 2 contains neither.
    // NOTE(review): because the title already holds the literal query word, this
    // passes whether or not the tokenizer stems; the tokenizer configuration
    // lives in the migrations and is not visible from this file.
    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "running", 10, mode).unwrap();

    assert!(!hits.is_empty(), "Stemming should match 'running' to 'runner'");
    let top = &hits[0];
    assert_eq!(top.document_id, 1);
}
/// A query sharing no terms with the only indexed document yields no results.
#[test]
fn fts_empty_results() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "Bug fix", "Fixed a null pointer dereference in the parser.");

    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "kubernetes deployment helm", 10, mode).unwrap();

    let nothing_matched = hits.is_empty();
    assert!(nothing_matched, "No documents should match unrelated query");
}
/// A safe-mode query containing `+` characters completes without an error.
#[test]
fn fts_special_characters_handled() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "C++ compiler", "The C++ compiler segfaults on template metaprogramming.");

    // The real check is the `unwrap`: the search must return `Ok` for "C++ compiler".
    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "C++ compiler", 10, mode).unwrap();

    // Only one document is seeded, so this bound accepts both zero and one hit.
    let hit_count = hits.len();
    assert!(hit_count <= 1);
}
/// The document that mentions the query term most often is returned first.
#[test]
fn fts_result_ordering_by_relevance() {
    let db = create_test_db();

    let seed = [
        // "authentication" appears in the title and twice more in the body.
        (1, "Authentication system redesign", "The authentication system needs a complete redesign. Authentication flows are broken."),
        // "authentication" appears once, in the body only.
        (2, "Login page update", "Updated the login page with better authentication error messages."),
        // Does not mention the term at all.
        (3, "Database optimization", "Optimize database queries for faster response times."),
    ];
    for (id, title, content) in seed {
        insert_document(&db, id, "issue", title, content);
    }

    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "authentication", 10, mode).unwrap();

    assert!(hits.len() >= 2, "Should match at least 2 documents");
    let best = &hits[0];
    assert_eq!(best.document_id, 1, "Document with more term occurrences should rank first");
}
/// With more matching documents than the requested limit, exactly `limit` rows come back.
#[test]
fn fts_respects_limit() {
    let conn = create_test_db();

    // Seed 20 documents that each contain both query terms ("bug" and "login"),
    // so the number of candidates is well above the limit of 5.
    for i in 1..=20 {
        insert_document(
            &conn,
            i,
            "issue",
            &format!("Bug report {}", i),
            &format!("This is bug report number {} about the login system.", i),
        );
    }

    let results =
        lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap();

    // Assert equality rather than `<= 5`: an upper bound alone would still pass
    // if the search returned nothing, hiding a broken query path.
    // NOTE(review): assumes safe mode matches documents containing the query terms.
    assert_eq!(
        results.len(),
        5,
        "Results should be capped at limit and filled when enough documents match"
    );
}
/// The top search result carries a non-empty snippet.
#[test]
fn fts_snippet_generated() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "Performance issue", "The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB.");

    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "performance", 10, mode).unwrap();
    assert!(!hits.is_empty());

    // Only presence is checked; the snippet's exact text and markup are not pinned.
    let snippet = &hits[0].snippet;
    assert!(!snippet.is_empty(), "Snippet should be generated");
}
/// Inserting into `documents` makes the row findable through `documents_fts`.
#[test]
fn fts_triggers_sync_on_insert() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "Test document", "This is test content for FTS trigger verification.");

    // Query the FTS table directly, bypassing the search API, so the check
    // covers the index contents rather than the query layer.
    const COUNT_SQL: &str = "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'";
    let indexed: i64 = db.query_row(COUNT_SQL, [], |row| row.get(0)).unwrap();

    assert_eq!(indexed, 1, "FTS trigger should auto-index on INSERT");
}
/// Deleting a row from `documents` removes its entry from `documents_fts`.
#[test]
fn fts_triggers_sync_on_delete() {
    let db = create_test_db();
    insert_document(&db, 1, "issue", "Deletable document", "This content will be deleted from the index.");

    // Counts the FTS rows that currently match the term `deletable`.
    let indexed_rows = || -> i64 {
        db.query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    };

    // The document must be indexed before the delete for the second check to mean anything.
    assert_eq!(indexed_rows(), 1);

    db.execute("DELETE FROM documents WHERE id = 1", []).unwrap();

    assert_eq!(indexed_rows(), 0, "FTS trigger should remove entry on DELETE");
}
/// A document whose `title` is NULL is still found through its body text.
#[test]
fn fts_null_title_handled() {
    let db = create_test_db();

    // Written as raw SQL because `insert_document` always binds a title string;
    // this row stores an explicit NULL title for a 'discussion' document.
    const INSERT_UNTITLED: &str = "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
VALUES (1, 'discussion', 1, 1, NULL, 'Discussion about API rate limiting strategies.', 'hash1', 'https://example.com/1')";
    db.execute(INSERT_UNTITLED, []).unwrap();

    let mode = lore::search::FtsQueryMode::Safe;
    let hits = lore::search::search_fts(&db, "rate limiting", 10, mode).unwrap();

    let found_any = !hits.is_empty();
    assert!(found_any, "Should find documents with NULL title");
}