test: Add test suites for embedding, FTS, hybrid search, and golden queries
Four new test modules covering the search infrastructure: - tests/embedding.rs: Unit tests for the embedding pipeline including chunk ID encoding/decoding, change detection, and document chunking with overlap verification. - tests/fts_search.rs: Integration tests for FTS5 search including safe query sanitization, multi-term queries, prefix matching, and the raw FTS mode for power users. - tests/hybrid_search.rs: End-to-end tests for hybrid search mode including RRF fusion correctness, graceful degradation when embeddings are unavailable, and filter application. - tests/golden_query_tests.rs: Golden query tests using fixtures from tests/fixtures/golden_queries.json to verify search quality against known-good query/result pairs. Ensures ranking stability across implementation changes. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
279
tests/golden_query_tests.rs
Normal file
279
tests/golden_query_tests.rs
Normal file
@@ -0,0 +1,279 @@
|
||||
//! Golden query test suite.
|
||||
//!
|
||||
//! Verifies end-to-end search quality with known-good expected results.
|
||||
//! Uses a seeded SQLite DB with deterministic fixture data and no external
|
||||
//! dependencies (no Ollama, no GitLab).
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use rusqlite::Connection;
|
||||
use serde::Deserialize;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use lore::search::{self, FtsQueryMode, SearchFilters, SearchMode, search_fts, apply_filters};
|
||||
|
||||
/// A golden query test case.
///
/// Deserialized from `tests/fixtures/golden_queries.json`; each entry pins
/// a query/expected-result pair so ranking regressions are caught.
#[derive(Debug, Deserialize)]
struct GoldenQuery {
    /// Query string fed to the search pipeline.
    query: String,
    /// Search mode name; parsed with `SearchMode::parse` (only "lexical"
    /// modes are runnable in CI — see `golden_queries_all_pass`).
    mode: String,
    /// Optional result filters; `#[serde(default)]` makes the whole object
    /// optional in the fixture (defaults to no filtering).
    #[serde(default)]
    filters: GoldenFilters,
    /// Document IDs that must appear in the (filtered) result list.
    expected_doc_ids: Vec<i64>,
    /// Minimum number of results required after filtering.
    min_results: usize,
    /// Each expected doc ID must appear within the top `max_rank` positions.
    max_rank: usize,
    /// Human-readable explanation, echoed verbatim in failure messages.
    description: String,
}
|
||||
|
||||
/// Filter spec attached to a golden query; mirrors a subset of `SearchFilters`.
///
/// `Default` (all-`None`/empty) means "no filtering".
#[derive(Debug, Default, Deserialize)]
struct GoldenFilters {
    /// Source type as a string: "issue", "merge_request", or "discussion".
    /// Other values are mapped to `None` by `build_search_filters`.
    source_type: Option<String>,
    /// Restrict results to this author username.
    author: Option<String>,
    /// Project filter. NOTE(review): not consumed by `build_search_filters`
    /// in this file — confirm whether it should be wired through.
    project: Option<String>,
    /// Required label names; missing in JSON deserializes to an empty list.
    #[serde(default)]
    labels: Vec<String>,
}
|
||||
|
||||
fn load_golden_queries() -> Vec<GoldenQuery> {
|
||||
let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("tests/fixtures/golden_queries.json");
|
||||
let content = std::fs::read_to_string(&path)
|
||||
.unwrap_or_else(|_| panic!("Failed to read golden queries fixture"));
|
||||
serde_json::from_str(&content)
|
||||
.unwrap_or_else(|e| panic!("Failed to parse golden queries: {}", e))
|
||||
}
|
||||
|
||||
/// Create an in-memory database with FTS5 schema and seed deterministic fixture data.
///
/// Applies migrations 001-008 from the crate's `migrations/` directory in
/// order, then inserts one project, eight documents with fixed IDs and
/// keyword-rich content, and label rows used by filter-based golden queries.
/// No external services (Ollama, GitLab) are touched.
fn create_seeded_db() -> Connection {
    let conn = Connection::open_in_memory().unwrap();
    conn.pragma_update(None, "foreign_keys", "ON").unwrap();

    // Apply migrations 001-008 (FTS5)
    let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
    for version in 1..=8 {
        // Migration files are located by their zero-padded numeric prefix
        // (e.g. "003_..."); the first match for each version is executed.
        let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_string_lossy()
                    .starts_with(&format!("{:03}", version))
            })
            .collect();
        assert!(!entries.is_empty(), "Migration {} not found", version);
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        conn.execute_batch(&sql)
            .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
    }

    // Seed project
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
         VALUES (1, 100, 'group/project', 'https://gitlab.example.com/group/project')",
        [],
    )
    .unwrap();

    // Seed deterministic documents.
    // Tuple layout: (id, source_type, title, content, author). The parenthetical
    // comments list the terms each document is expected to match in queries.
    let documents = vec![
        // id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh)
        (1, "issue", "Authentication and login broken with OAuth",
         "Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
          causing authentication errors. When the access token expires, the refresh flow returns \
          a 401 instead of fetching new credentials. Login page shows a generic error. \
          Multiple users reported authentication failures across all OAuth providers.",
         "testuser"),

        // id=2: User profile MR (matches: user, profile, avatar, upload)
        (2, "merge_request", "Add user profile page with avatar upload",
         "This merge request adds a new user profile page. Users can now upload their avatar, \
          edit their display name, and manage notification preferences. The profile page includes \
          responsive design for mobile and desktop viewports.",
         "developer1"),

        // id=3: Database migration issue (matches: database, migration, PostgreSQL, schema)
        (3, "issue", "Database migration failing on PostgreSQL 14",
         "The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
          The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
          rewritten to use the new schema modification syntax. All staging environments affected.",
         "dba_admin"),

        // id=4: Performance MR (matches: performance, optimization, caching, query)
        (4, "merge_request", "Performance optimization for dashboard queries",
         "Optimized the dashboard query performance by adding database indexes and implementing \
          Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
          to 180ms. Added connection pooling and prepared statement caching.",
         "senior_dev"),

        // id=5: API rate limiting discussion (matches: API, rate, limiting, throttle)
        (5, "discussion", "API rate limiting strategies for public endpoints",
         "Discussion about implementing API rate limiting on public-facing endpoints. \
          Proposed approaches: token bucket with sliding window, fixed window counters, \
          or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
          Need to handle burst traffic during peak hours without throttling legitimate users.",
         "architect"),

        // id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI)
        (6, "issue", "CSS styling issues on mobile frontend",
         "Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
          content on screens smaller than 768px. Button text truncates on compact viewports. \
          Frontend responsive breakpoints need adjustment. The UI components library has \
          conflicting CSS specificity with the theme system.",
         "frontend_dev"),

        // id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker)
        (7, "merge_request", "Revamp CI/CD pipeline with Docker caching",
         "Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
          builds. Deployment stages now run in parallel where possible. Added rollback \
          support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
         "devops_lead"),

        // id=8: Security issue (matches: security, vulnerability, XSS, injection)
        (8, "issue", "Security vulnerability in form submission",
         "A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
          User input is not properly sanitized before rendering. The security scanner also flagged \
          potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
         "security_team"),
    ];

    for (id, source_type, title, content, author) in &documents {
        // ?1 (the document id) is reused for source_id, the synthetic content
        // hash, and the URL suffix so every derived column stays deterministic.
        conn.execute(
            "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username)
             VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://gitlab.example.com/group/project/-/' || ?2 || 's/' || ?1, ?5)",
            rusqlite::params![id, source_type, title, content, author],
        )
        .unwrap();
    }

    // Seed labels for filtered queries
    conn.execute_batch(
        "INSERT INTO document_labels (document_id, label_name) VALUES (1, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (1, 'authentication');
         INSERT INTO document_labels (document_id, label_name) VALUES (3, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (3, 'database');
         INSERT INTO document_labels (document_id, label_name) VALUES (6, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (6, 'frontend');
         INSERT INTO document_labels (document_id, label_name) VALUES (8, 'security');
         INSERT INTO document_labels (document_id, label_name) VALUES (8, 'critical');",
    )
    .unwrap();

    conn
}
|
||||
|
||||
fn build_search_filters(golden: &GoldenFilters) -> SearchFilters {
|
||||
let source_type = golden.source_type.as_deref().and_then(|s| match s {
|
||||
"issue" => Some(lore::documents::SourceType::Issue),
|
||||
"merge_request" => Some(lore::documents::SourceType::MergeRequest),
|
||||
"discussion" => Some(lore::documents::SourceType::Discussion),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
SearchFilters {
|
||||
source_type,
|
||||
author: golden.author.clone(),
|
||||
labels: golden.labels.clone(),
|
||||
limit: 100,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Run every golden query against the seeded in-memory DB, collecting all
/// failures and reporting them together at the end (rather than aborting on
/// the first), so one test run shows the full regression picture.
#[test]
fn golden_queries_all_pass() {
    let queries = load_golden_queries();
    let conn = create_seeded_db();

    // Accumulates one formatted line per failed expectation.
    let mut failures: Vec<String> = Vec::new();

    for (i, gq) in queries.iter().enumerate() {
        let mode = SearchMode::parse(&gq.mode).unwrap_or(SearchMode::Lexical);

        // For lexical-only golden queries (no Ollama needed)
        assert_eq!(
            mode,
            SearchMode::Lexical,
            "Golden query {} uses non-lexical mode '{}' which requires Ollama — not supported in CI",
            i,
            gq.mode
        );

        // Run FTS search
        let fts_results = search_fts(&conn, &gq.query, 50, FtsQueryMode::Safe).unwrap();
        let doc_ids: Vec<i64> = fts_results.iter().map(|r| r.document_id).collect();

        // Apply filters if any
        let filters = build_search_filters(&gq.filters);
        let filtered_ids = if filters.has_any_filter() {
            apply_filters(&conn, &doc_ids, &filters).unwrap()
        } else {
            doc_ids.clone()
        };

        // Check min_results
        if filtered_ids.len() < gq.min_results {
            failures.push(format!(
                "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
                i, gq.query, gq.min_results, filtered_ids.len(), gq.description
            ));
            // Rank checks are meaningless when the result set is too small.
            continue;
        }

        // Check each expected doc_id is in top max_rank
        for expected_id in &gq.expected_doc_ids {
            // `position` is 0-based, so rank = pos + 1 and the top-N check
            // is `pos < max_rank`.
            let position = filtered_ids.iter().position(|id| id == expected_id);
            match position {
                Some(pos) if pos < gq.max_rank => {
                    // Pass
                }
                Some(pos) => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} in top {}, found at rank {} (description: {})",
                        i, gq.query, expected_id, gq.max_rank, pos + 1, gq.description
                    ));
                }
                None => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} not found in results {:?} (description: {})",
                        i, gq.query, expected_id, filtered_ids, gq.description
                    ));
                }
            }
        }
    }

    // Single aggregated panic keeps every failure visible in the test output.
    if !failures.is_empty() {
        panic!(
            "Golden query failures ({}/{}):\n{}",
            failures.len(),
            queries.len(),
            failures.join("\n")
        );
    }
}
|
||||
|
||||
/// Sanity-check the fixture file itself, so a malformed JSON entry fails
/// loudly here instead of producing confusing results in the main golden test.
#[test]
fn golden_queries_fixture_is_valid() {
    let cases = load_golden_queries();
    let total = cases.len();
    assert!(total >= 5, "Golden queries fixture should have at least 5 queries, got {}", total);

    for (idx, case) in cases.iter().enumerate() {
        // Every case needs a non-empty query and at least one expectation.
        assert!(!case.query.is_empty(), "Query {} has empty query string", idx);
        assert!(!case.expected_doc_ids.is_empty(), "Query {} has no expected doc IDs", idx);
        // Zero thresholds would make the golden assertions vacuous.
        assert!(case.min_results > 0, "Query {} has min_results=0", idx);
        assert!(case.max_rank > 0, "Query {} has max_rank=0", idx);
        // The mode string must name a known SearchMode variant.
        assert!(SearchMode::parse(&case.mode).is_some(), "Query {} has invalid mode '{}'", idx, case.mode);
    }
}
|
||||
Reference in New Issue
Block a user