Files
gitlore/tests/golden_query_tests.rs
Taylor Eernisse d235f2b4dd test: Add test suites for embedding, FTS, hybrid search, and golden queries
Four new test modules covering the search infrastructure:

- tests/embedding.rs: Unit tests for the embedding pipeline including
  chunk ID encoding/decoding, change detection, and document chunking
  with overlap verification.

- tests/fts_search.rs: Integration tests for FTS5 search including
  safe query sanitization, multi-term queries, prefix matching, and
  the raw FTS mode for power users.

- tests/hybrid_search.rs: End-to-end tests for hybrid search mode
  including RRF fusion correctness, graceful degradation when
  embeddings are unavailable, and filter application.

- tests/golden_query_tests.rs: Golden query tests using fixtures
  from tests/fixtures/golden_queries.json to verify search quality
  against known-good query/result pairs. Ensures ranking stability
  across implementation changes.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 15:47:19 -05:00

280 lines
11 KiB
Rust

//! Golden query test suite.
//!
//! Verifies end-to-end search quality with known-good expected results.
//! Uses a seeded SQLite DB with deterministic fixture data and no external
//! dependencies (no Ollama, no GitLab).
#![allow(dead_code)]
use rusqlite::Connection;
use serde::Deserialize;
use std::path::PathBuf;
use lore::search::{self, FtsQueryMode, SearchFilters, SearchMode, search_fts, apply_filters};
/// A golden query test case.
///
/// Deserialized from `tests/fixtures/golden_queries.json`; each entry pins a
/// query string together with the results it must produce so ranking
/// regressions are caught across implementation changes.
#[derive(Debug, Deserialize)]
struct GoldenQuery {
// The search query text to execute.
query: String,
// Search mode name; must be accepted by `SearchMode::parse` (only
// "lexical" is runnable in CI — see `golden_queries_all_pass`).
mode: String,
// Optional structured filters; absent in JSON means "no filters".
#[serde(default)]
filters: GoldenFilters,
// Document IDs that must appear within the top `max_rank` results.
expected_doc_ids: Vec<i64>,
// Minimum number of results the (filtered) search must return.
min_results: usize,
// Rank window (1-based, inclusive) within which each expected ID must land.
max_rank: usize,
// Human-readable explanation, echoed in failure reports.
description: String,
}
/// Structured filters attached to a golden query.
///
/// All fields are optional/empty by default (`Default` + `#[serde(default)]`
/// on the parent field), so fixtures only spell out the filters they use.
#[derive(Debug, Default, Deserialize)]
struct GoldenFilters {
// "issue" | "merge_request" | "discussion"; anything else is ignored
// (mapped to no source-type filter) by `build_search_filters`.
source_type: Option<String>,
// Exact author username to filter on.
author: Option<String>,
// Project path filter (e.g. "group/project").
project: Option<String>,
// Label names the document must carry.
#[serde(default)]
labels: Vec<String>,
}
/// Load the golden query fixtures from `tests/fixtures/golden_queries.json`,
/// resolved relative to the crate root (`CARGO_MANIFEST_DIR`).
///
/// # Panics
/// Panics when the fixture file cannot be read or parsed. Both messages
/// include the offending path — and, for the read case, the underlying I/O
/// error — so a CI failure distinguishes "file missing" from "bad JSON"
/// from "permission denied" at a glance.
fn load_golden_queries() -> Vec<GoldenQuery> {
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests/fixtures/golden_queries.json");
    let content = std::fs::read_to_string(&path).unwrap_or_else(|e| {
        panic!(
            "Failed to read golden queries fixture {}: {}",
            path.display(),
            e
        )
    });
    serde_json::from_str(&content).unwrap_or_else(|e| {
        panic!(
            "Failed to parse golden queries {}: {}",
            path.display(),
            e
        )
    })
}
/// Create an in-memory database with FTS5 schema and seed deterministic fixture data.
///
/// Applies migrations 001-008 from the crate's `migrations/` directory, then
/// inserts one project, eight documents with keyword-rich titles/bodies (the
/// inline comments on each document list the terms golden queries are expected
/// to match), and labels on four of them for filter-based queries.
///
/// # Panics
/// Panics if a migration file is missing or fails to apply, or if any seed
/// INSERT fails — all of which indicate a broken test environment, not a
/// search regression.
fn create_seeded_db() -> Connection {
let conn = Connection::open_in_memory().unwrap();
conn.pragma_update(None, "foreign_keys", "ON").unwrap();
// Apply migrations 001-008 (FTS5)
let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
for version in 1..=8 {
// Locate the migration file by its zero-padded numeric prefix (e.g. "003").
let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
.unwrap()
.filter_map(|e| e.ok())
.filter(|e| {
e.file_name()
.to_string_lossy()
.starts_with(&format!("{:03}", version))
})
.collect();
assert!(!entries.is_empty(), "Migration {} not found", version);
// NOTE(review): read_dir order is platform-dependent; entries[0] assumes at
// most one file matches each "NNN" prefix — confirm the migrations dir
// never holds duplicates (e.g. "003_foo.sql" and "003_bar.sql").
let sql = std::fs::read_to_string(entries[0].path()).unwrap();
conn.execute_batch(&sql)
.unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
}
// Seed project
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (1, 100, 'group/project', 'https://gitlab.example.com/group/project')",
[],
)
.unwrap();
// Seed deterministic documents
// Tuple layout: (id, source_type, title, content, author).
let documents = vec![
// id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh)
(1, "issue", "Authentication and login broken with OAuth",
"Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
causing authentication errors. When the access token expires, the refresh flow returns \
a 401 instead of fetching new credentials. Login page shows a generic error. \
Multiple users reported authentication failures across all OAuth providers.",
"testuser"),
// id=2: User profile MR (matches: user, profile, avatar, upload)
(2, "merge_request", "Add user profile page with avatar upload",
"This merge request adds a new user profile page. Users can now upload their avatar, \
edit their display name, and manage notification preferences. The profile page includes \
responsive design for mobile and desktop viewports.",
"developer1"),
// id=3: Database migration issue (matches: database, migration, PostgreSQL, schema)
(3, "issue", "Database migration failing on PostgreSQL 14",
"The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
rewritten to use the new schema modification syntax. All staging environments affected.",
"dba_admin"),
// id=4: Performance MR (matches: performance, optimization, caching, query)
(4, "merge_request", "Performance optimization for dashboard queries",
"Optimized the dashboard query performance by adding database indexes and implementing \
Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
to 180ms. Added connection pooling and prepared statement caching.",
"senior_dev"),
// id=5: API rate limiting discussion (matches: API, rate, limiting, throttle)
(5, "discussion", "API rate limiting strategies for public endpoints",
"Discussion about implementing API rate limiting on public-facing endpoints. \
Proposed approaches: token bucket with sliding window, fixed window counters, \
or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
Need to handle burst traffic during peak hours without throttling legitimate users.",
"architect"),
// id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI)
(6, "issue", "CSS styling issues on mobile frontend",
"Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
content on screens smaller than 768px. Button text truncates on compact viewports. \
Frontend responsive breakpoints need adjustment. The UI components library has \
conflicting CSS specificity with the theme system.",
"frontend_dev"),
// id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker)
(7, "merge_request", "Revamp CI/CD pipeline with Docker caching",
"Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
builds. Deployment stages now run in parallel where possible. Added rollback \
support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
"devops_lead"),
// id=8: Security issue (matches: security, vulnerability, XSS, injection)
(8, "issue", "Security vulnerability in form submission",
"A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
User input is not properly sanitized before rendering. The security scanner also flagged \
potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
"security_team"),
];
// source_id is reused as the document id (?1) and content_hash/url are
// derived from it via SQL string concatenation, keeping rows deterministic.
for (id, source_type, title, content, author) in &documents {
conn.execute(
"INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username)
VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://gitlab.example.com/group/project/-/' || ?2 || 's/' || ?1, ?5)",
rusqlite::params![id, source_type, title, content, author],
)
.unwrap();
}
// Seed labels for filtered queries
conn.execute_batch(
"INSERT INTO document_labels (document_id, label_name) VALUES (1, 'bug');
INSERT INTO document_labels (document_id, label_name) VALUES (1, 'authentication');
INSERT INTO document_labels (document_id, label_name) VALUES (3, 'bug');
INSERT INTO document_labels (document_id, label_name) VALUES (3, 'database');
INSERT INTO document_labels (document_id, label_name) VALUES (6, 'bug');
INSERT INTO document_labels (document_id, label_name) VALUES (6, 'frontend');
INSERT INTO document_labels (document_id, label_name) VALUES (8, 'security');
INSERT INTO document_labels (document_id, label_name) VALUES (8, 'critical');",
)
.unwrap();
conn
}
/// Translate fixture-level filter strings into the typed `SearchFilters`
/// the search API expects.
///
/// Unrecognized `source_type` strings map to `None` (no source-type filter);
/// author and labels are copied through verbatim. The limit is fixed at 100
/// so golden queries never truncate relevant results.
fn build_search_filters(golden: &GoldenFilters) -> SearchFilters {
    let source_type = match golden.source_type.as_deref() {
        Some("issue") => Some(lore::documents::SourceType::Issue),
        Some("merge_request") => Some(lore::documents::SourceType::MergeRequest),
        Some("discussion") => Some(lore::documents::SourceType::Discussion),
        _ => None,
    };

    SearchFilters {
        source_type,
        author: golden.author.clone(),
        labels: golden.labels.clone(),
        limit: 100,
        ..Default::default()
    }
}
/// End-to-end golden-query run against the seeded in-memory DB.
///
/// Collects every failure instead of stopping at the first, then panics with
/// a combined report so one CI run surfaces all ranking regressions at once.
#[test]
fn golden_queries_all_pass() {
    let queries = load_golden_queries();
    let conn = create_seeded_db();
    let mut failures: Vec<String> = Vec::new();

    for (i, gq) in queries.iter().enumerate() {
        // Only lexical mode is runnable here: semantic/hybrid modes need a
        // live Ollama instance, which CI does not provide.
        let mode = SearchMode::parse(&gq.mode).unwrap_or(SearchMode::Lexical);
        assert_eq!(
            mode,
            SearchMode::Lexical,
            "Golden query {} uses non-lexical mode '{}' which requires Ollama — not supported in CI",
            i,
            gq.mode
        );

        // Run the FTS5 search in safe (sanitized) query mode.
        let fts_results = search_fts(&conn, &gq.query, 50, FtsQueryMode::Safe).unwrap();
        let doc_ids: Vec<i64> = fts_results.iter().map(|r| r.document_id).collect();

        // Apply structured filters only when the fixture declares any.
        // `doc_ids` is moved (not cloned) in the else branch — it is never
        // used again, so the previous `.clone()` was a redundant allocation.
        let filters = build_search_filters(&gq.filters);
        let filtered_ids = if filters.has_any_filter() {
            apply_filters(&conn, &doc_ids, &filters).unwrap()
        } else {
            doc_ids
        };

        // Minimum result-count check; skip rank checks when it already failed.
        if filtered_ids.len() < gq.min_results {
            failures.push(format!(
                "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
                i, gq.query, gq.min_results, filtered_ids.len(), gq.description
            ));
            continue;
        }

        // Each expected doc must appear within the top `max_rank` positions.
        // `pos` is 0-based; the report converts to 1-based rank (`pos + 1`).
        for expected_id in &gq.expected_doc_ids {
            match filtered_ids.iter().position(|id| id == expected_id) {
                Some(pos) if pos < gq.max_rank => {
                    // Pass
                }
                Some(pos) => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} in top {}, found at rank {} (description: {})",
                        i, gq.query, expected_id, gq.max_rank, pos + 1, gq.description
                    ));
                }
                None => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} not found in results {:?} (description: {})",
                        i, gq.query, expected_id, filtered_ids, gq.description
                    ));
                }
            }
        }
    }

    if !failures.is_empty() {
        panic!(
            "Golden query failures ({}/{}):\n{}",
            failures.len(),
            queries.len(),
            failures.join("\n")
        );
    }
}
/// Sanity-check the fixture file itself: enough queries, and every entry is
/// internally consistent (non-empty query, expected IDs present, positive
/// thresholds, parseable search mode).
#[test]
fn golden_queries_fixture_is_valid() {
    // Per-entry structural checks, extracted for readability; messages and
    // check order match the fixture's original validation exactly.
    fn check_entry(i: usize, gq: &GoldenQuery) {
        assert!(!gq.query.is_empty(), "Query {} has empty query string", i);
        assert!(
            !gq.expected_doc_ids.is_empty(),
            "Query {} has no expected doc IDs",
            i
        );
        assert!(gq.min_results > 0, "Query {} has min_results=0", i);
        assert!(gq.max_rank > 0, "Query {} has max_rank=0", i);
        assert!(
            SearchMode::parse(&gq.mode).is_some(),
            "Query {} has invalid mode '{}'",
            i,
            gq.mode
        );
    }

    let queries = load_golden_queries();
    assert!(
        queries.len() >= 5,
        "Golden queries fixture should have at least 5 queries, got {}",
        queries.len()
    );
    for (i, gq) in queries.iter().enumerate() {
        check_entry(i, gq);
    }
}