Automated formatting and lint corrections from parallel agent work: - cargo fmt: import reordering (alphabetical), line wrapping to respect max width, trailing comma normalization, destructuring alignment, function signature reformatting, match arm formatting - clippy (pedantic): Range::contains() instead of manual comparisons, i64::from() instead of `as i64` casts, .clamp() instead of .max().min() chains, let-chain refactors (if-let with &&), #[allow(clippy::too_many_arguments)] and #[allow(clippy::field_reassign_with_default)] where warranted - Removed trailing blank lines and extra whitespace No behavioral changes. All existing tests pass unmodified. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
247 lines
7.1 KiB
Rust
247 lines
7.1 KiB
Rust
//! Integration tests for hybrid search combining FTS + vector.
//!
//! Tests all three search modes (lexical, semantic, hybrid) and
//! verifies graceful degradation when embeddings are unavailable.
use lore::core::db::create_connection;
|
|
use lore::search::{FtsQueryMode, SearchFilters, SearchMode, search_fts, search_hybrid};
|
|
use rusqlite::Connection;
|
|
use std::path::PathBuf;
|
|
use tempfile::TempDir;
|
|
|
|
fn create_test_db() -> (TempDir, Connection) {
|
|
let tmp = TempDir::new().unwrap();
|
|
let db_path = tmp.path().join("test.db");
|
|
let conn = create_connection(&db_path).unwrap();
|
|
|
|
let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
|
|
|
|
for version in 1..=9 {
|
|
let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
|
|
.unwrap()
|
|
.filter_map(|e| e.ok())
|
|
.filter(|e| {
|
|
e.file_name()
|
|
.to_string_lossy()
|
|
.starts_with(&format!("{:03}", version))
|
|
})
|
|
.collect();
|
|
|
|
assert!(!entries.is_empty(), "Migration {} not found", version);
|
|
let sql = std::fs::read_to_string(entries[0].path()).unwrap();
|
|
conn.execute_batch(&sql)
|
|
.unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
|
|
}
|
|
|
|
conn.execute(
|
|
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
|
|
[],
|
|
)
|
|
.unwrap();
|
|
|
|
(tmp, conn)
|
|
}
|
|
|
|
fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, content: &str) {
|
|
conn.execute(
|
|
"INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username)
|
|
VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://example.com/' || ?1, 'testuser')",
|
|
rusqlite::params![id, source_type, title, content],
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
#[test]
fn lexical_mode_uses_fts_only() {
    let (_tmp, conn) = create_test_db();

    // Two documents; only the first mentions authentication.
    let docs = [
        (1, "issue", "Authentication bug", "OAuth token refresh fails silently."),
        (2, "issue", "Database migration", "Migration script crashes on PostgreSQL."),
    ];
    for (id, source_type, title, content) in docs {
        insert_document(&conn, id, source_type, title, content);
    }

    let filters = SearchFilters {
        limit: 10,
        ..Default::default()
    };

    // Futures are lazy, so building the search first and driving it via
    // block_on afterwards is equivalent to awaiting it inline.
    let rt = tokio::runtime::Runtime::new().unwrap();
    let search = search_hybrid(
        &conn,
        None,
        "authentication",
        SearchMode::Lexical,
        &filters,
        FtsQueryMode::Safe,
    );
    let (results, warnings) = rt.block_on(search).unwrap();

    assert!(!results.is_empty(), "Lexical search should find results");
    assert_eq!(results[0].document_id, 1);
    // Lexical mode should not produce Ollama-related warnings
    assert!(
        warnings.iter().all(|w| !w.contains("Ollama")),
        "Lexical mode should not warn about Ollama"
    );
}
#[test]
fn lexical_mode_no_embeddings_required() {
    // Use in-memory DB without sqlite-vec for pure FTS
    let conn = Connection::open_in_memory().unwrap();
    conn.pragma_update(None, "foreign_keys", "ON").unwrap();

    let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
    // Only apply through migration 008 (FTS5, no embeddings)
    for version in 1..=8 {
        let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_string_lossy()
                    .starts_with(&format!("{:03}", version))
            })
            .collect();
        // Fail with a clear message instead of an index-out-of-bounds
        // panic when a migration file is missing (mirrors create_test_db).
        assert!(!entries.is_empty(), "Migration {} not found", version);
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        conn.execute_batch(&sql).unwrap();
    }

    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
        [],
    )
    .unwrap();

    conn.execute(
        "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
         VALUES (1, 'issue', 1, 1, 'Test issue', 'Content about testing and verification.', 'h1', 'https://example.com/1')",
        [],
    )
    .unwrap();

    // Pure FTS path — no embeddings tables (migration 009) present.
    let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap();
    assert!(
        !results.is_empty(),
        "FTS should work without embeddings tables"
    );
}
#[test]
fn hybrid_mode_degrades_to_fts_without_client() {
    let (_tmp, conn) = create_test_db();

    insert_document(
        &conn,
        1,
        "issue",
        "Performance issue",
        "Application is slow under load.",
    );

    let filters = SearchFilters {
        limit: 10,
        ..Default::default()
    };

    let rt = tokio::runtime::Runtime::new().unwrap();
    let outcome = rt.block_on(search_hybrid(
        &conn,
        None, // No Ollama client
        "performance slow",
        SearchMode::Hybrid,
        &filters,
        FtsQueryMode::Safe,
    ));
    let (results, warnings) = outcome.unwrap();

    assert!(!results.is_empty(), "Should fall back to FTS results");
    // Should warn about missing Ollama client: at least one warning must
    // reference the vector side, the client, or the FTS fallback.
    let mentions_degradation = warnings.iter().any(|w| {
        let lower = w.to_lowercase();
        ["vector", "ollama", "client", "fallback", "fts"]
            .iter()
            .any(|needle| lower.contains(needle))
    });
    assert!(
        mentions_degradation,
        "Should produce a degradation warning, got: {:?}",
        warnings
    );
}
#[test]
fn rrf_ranking_combines_signals() {
    use lore::search::rank_rrf;

    // Two documents ranked oppositely by each signal: the vector signal
    // prefers doc 1 (smaller distance), the FTS signal prefers doc 2
    // (higher BM25).
    let vector_results = vec![(1_i64, 0.1), (2, 0.5)];
    let fts_results = vec![(2_i64, -5.0), (1, -3.0)];

    let rrf = rank_rrf(&vector_results, &fts_results);

    assert_eq!(rrf.len(), 2, "Should return both documents");
    // Both docs appear in both signals, so every entry gets a strictly
    // positive RRF score.
    assert!(
        rrf.iter().all(|r| r.rrf_score > 0.0),
        "RRF score should be positive"
    );
}
#[test]
fn filters_by_source_type() {
    let (_tmp, conn) = create_test_db();

    // One issue and one merge request; the filter should keep only the issue.
    insert_document(
        &conn,
        1,
        "issue",
        "Bug report",
        "Authentication bug in login flow.",
    );
    insert_document(
        &conn,
        2,
        "merge_request",
        "Fix auth",
        "Fixed authentication issue.",
    );

    let filters = SearchFilters {
        source_type: Some(lore::documents::SourceType::Issue),
        limit: 10,
        ..Default::default()
    };

    let all_ids = vec![1, 2];
    let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap();

    assert_eq!(filtered.len(), 1, "Filter should remove non-issue documents");
    assert_eq!(filtered[0], 1, "Only issue document should remain");
}
#[test]
fn search_mode_variants_exist() {
    // Verify all enum variants compile and are pairwise distinct.
    let modes = [SearchMode::Hybrid, SearchMode::Lexical, SearchMode::Semantic];
    for (i, a) in modes.iter().enumerate() {
        for b in &modes[i + 1..] {
            assert_ne!(a, b);
        }
    }
}