//! Integration tests for hybrid search combining FTS + vector. //! //! Tests all three search modes (lexical, semantic, hybrid) and //! verifies graceful degradation when embeddings are unavailable. use lore::core::db::create_connection; use lore::search::{FtsQueryMode, SearchFilters, SearchMode, search_fts, search_hybrid}; use rusqlite::Connection; use std::path::PathBuf; use tempfile::TempDir; fn create_test_db() -> (TempDir, Connection) { let tmp = TempDir::new().unwrap(); let db_path = tmp.path().join("test.db"); let conn = create_connection(&db_path).unwrap(); let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations"); for version in 1..=9 { let entries: Vec<_> = std::fs::read_dir(&migrations_dir) .unwrap() .filter_map(|e| e.ok()) .filter(|e| { e.file_name() .to_string_lossy() .starts_with(&format!("{:03}", version)) }) .collect(); assert!(!entries.is_empty(), "Migration {} not found", version); let sql = std::fs::read_to_string(entries[0].path()).unwrap(); conn.execute_batch(&sql) .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e)); } conn.execute( "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')", [], ) .unwrap(); (tmp, conn) } fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, content: &str) { conn.execute( "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username) VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://example.com/' || ?1, 'testuser')", rusqlite::params![id, source_type, title, content], ) .unwrap(); } #[test] fn lexical_mode_uses_fts_only() { let (_tmp, conn) = create_test_db(); insert_document(&conn, 1, "issue", "Authentication bug", "OAuth token refresh fails silently."); insert_document(&conn, 2, "issue", "Database migration", "Migration script crashes on PostgreSQL."); let filters = SearchFilters { limit: 10, ..Default::default() }; let rt = tokio::runtime::Runtime::new().unwrap(); let (results, warnings) = rt .block_on(search_hybrid( &conn, None, "authentication", SearchMode::Lexical, &filters, FtsQueryMode::Safe, )) .unwrap(); assert!(!results.is_empty(), "Lexical search should find results"); assert_eq!(results[0].document_id, 1); // Lexical mode should not produce Ollama-related warnings assert!( warnings.iter().all(|w| !w.contains("Ollama")), "Lexical mode should not warn about Ollama" ); } #[test] fn lexical_mode_no_embeddings_required() { // Use in-memory DB without sqlite-vec for pure FTS let conn = Connection::open_in_memory().unwrap(); conn.pragma_update(None, "foreign_keys", "ON").unwrap(); let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations"); // Only apply through migration 008 (FTS5, no embeddings) for version in 1..=8 { let entries: Vec<_> = std::fs::read_dir(&migrations_dir) .unwrap() .filter_map(|e| e.ok()) .filter(|e| { e.file_name() .to_string_lossy() .starts_with(&format!("{:03}", version)) }) .collect(); let sql = std::fs::read_to_string(entries[0].path()).unwrap(); conn.execute_batch(&sql).unwrap(); } conn.execute( "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')", [], ) .unwrap(); conn.execute( "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url) VALUES (1, 'issue', 1, 1, 'Test issue', 'Content about testing and verification.', 'h1', 'https://example.com/1')", [], ) .unwrap(); let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap(); assert!(!results.is_empty(), "FTS should work without embeddings tables"); } #[test] fn hybrid_mode_degrades_to_fts_without_client() { let (_tmp, conn) = create_test_db(); insert_document(&conn, 1, "issue", "Performance issue", "Application is slow under load."); let filters = SearchFilters { limit: 10, ..Default::default() }; let rt = tokio::runtime::Runtime::new().unwrap(); let (results, warnings) = rt .block_on(search_hybrid( &conn, None, // No Ollama client "performance slow", SearchMode::Hybrid, &filters, FtsQueryMode::Safe, )) .unwrap(); assert!(!results.is_empty(), "Should fall back to FTS results"); // Should warn about missing Ollama client assert!( warnings.iter().any(|w| w.to_lowercase().contains("vector") || w.to_lowercase().contains("ollama") || w.to_lowercase().contains("client") || w.to_lowercase().contains("fallback") || w.to_lowercase().contains("fts")), "Should produce a degradation warning, got: {:?}", warnings ); } #[test] fn rrf_ranking_combines_signals() { use lore::search::rank_rrf; // Two documents with different rankings in each signal let vector_results = vec![(1_i64, 0.1), (2, 0.5)]; // doc 1 closer let fts_results = vec![(2_i64, -5.0), (1, -3.0)]; // doc 2 higher BM25 let rrf = rank_rrf(&vector_results, &fts_results); assert_eq!(rrf.len(), 2, "Should return both documents"); // Both docs appear in both signals, so both get RRF scores for r in &rrf { assert!(r.rrf_score > 0.0, "RRF score should be positive"); } } #[test] fn filters_by_source_type() { let (_tmp, conn) = create_test_db(); insert_document(&conn, 1, "issue", "Bug report", "Authentication bug in login flow."); insert_document(&conn, 2, "merge_request", "Fix auth", "Fixed authentication issue."); let filters = SearchFilters { source_type: Some(lore::documents::SourceType::Issue), limit: 10, ..Default::default() }; let all_ids = vec![1, 2]; let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap(); assert_eq!(filtered.len(), 1, "Filter should remove non-issue documents"); assert_eq!(filtered[0], 1, "Only issue document should remain"); } #[test] fn search_mode_variants_exist() { // Verify all enum variants compile and are distinct let hybrid = SearchMode::Hybrid; let lexical = SearchMode::Lexical; let semantic = SearchMode::Semantic; assert_ne!(hybrid, lexical); assert_ne!(hybrid, semantic); assert_ne!(lexical, semantic); }