Automated formatting and lint corrections from parallel agent work: - cargo fmt: import reordering (alphabetical), line wrapping to respect max width, trailing comma normalization, destructuring alignment, function signature reformatting, match arm formatting - clippy (pedantic): Range::contains() instead of manual comparisons, i64::from() instead of `as i64` casts, .clamp() instead of .max().min() chains, let-chain refactors (if-let with &&), #[allow(clippy::too_many_arguments)] and #[allow(clippy::field_reassign_with_default)] where warranted - Removed trailing blank lines and extra whitespace No behavioral changes. All existing tests pass unmodified. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
247 lines
7.1 KiB
Rust
247 lines
7.1 KiB
Rust
//! Integration tests for hybrid search combining FTS + vector.
//!
//! Tests all three search modes (lexical, semantic, hybrid) and
//! verifies graceful degradation when embeddings are unavailable.
use lore::core::db::create_connection;
|
|
use lore::search::{FtsQueryMode, SearchFilters, SearchMode, search_fts, search_hybrid};
|
|
use rusqlite::Connection;
|
|
use std::path::PathBuf;
|
|
use tempfile::TempDir;
|
|
|
|
fn create_test_db() -> (TempDir, Connection) {
|
|
let tmp = TempDir::new().unwrap();
|
|
let db_path = tmp.path().join("test.db");
|
|
let conn = create_connection(&db_path).unwrap();
|
|
|
|
let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
|
|
|
|
for version in 1..=9 {
|
|
let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
|
|
.unwrap()
|
|
.filter_map(|e| e.ok())
|
|
.filter(|e| {
|
|
e.file_name()
|
|
.to_string_lossy()
|
|
.starts_with(&format!("{:03}", version))
|
|
})
|
|
.collect();
|
|
|
|
assert!(!entries.is_empty(), "Migration {} not found", version);
|
|
let sql = std::fs::read_to_string(entries[0].path()).unwrap();
|
|
conn.execute_batch(&sql)
|
|
.unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
|
|
}
|
|
|
|
conn.execute(
|
|
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
|
|
[],
|
|
)
|
|
.unwrap();
|
|
|
|
(tmp, conn)
|
|
}
|
|
|
|
fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, content: &str) {
|
|
conn.execute(
|
|
"INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username)
|
|
VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://example.com/' || ?1, 'testuser')",
|
|
rusqlite::params![id, source_type, title, content],
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
#[test]
fn lexical_mode_uses_fts_only() {
    let (_tmp, conn) = create_test_db();

    // Two documents; only the first mentions authentication.
    let docs = [
        (1, "issue", "Authentication bug", "OAuth token refresh fails silently."),
        (2, "issue", "Database migration", "Migration script crashes on PostgreSQL."),
    ];
    for (id, source_type, title, content) in docs {
        insert_document(&conn, id, source_type, title, content);
    }

    let filters = SearchFilters {
        limit: 10,
        ..Default::default()
    };

    // Futures are lazy, so building the search first and driving it via
    // block_on afterwards is equivalent to awaiting it inline.
    let rt = tokio::runtime::Runtime::new().unwrap();
    let search = search_hybrid(
        &conn,
        None,
        "authentication",
        SearchMode::Lexical,
        &filters,
        FtsQueryMode::Safe,
    );
    let (results, warnings) = rt.block_on(search).unwrap();

    assert!(!results.is_empty(), "Lexical search should find results");
    assert_eq!(results[0].document_id, 1);
    // Lexical mode should not produce Ollama-related warnings
    assert!(
        warnings.iter().all(|w| !w.contains("Ollama")),
        "Lexical mode should not warn about Ollama"
    );
}
#[test]
fn lexical_mode_no_embeddings_required() {
    // Use in-memory DB without sqlite-vec for pure FTS
    let conn = Connection::open_in_memory().unwrap();
    conn.pragma_update(None, "foreign_keys", "ON").unwrap();

    let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
    // Only apply through migration 008 (FTS5, no embeddings)
    for version in 1..=8 {
        let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_string_lossy()
                    .starts_with(&format!("{:03}", version))
            })
            .collect();
        // Fail with a clear message instead of an index-out-of-bounds
        // panic when a migration file is missing (mirrors create_test_db).
        assert!(!entries.is_empty(), "Migration {} not found", version);
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        conn.execute_batch(&sql).unwrap();
    }

    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project')",
        [],
    )
    .unwrap();

    conn.execute(
        "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url)
         VALUES (1, 'issue', 1, 1, 'Test issue', 'Content about testing and verification.', 'h1', 'https://example.com/1')",
        [],
    )
    .unwrap();

    // Pure FTS path — no embeddings tables (migration 009) present.
    let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap();
    assert!(
        !results.is_empty(),
        "FTS should work without embeddings tables"
    );
}
#[test]
fn hybrid_mode_degrades_to_fts_without_client() {
    let (_tmp, conn) = create_test_db();

    insert_document(
        &conn,
        1,
        "issue",
        "Performance issue",
        "Application is slow under load.",
    );

    let filters = SearchFilters {
        limit: 10,
        ..Default::default()
    };

    let rt = tokio::runtime::Runtime::new().unwrap();
    let outcome = rt.block_on(search_hybrid(
        &conn,
        None, // No Ollama client
        "performance slow",
        SearchMode::Hybrid,
        &filters,
        FtsQueryMode::Safe,
    ));
    let (results, warnings) = outcome.unwrap();

    assert!(!results.is_empty(), "Should fall back to FTS results");
    // Should warn about missing Ollama client: at least one warning must
    // reference the vector side, the client, or the FTS fallback.
    let mentions_degradation = warnings.iter().any(|w| {
        let lower = w.to_lowercase();
        ["vector", "ollama", "client", "fallback", "fts"]
            .iter()
            .any(|needle| lower.contains(needle))
    });
    assert!(
        mentions_degradation,
        "Should produce a degradation warning, got: {:?}",
        warnings
    );
}
#[test]
fn rrf_ranking_combines_signals() {
    use lore::search::rank_rrf;

    // Two documents ranked oppositely by each signal: the vector signal
    // prefers doc 1 (smaller distance), the FTS signal prefers doc 2
    // (higher BM25).
    let vector_results = vec![(1_i64, 0.1), (2, 0.5)];
    let fts_results = vec![(2_i64, -5.0), (1, -3.0)];

    let rrf = rank_rrf(&vector_results, &fts_results);

    assert_eq!(rrf.len(), 2, "Should return both documents");
    // Both docs appear in both signals, so every entry gets a strictly
    // positive RRF score.
    assert!(
        rrf.iter().all(|r| r.rrf_score > 0.0),
        "RRF score should be positive"
    );
}
#[test]
fn filters_by_source_type() {
    let (_tmp, conn) = create_test_db();

    // One issue and one merge request; the filter should keep only the issue.
    insert_document(
        &conn,
        1,
        "issue",
        "Bug report",
        "Authentication bug in login flow.",
    );
    insert_document(
        &conn,
        2,
        "merge_request",
        "Fix auth",
        "Fixed authentication issue.",
    );

    let filters = SearchFilters {
        source_type: Some(lore::documents::SourceType::Issue),
        limit: 10,
        ..Default::default()
    };

    let all_ids = vec![1, 2];
    let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap();

    assert_eq!(filtered.len(), 1, "Filter should remove non-issue documents");
    assert_eq!(filtered[0], 1, "Only issue document should remain");
}
#[test]
fn search_mode_variants_exist() {
    // Verify all enum variants compile and are pairwise distinct.
    let modes = [SearchMode::Hybrid, SearchMode::Lexical, SearchMode::Semantic];
    for (i, a) in modes.iter().enumerate() {
        for b in &modes[i + 1..] {
            assert_ne!(a, b);
        }
    }
}