Applies the same doc comment cleanup to test files: - Removes test module headers (//! lines) - Removes obvious test function comments - Retains comments explaining non-obvious test scenarios Test names should be descriptive enough to convey intent without additional comments. Complex test setup or assertions that need explanation retain their comments. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
281 lines
10 KiB
Rust
281 lines
10 KiB
Rust
#![allow(dead_code)]
|
|
|
|
use rusqlite::Connection;
|
|
use serde::Deserialize;
|
|
use std::path::PathBuf;
|
|
|
|
use lore::search::{FtsQueryMode, SearchFilters, SearchMode, apply_filters, search_fts};
|
|
|
|
/// One golden-query test case, deserialized from
/// `tests/fixtures/golden_queries.json`.
#[derive(Debug, Deserialize)]
struct GoldenQuery {
    /// Raw search string passed to `search_fts`.
    query: String,
    /// Search-mode name; validated via `SearchMode::parse` and required to be
    /// lexical in CI (see `golden_queries_all_pass`).
    mode: String,
    /// Optional metadata filters; defaults to "no filtering" when the key is
    /// absent from the JSON.
    #[serde(default)]
    filters: GoldenFilters,
    /// Document IDs that must each appear within the top `max_rank` results.
    expected_doc_ids: Vec<i64>,
    /// Minimum number of results required after filtering.
    min_results: usize,
    /// Rank bound for every ID in `expected_doc_ids` (compared against the
    /// zero-based position, so this is an exclusive upper bound).
    max_rank: usize,
    /// Human-readable intent, echoed in failure messages.
    description: String,
}
|
|
|
|
/// Optional metadata filters attached to a golden query. All fields default
/// to "no filtering" when omitted from the fixture.
#[derive(Debug, Default, Deserialize)]
struct GoldenFilters {
    /// Stringly-typed source kind ("issue" | "merge_request" | "discussion");
    /// mapped to `lore::documents::SourceType` in `build_search_filters`,
    /// where unknown values are silently dropped.
    source_type: Option<String>,
    /// Restrict results to documents by this author username.
    author: Option<String>,
    /// NOTE(review): deserialized but never forwarded by
    /// `build_search_filters` — confirm whether project filtering is intended
    /// or this field is dead fixture schema.
    project: Option<String>,
    /// Label names the matched documents must carry.
    #[serde(default)]
    labels: Vec<String>,
}
|
|
|
|
fn load_golden_queries() -> Vec<GoldenQuery> {
|
|
let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/golden_queries.json");
|
|
let content = std::fs::read_to_string(&path)
|
|
.unwrap_or_else(|_| panic!("Failed to read golden queries fixture"));
|
|
serde_json::from_str(&content)
|
|
.unwrap_or_else(|e| panic!("Failed to parse golden queries: {}", e))
|
|
}
|
|
|
|
/// Builds an in-memory SQLite database seeded with a fixed, deterministic
/// corpus (issues, merge requests, one discussion) plus labels, so golden
/// queries always run against identical data.
fn create_seeded_db() -> Connection {
    let conn = Connection::open_in_memory().unwrap();
    conn.pragma_update(None, "foreign_keys", "ON").unwrap();

    // Apply schema migrations 1..=8 from the crate's migrations/ directory.
    // Each migration file is located by its zero-padded numeric prefix
    // (e.g. "003_..."); exactly one match per version is assumed, and the
    // first directory entry that matches is used.
    let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("migrations");
    for version in 1..=8 {
        let entries: Vec<_> = std::fs::read_dir(&migrations_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_string_lossy()
                    .starts_with(&format!("{:03}", version))
            })
            .collect();
        assert!(!entries.is_empty(), "Migration {} not found", version);
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        conn.execute_batch(&sql)
            .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
    }

    // Single project every seeded document belongs to (project_id = 1).
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
         VALUES (1, 100, 'group/project', 'https://gitlab.example.com/group/project')",
        [],
    )
    .unwrap();

    // Seed corpus: (id, source_type, title, content, author). Content is
    // written so distinct topical vocabulary (auth, DB, CSS, CI/CD, security,
    // rate limiting, performance) lands in distinct documents for FTS.
    let documents = vec![
        (
            1,
            "issue",
            "Authentication and login broken with OAuth",
            "Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
             causing authentication errors. When the access token expires, the refresh flow returns \
             a 401 instead of fetching new credentials. Login page shows a generic error. \
             Multiple users reported authentication failures across all OAuth providers.",
            "testuser",
        ),
        (
            2,
            "merge_request",
            "Add user profile page with avatar upload",
            "This merge request adds a new user profile page. Users can now upload their avatar, \
             edit their display name, and manage notification preferences. The profile page includes \
             responsive design for mobile and desktop viewports.",
            "developer1",
        ),
        (
            3,
            "issue",
            "Database migration failing on PostgreSQL 14",
            "The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
             The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
             rewritten to use the new schema modification syntax. All staging environments affected.",
            "dba_admin",
        ),
        (
            4,
            "merge_request",
            "Performance optimization for dashboard queries",
            "Optimized the dashboard query performance by adding database indexes and implementing \
             Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
             to 180ms. Added connection pooling and prepared statement caching.",
            "senior_dev",
        ),
        (
            5,
            "discussion",
            "API rate limiting strategies for public endpoints",
            "Discussion about implementing API rate limiting on public-facing endpoints. \
             Proposed approaches: token bucket with sliding window, fixed window counters, \
             or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
             Need to handle burst traffic during peak hours without throttling legitimate users.",
            "architect",
        ),
        (
            6,
            "issue",
            "CSS styling issues on mobile frontend",
            "Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
             content on screens smaller than 768px. Button text truncates on compact viewports. \
             Frontend responsive breakpoints need adjustment. The UI components library has \
             conflicting CSS specificity with the theme system.",
            "frontend_dev",
        ),
        (
            7,
            "merge_request",
            "Revamp CI/CD pipeline with Docker caching",
            "Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
             builds. Deployment stages now run in parallel where possible. Added rollback \
             support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
            "devops_lead",
        ),
        (
            8,
            "issue",
            "Security vulnerability in form submission",
            "A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
             User input is not properly sanitized before rendering. The security scanner also flagged \
             potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
            "security_team",
        ),
    ];

    // Insert each document. source_id reuses the document id (?1), and
    // content_hash / url are derived from id and source_type via SQL
    // concatenation so each row is unique without extra fixture data.
    for (id, source_type, title, content, author) in &documents {
        conn.execute(
            "INSERT INTO documents (id, source_type, source_id, project_id, title, content_text, content_hash, url, author_username)
             VALUES (?1, ?2, ?1, 1, ?3, ?4, 'hash_' || ?1, 'https://gitlab.example.com/group/project/-/' || ?2 || 's/' || ?1, ?5)",
            rusqlite::params![id, source_type, title, content, author],
        )
        .unwrap();
    }

    // Labels: issues 1/3/6 are 'bug' plus a topical label; issue 8 is
    // 'security'/'critical'. Merge requests and the discussion are unlabeled,
    // which exercises label filtering in the golden queries.
    conn.execute_batch(
        "INSERT INTO document_labels (document_id, label_name) VALUES (1, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (1, 'authentication');
         INSERT INTO document_labels (document_id, label_name) VALUES (3, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (3, 'database');
         INSERT INTO document_labels (document_id, label_name) VALUES (6, 'bug');
         INSERT INTO document_labels (document_id, label_name) VALUES (6, 'frontend');
         INSERT INTO document_labels (document_id, label_name) VALUES (8, 'security');
         INSERT INTO document_labels (document_id, label_name) VALUES (8, 'critical');",
    )
    .unwrap();

    conn
}
|
|
|
|
fn build_search_filters(golden: &GoldenFilters) -> SearchFilters {
|
|
let source_type = golden.source_type.as_deref().and_then(|s| match s {
|
|
"issue" => Some(lore::documents::SourceType::Issue),
|
|
"merge_request" => Some(lore::documents::SourceType::MergeRequest),
|
|
"discussion" => Some(lore::documents::SourceType::Discussion),
|
|
_ => None,
|
|
});
|
|
|
|
SearchFilters {
|
|
source_type,
|
|
author: golden.author.clone(),
|
|
labels: golden.labels.clone(),
|
|
limit: 100,
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
/// Runs every golden query against the seeded in-memory database and
/// accumulates all failures before panicking, so a single CI run reports
/// every broken query instead of stopping at the first.
#[test]
fn golden_queries_all_pass() {
    let queries = load_golden_queries();
    let conn = create_seeded_db();

    // Collected failure messages; only checked (and reported) at the end.
    let mut failures: Vec<String> = Vec::new();

    for (i, gq) in queries.iter().enumerate() {
        let mode = SearchMode::parse(&gq.mode).unwrap_or(SearchMode::Lexical);

        // CI has no embedding backend, so any non-lexical fixture entry is a
        // hard error (not an accumulated failure).
        assert_eq!(
            mode,
            SearchMode::Lexical,
            "Golden query {} uses non-lexical mode '{}' which requires Ollama — not supported in CI",
            i,
            gq.mode
        );

        // Lexical FTS search, capped at 50 candidates before filtering.
        let fts_results = search_fts(&conn, &gq.query, 50, FtsQueryMode::Safe).unwrap();
        let doc_ids: Vec<i64> = fts_results.iter().map(|r| r.document_id).collect();

        // Apply metadata filters only when the fixture specifies any;
        // otherwise keep the raw FTS ranking untouched.
        let filters = build_search_filters(&gq.filters);
        let filtered_ids = if filters.has_any_filter() {
            apply_filters(&conn, &doc_ids, &filters).unwrap()
        } else {
            doc_ids.clone()
        };

        // Check the minimum result count first; if it fails, skip the
        // per-document ranking checks for this query.
        if filtered_ids.len() < gq.min_results {
            failures.push(format!(
                "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
                i,
                gq.query,
                gq.min_results,
                filtered_ids.len(),
                gq.description
            ));
            continue;
        }

        // Every expected document must appear within the top `max_rank`
        // positions (pos is zero-based, hence the strict `<` comparison).
        for expected_id in &gq.expected_doc_ids {
            let position = filtered_ids.iter().position(|id| id == expected_id);
            match position {
                Some(pos) if pos < gq.max_rank => {}
                Some(pos) => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} in top {}, found at rank {} (description: {})",
                        i, gq.query, expected_id, gq.max_rank, pos + 1, gq.description
                    ));
                }
                None => {
                    failures.push(format!(
                        "FAIL [{}] \"{}\": expected doc_id {} not found in results {:?} (description: {})",
                        i, gq.query, expected_id, filtered_ids, gq.description
                    ));
                }
            }
        }
    }

    // Report everything that went wrong in one panic message.
    if !failures.is_empty() {
        panic!(
            "Golden query failures ({}/{}):\n{}",
            failures.len(),
            queries.len(),
            failures.join("\n")
        );
    }
}
|
|
|
|
/// Sanity-checks the fixture file itself, so malformed fixture data fails
/// loudly here instead of silently weakening `golden_queries_all_pass`.
#[test]
fn golden_queries_fixture_is_valid() {
    let queries = load_golden_queries();

    // A meaningful golden suite needs a baseline number of queries.
    let total = queries.len();
    assert!(
        total >= 5,
        "Golden queries fixture should have at least 5 queries, got {}",
        total
    );

    // Per-query structural invariants.
    for (i, gq) in queries.iter().enumerate() {
        let query_present = !gq.query.is_empty();
        assert!(query_present, "Query {} has empty query string", i);

        let has_expectations = !gq.expected_doc_ids.is_empty();
        assert!(has_expectations, "Query {} has no expected doc IDs", i);

        assert!(gq.min_results > 0, "Query {} has min_results=0", i);
        assert!(gq.max_rank > 0, "Query {} has max_rank=0", i);

        // The mode string must round-trip through the real parser.
        let parsed_mode = SearchMode::parse(&gq.mode);
        assert!(
            parsed_mode.is_some(),
            "Query {} has invalid mode '{}'",
            i,
            gq.mode
        );
    }
}
|