Complete TUI Phase 3 implementation with all 5 power feature screens: - Who screen: 5 modes (expert/workload/reviews/active/overlap) with mode tabs, input bar, result rendering, and hint bar - Search screen: full-text search with result list and scoring display - Timeline screen: chronological event feed with time-relative display - Trace screen: file provenance chains with expand/collapse, rename tracking, and linked issues/discussions - File History screen: per-file MR timeline with rename chain display and discussion snippets Also includes: - Command palette overlay (fuzzy search) - Bootstrap screen (initial sync flow) - Action layer split from monolithic action.rs to per-screen modules - Entity and render cache infrastructure - Shared who_types module in core crate - All screens wired into view/mod.rs dispatch - 597 tests passing, clippy clean (pedantic + nursery), fmt clean
362 lines
12 KiB
Rust
#![allow(dead_code)]
|
|
|
|
use anyhow::{Context, Result};
|
|
use rusqlite::Connection;
|
|
|
|
use crate::message::{EntityKey, EntityKind, SearchMode, SearchResult};
|
|
use crate::state::search::SearchCapabilities;
|
|
|
|
/// Probe the database to detect available search indexes.
|
|
///
|
|
/// Checks for FTS5 documents and embedding metadata. Returns capabilities
|
|
/// that the UI uses to gate available search modes.
|
|
pub fn fetch_search_capabilities(conn: &Connection) -> Result<SearchCapabilities> {
|
|
// FTS: check if documents_fts has rows via the docsize shadow table
|
|
// (B-tree, not virtual table scan).
|
|
let has_fts = conn
|
|
.query_row(
|
|
"SELECT EXISTS(SELECT 1 FROM documents_fts_docsize LIMIT 1)",
|
|
[],
|
|
|r| r.get::<_, bool>(0),
|
|
)
|
|
.unwrap_or(false);
|
|
|
|
// Embeddings: count rows in embedding_metadata.
|
|
let embedding_count: i64 = conn
|
|
.query_row("SELECT COUNT(*) FROM embedding_metadata", [], |r| r.get(0))
|
|
.unwrap_or(0);
|
|
|
|
let has_embeddings = embedding_count > 0;
|
|
|
|
// Coverage: embeddings / documents percentage.
|
|
let doc_count: i64 = conn
|
|
.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
|
|
.unwrap_or(0);
|
|
|
|
let embedding_coverage_pct = if doc_count > 0 {
|
|
(embedding_count as f32 / doc_count as f32 * 100.0).min(100.0)
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
Ok(SearchCapabilities {
|
|
has_fts,
|
|
has_embeddings,
|
|
embedding_coverage_pct,
|
|
})
|
|
}
|
|
|
|
/// Execute a search query against the local database.
|
|
///
|
|
/// Dispatches to the correct search backend based on mode:
|
|
/// - Lexical: FTS5 only (documents_fts)
|
|
/// - Hybrid: FTS5 + vector merge via RRF
|
|
/// - Semantic: vector cosine similarity only
|
|
///
|
|
/// Returns results sorted by score descending.
|
|
pub fn execute_search(
|
|
conn: &Connection,
|
|
query: &str,
|
|
mode: SearchMode,
|
|
limit: usize,
|
|
) -> Result<Vec<SearchResult>> {
|
|
if query.trim().is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
match mode {
|
|
SearchMode::Lexical => execute_fts_search(conn, query, limit),
|
|
SearchMode::Hybrid | SearchMode::Semantic => {
|
|
// Hybrid and Semantic require the full search pipeline from the
|
|
// core crate (async, Ollama client). For now, fall back to FTS
|
|
// for Hybrid and return empty for Semantic-only.
|
|
// TODO: Wire up async search dispatch when core search is integrated.
|
|
if mode == SearchMode::Hybrid {
|
|
execute_fts_search(conn, query, limit)
|
|
} else {
|
|
Ok(Vec::new())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// FTS5 full-text search against the documents table.
|
|
fn execute_fts_search(conn: &Connection, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
|
|
// Sanitize the query for FTS5 (escape special chars, wrap terms in quotes).
|
|
let safe_query = sanitize_fts_query(query);
|
|
if safe_query.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
// Resolve project_path via JOIN through projects table.
|
|
// Resolve iid via JOIN through the source entity table (issues or merge_requests).
|
|
// snippet column 1 = content_text (column 0 is title).
|
|
let mut stmt = conn
|
|
.prepare(
|
|
"SELECT d.source_type, d.source_id, d.title, d.project_id,
|
|
p.path_with_namespace,
|
|
snippet(documents_fts, 1, '>>>', '<<<', '...', 32) AS snip,
|
|
bm25(documents_fts) AS score,
|
|
COALESCE(i.iid, mr.iid) AS entity_iid
|
|
FROM documents_fts
|
|
JOIN documents d ON documents_fts.rowid = d.id
|
|
JOIN projects p ON p.id = d.project_id
|
|
LEFT JOIN issues i ON d.source_type = 'issue' AND i.id = d.source_id
|
|
LEFT JOIN merge_requests mr ON d.source_type = 'merge_request' AND mr.id = d.source_id
|
|
WHERE documents_fts MATCH ?1
|
|
ORDER BY score
|
|
LIMIT ?2",
|
|
)
|
|
.context("preparing FTS search query")?;
|
|
|
|
let rows = stmt
|
|
.query_map(rusqlite::params![safe_query, limit as i64], |row| {
|
|
let source_type: String = row.get(0)?;
|
|
let _source_id: i64 = row.get(1)?;
|
|
let title: String = row.get::<_, Option<String>>(2)?.unwrap_or_default();
|
|
let project_id: i64 = row.get(3)?;
|
|
let project_path: String = row.get::<_, Option<String>>(4)?.unwrap_or_default();
|
|
let snippet: String = row.get::<_, Option<String>>(5)?.unwrap_or_default();
|
|
let score: f64 = row.get(6)?;
|
|
let entity_iid: Option<i64> = row.get(7)?;
|
|
Ok((
|
|
source_type,
|
|
project_id,
|
|
title,
|
|
project_path,
|
|
snippet,
|
|
score,
|
|
entity_iid,
|
|
))
|
|
})
|
|
.context("executing FTS search")?;
|
|
|
|
let mut results = Vec::new();
|
|
for row in rows {
|
|
let (source_type, project_id, title, project_path, snippet, score, entity_iid) =
|
|
row.context("reading FTS search row")?;
|
|
|
|
let kind = match source_type.as_str() {
|
|
"issue" => EntityKind::Issue,
|
|
"merge_request" | "mr" => EntityKind::MergeRequest,
|
|
_ => continue, // Skip unknown source types (discussion, note).
|
|
};
|
|
|
|
// Skip if we couldn't resolve the entity's iid (orphaned document).
|
|
let Some(iid) = entity_iid else {
|
|
continue;
|
|
};
|
|
|
|
let key = EntityKey {
|
|
project_id,
|
|
iid,
|
|
kind,
|
|
};
|
|
|
|
results.push(SearchResult {
|
|
key,
|
|
title,
|
|
score: score.abs(), // bm25 returns negative scores; lower = better.
|
|
snippet,
|
|
project_path,
|
|
});
|
|
}
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
/// Sanitize a user query for FTS5 MATCH syntax.
///
/// Wraps individual terms in double quotes to prevent FTS5 syntax errors
/// from user-typed operators (AND, OR, NOT, *, etc.).
fn sanitize_fts_query(query: &str) -> String {
    let quoted_terms: Vec<String> = query
        .split_whitespace()
        .filter_map(|term| {
            // Drop any embedded quotes, then re-quote the whole term;
            // terms that were nothing but quotes vanish entirely.
            let bare = term.replace('"', "");
            (!bare.is_empty()).then(|| format!("\"{bare}\""))
        })
        .collect();
    quoted_terms.join(" ")
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Create the minimal schema needed for search queries.
    ///
    /// Mirrors the production tables that the search functions touch
    /// (projects, issues, merge_requests, discussions, notes, documents,
    /// embedding_metadata, sync_runs). Deliberately does NOT create the
    /// documents_fts virtual table — tests that need FTS create it
    /// themselves, so capability detection can be exercised both ways.
    fn create_dashboard_schema(conn: &Connection) {
        conn.execute_batch(
            "
            CREATE TABLE projects (
                id INTEGER PRIMARY KEY,
                gitlab_project_id INTEGER UNIQUE NOT NULL,
                path_with_namespace TEXT NOT NULL
            );
            CREATE TABLE issues (
                id INTEGER PRIMARY KEY,
                gitlab_id INTEGER UNIQUE NOT NULL,
                project_id INTEGER NOT NULL,
                iid INTEGER NOT NULL,
                title TEXT,
                state TEXT NOT NULL,
                author_username TEXT,
                created_at INTEGER NOT NULL,
                updated_at INTEGER NOT NULL,
                last_seen_at INTEGER NOT NULL
            );
            CREATE TABLE merge_requests (
                id INTEGER PRIMARY KEY,
                gitlab_id INTEGER UNIQUE NOT NULL,
                project_id INTEGER NOT NULL,
                iid INTEGER NOT NULL,
                title TEXT,
                state TEXT,
                author_username TEXT,
                created_at INTEGER,
                updated_at INTEGER,
                last_seen_at INTEGER NOT NULL
            );
            CREATE TABLE discussions (
                id INTEGER PRIMARY KEY,
                gitlab_discussion_id TEXT NOT NULL,
                project_id INTEGER NOT NULL,
                noteable_type TEXT NOT NULL,
                last_seen_at INTEGER NOT NULL
            );
            CREATE TABLE notes (
                id INTEGER PRIMARY KEY,
                gitlab_id INTEGER UNIQUE NOT NULL,
                discussion_id INTEGER NOT NULL,
                project_id INTEGER NOT NULL,
                is_system INTEGER NOT NULL DEFAULT 0,
                author_username TEXT,
                body TEXT,
                created_at INTEGER NOT NULL,
                updated_at INTEGER NOT NULL,
                last_seen_at INTEGER NOT NULL
            );
            CREATE TABLE documents (
                id INTEGER PRIMARY KEY,
                source_type TEXT NOT NULL,
                source_id INTEGER NOT NULL,
                project_id INTEGER NOT NULL,
                content_text TEXT NOT NULL,
                content_hash TEXT NOT NULL
            );
            CREATE TABLE embedding_metadata (
                document_id INTEGER NOT NULL,
                chunk_index INTEGER NOT NULL DEFAULT 0,
                model TEXT NOT NULL,
                dims INTEGER NOT NULL,
                document_hash TEXT NOT NULL,
                chunk_hash TEXT NOT NULL,
                created_at INTEGER NOT NULL,
                PRIMARY KEY(document_id, chunk_index)
            );
            CREATE TABLE sync_runs (
                id INTEGER PRIMARY KEY,
                started_at INTEGER NOT NULL,
                heartbeat_at INTEGER NOT NULL,
                finished_at INTEGER,
                status TEXT NOT NULL,
                command TEXT NOT NULL,
                error TEXT
            );
            ",
        )
        .expect("create dashboard schema");
    }

    // Each whitespace-separated term is individually double-quoted.
    #[test]
    fn test_sanitize_fts_query_wraps_terms() {
        let result = sanitize_fts_query("hello world");
        assert_eq!(result, r#""hello" "world""#);
    }

    // Pre-quoted input is normalized: quotes stripped, then re-wrapped,
    // so already-sanitized output is a fixed point.
    #[test]
    fn test_sanitize_fts_query_strips_quotes() {
        let result = sanitize_fts_query(r#""hello" "world""#);
        assert_eq!(result, r#""hello" "world""#);
    }

    // Empty and whitespace-only queries sanitize to the empty string.
    #[test]
    fn test_sanitize_fts_query_empty() {
        assert_eq!(sanitize_fts_query(""), "");
        assert_eq!(sanitize_fts_query("   "), "");
    }

    #[test]
    fn test_sanitize_fts_query_special_chars() {
        // FTS5 operators should be safely wrapped in quotes.
        let result = sanitize_fts_query("NOT AND OR");
        assert_eq!(result, r#""NOT" "AND" "OR""#);
    }

    // With no FTS table and no embeddings, every capability is off.
    // (The docsize probe errors on the missing table and is treated as
    // "no FTS" via unwrap_or.)
    #[test]
    fn test_fetch_search_capabilities_no_tables() {
        let conn = Connection::open_in_memory().unwrap();
        create_dashboard_schema(&conn);

        let caps = fetch_search_capabilities(&conn).unwrap();
        assert!(!caps.has_fts);
        assert!(!caps.has_embeddings);
        assert!(!caps.has_any_index());
    }

    // A populated FTS table (detected via its docsize shadow table)
    // flips has_fts without affecting has_embeddings.
    #[test]
    fn test_fetch_search_capabilities_with_fts() {
        let conn = Connection::open_in_memory().unwrap();
        create_dashboard_schema(&conn);
        // Create FTS table and its shadow table.
        conn.execute_batch(
            "CREATE VIRTUAL TABLE documents_fts USING fts5(content);
             INSERT INTO documents_fts(content) VALUES ('test document');",
        )
        .unwrap();

        let caps = fetch_search_capabilities(&conn).unwrap();
        assert!(caps.has_fts);
        assert!(!caps.has_embeddings);
    }

    // One document with one embedding row: has_embeddings is set and
    // coverage is a positive percentage.
    #[test]
    fn test_fetch_search_capabilities_with_embeddings() {
        let conn = Connection::open_in_memory().unwrap();
        create_dashboard_schema(&conn);
        // Insert a document so coverage calculation works.
        conn.execute_batch(
            "INSERT INTO documents(id, source_type, source_id, project_id, content_text, content_hash)
             VALUES (1, 'issue', 1, 1, 'body text', 'abc');
             INSERT INTO embedding_metadata(document_id, chunk_index, model, dims, document_hash, chunk_hash, created_at)
             VALUES (1, 0, 'test', 384, 'abc', 'def', 1700000000);",
        )
        .unwrap();

        let caps = fetch_search_capabilities(&conn).unwrap();
        assert!(!caps.has_fts);
        assert!(caps.has_embeddings);
        assert!(caps.embedding_coverage_pct > 0.0);
    }

    // Empty queries short-circuit before touching the database, so no
    // schema is needed for these two tests.
    #[test]
    fn test_execute_search_empty_query_returns_empty() {
        let conn = Connection::open_in_memory().unwrap();
        let results = execute_search(&conn, "", SearchMode::Lexical, 10).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_execute_search_whitespace_only_returns_empty() {
        let conn = Connection::open_in_memory().unwrap();
        let results = execute_search(&conn, "   ", SearchMode::Lexical, 10).unwrap();
        assert!(results.is_empty());
    }
}
|