#![allow(dead_code)] use anyhow::{Context, Result}; use rusqlite::Connection; use crate::message::{EntityKey, EntityKind, SearchMode, SearchResult}; use crate::state::search::SearchCapabilities; /// Probe the database to detect available search indexes. /// /// Checks for FTS5 documents and embedding metadata. Returns capabilities /// that the UI uses to gate available search modes. pub fn fetch_search_capabilities(conn: &Connection) -> Result { // FTS: check if documents_fts has rows via the docsize shadow table // (B-tree, not virtual table scan). let has_fts = conn .query_row( "SELECT EXISTS(SELECT 1 FROM documents_fts_docsize LIMIT 1)", [], |r| r.get::<_, bool>(0), ) .unwrap_or(false); // Embeddings: count rows in embedding_metadata. let embedding_count: i64 = conn .query_row("SELECT COUNT(*) FROM embedding_metadata", [], |r| r.get(0)) .unwrap_or(0); let has_embeddings = embedding_count > 0; // Coverage: embeddings / documents percentage. let doc_count: i64 = conn .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)) .unwrap_or(0); let embedding_coverage_pct = if doc_count > 0 { (embedding_count as f32 / doc_count as f32 * 100.0).min(100.0) } else { 0.0 }; Ok(SearchCapabilities { has_fts, has_embeddings, embedding_coverage_pct, }) } /// Execute a search query against the local database. /// /// Dispatches to the correct search backend based on mode: /// - Lexical: FTS5 only (documents_fts) /// - Hybrid: FTS5 + vector merge via RRF /// - Semantic: vector cosine similarity only /// /// Returns results sorted by score descending. pub fn execute_search( conn: &Connection, query: &str, mode: SearchMode, limit: usize, ) -> Result> { if query.trim().is_empty() { return Ok(Vec::new()); } match mode { SearchMode::Lexical => execute_fts_search(conn, query, limit), SearchMode::Hybrid | SearchMode::Semantic => { // Hybrid and Semantic require the full search pipeline from the // core crate (async, Ollama client). For now, fall back to FTS // for Hybrid and return empty for Semantic-only. // TODO: Wire up async search dispatch when core search is integrated. if mode == SearchMode::Hybrid { execute_fts_search(conn, query, limit) } else { Ok(Vec::new()) } } } } /// FTS5 full-text search against the documents table. fn execute_fts_search(conn: &Connection, query: &str, limit: usize) -> Result> { // Sanitize the query for FTS5 (escape special chars, wrap terms in quotes). let safe_query = sanitize_fts_query(query); if safe_query.is_empty() { return Ok(Vec::new()); } // Resolve project_path via JOIN through projects table. // Resolve iid via JOIN through the source entity table (issues or merge_requests). // snippet column 1 = content_text (column 0 is title). let mut stmt = conn .prepare( "SELECT d.source_type, d.source_id, d.title, d.project_id, p.path_with_namespace, snippet(documents_fts, 1, '>>>', '<<<', '...', 32) AS snip, bm25(documents_fts) AS score, COALESCE(i.iid, mr.iid) AS entity_iid FROM documents_fts JOIN documents d ON documents_fts.rowid = d.id JOIN projects p ON p.id = d.project_id LEFT JOIN issues i ON d.source_type = 'issue' AND i.id = d.source_id LEFT JOIN merge_requests mr ON d.source_type = 'merge_request' AND mr.id = d.source_id WHERE documents_fts MATCH ?1 ORDER BY score LIMIT ?2", ) .context("preparing FTS search query")?; let rows = stmt .query_map(rusqlite::params![safe_query, limit as i64], |row| { let source_type: String = row.get(0)?; let _source_id: i64 = row.get(1)?; let title: String = row.get::<_, Option>(2)?.unwrap_or_default(); let project_id: i64 = row.get(3)?; let project_path: String = row.get::<_, Option>(4)?.unwrap_or_default(); let snippet: String = row.get::<_, Option>(5)?.unwrap_or_default(); let score: f64 = row.get(6)?; let entity_iid: Option = row.get(7)?; Ok(( source_type, project_id, title, project_path, snippet, score, entity_iid, )) }) .context("executing FTS search")?; let mut results = Vec::new(); for row in rows { let (source_type, project_id, title, project_path, snippet, score, entity_iid) = row.context("reading FTS search row")?; let kind = match source_type.as_str() { "issue" => EntityKind::Issue, "merge_request" | "mr" => EntityKind::MergeRequest, _ => continue, // Skip unknown source types (discussion, note). }; // Skip if we couldn't resolve the entity's iid (orphaned document). let Some(iid) = entity_iid else { continue; }; let key = EntityKey { project_id, iid, kind, }; results.push(SearchResult { key, title, score: score.abs(), // bm25 returns negative scores; lower = better. snippet, project_path, }); } Ok(results) } /// Sanitize a user query for FTS5 MATCH syntax. /// /// Wraps individual terms in double quotes to prevent FTS5 syntax errors /// from user-typed operators (AND, OR, NOT, *, etc.). fn sanitize_fts_query(query: &str) -> String { query .split_whitespace() .map(|term| { // Strip any existing quotes and re-wrap. let clean = term.replace('"', ""); if clean.is_empty() { String::new() } else { format!("\"{clean}\"") } }) .filter(|s| !s.is_empty()) .collect::>() .join(" ") } #[cfg(test)] mod tests { use super::*; /// Create the minimal schema needed for search queries. fn create_dashboard_schema(conn: &Connection) { conn.execute_batch( " CREATE TABLE projects ( id INTEGER PRIMARY KEY, gitlab_project_id INTEGER UNIQUE NOT NULL, path_with_namespace TEXT NOT NULL ); CREATE TABLE issues ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, project_id INTEGER NOT NULL, iid INTEGER NOT NULL, title TEXT, state TEXT NOT NULL, author_username TEXT, created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL, last_seen_at INTEGER NOT NULL ); CREATE TABLE merge_requests ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, project_id INTEGER NOT NULL, iid INTEGER NOT NULL, title TEXT, state TEXT, author_username TEXT, created_at INTEGER, updated_at INTEGER, last_seen_at INTEGER NOT NULL ); CREATE TABLE discussions ( id INTEGER PRIMARY KEY, gitlab_discussion_id TEXT NOT NULL, project_id INTEGER NOT NULL, noteable_type TEXT NOT NULL, last_seen_at INTEGER NOT NULL ); CREATE TABLE notes ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, discussion_id INTEGER NOT NULL, project_id INTEGER NOT NULL, is_system INTEGER NOT NULL DEFAULT 0, author_username TEXT, body TEXT, created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL, last_seen_at INTEGER NOT NULL ); CREATE TABLE documents ( id INTEGER PRIMARY KEY, source_type TEXT NOT NULL, source_id INTEGER NOT NULL, project_id INTEGER NOT NULL, content_text TEXT NOT NULL, content_hash TEXT NOT NULL ); CREATE TABLE embedding_metadata ( document_id INTEGER NOT NULL, chunk_index INTEGER NOT NULL DEFAULT 0, model TEXT NOT NULL, dims INTEGER NOT NULL, document_hash TEXT NOT NULL, chunk_hash TEXT NOT NULL, created_at INTEGER NOT NULL, PRIMARY KEY(document_id, chunk_index) ); CREATE TABLE sync_runs ( id INTEGER PRIMARY KEY, started_at INTEGER NOT NULL, heartbeat_at INTEGER NOT NULL, finished_at INTEGER, status TEXT NOT NULL, command TEXT NOT NULL, error TEXT ); ", ) .expect("create dashboard schema"); } #[test] fn test_sanitize_fts_query_wraps_terms() { let result = sanitize_fts_query("hello world"); assert_eq!(result, r#""hello" "world""#); } #[test] fn test_sanitize_fts_query_strips_quotes() { let result = sanitize_fts_query(r#""hello" "world""#); assert_eq!(result, r#""hello" "world""#); } #[test] fn test_sanitize_fts_query_empty() { assert_eq!(sanitize_fts_query(""), ""); assert_eq!(sanitize_fts_query(" "), ""); } #[test] fn test_sanitize_fts_query_special_chars() { // FTS5 operators should be safely wrapped in quotes. let result = sanitize_fts_query("NOT AND OR"); assert_eq!(result, r#""NOT" "AND" "OR""#); } #[test] fn test_fetch_search_capabilities_no_tables() { let conn = Connection::open_in_memory().unwrap(); create_dashboard_schema(&conn); let caps = fetch_search_capabilities(&conn).unwrap(); assert!(!caps.has_fts); assert!(!caps.has_embeddings); assert!(!caps.has_any_index()); } #[test] fn test_fetch_search_capabilities_with_fts() { let conn = Connection::open_in_memory().unwrap(); create_dashboard_schema(&conn); // Create FTS table and its shadow table. conn.execute_batch( "CREATE VIRTUAL TABLE documents_fts USING fts5(content); INSERT INTO documents_fts(content) VALUES ('test document');", ) .unwrap(); let caps = fetch_search_capabilities(&conn).unwrap(); assert!(caps.has_fts); assert!(!caps.has_embeddings); } #[test] fn test_fetch_search_capabilities_with_embeddings() { let conn = Connection::open_in_memory().unwrap(); create_dashboard_schema(&conn); // Insert a document so coverage calculation works. conn.execute_batch( "INSERT INTO documents(id, source_type, source_id, project_id, content_text, content_hash) VALUES (1, 'issue', 1, 1, 'body text', 'abc'); INSERT INTO embedding_metadata(document_id, chunk_index, model, dims, document_hash, chunk_hash, created_at) VALUES (1, 0, 'test', 384, 'abc', 'def', 1700000000);", ) .unwrap(); let caps = fetch_search_capabilities(&conn).unwrap(); assert!(!caps.has_fts); assert!(caps.has_embeddings); assert!(caps.embedding_coverage_pct > 0.0); } #[test] fn test_execute_search_empty_query_returns_empty() { let conn = Connection::open_in_memory().unwrap(); let results = execute_search(&conn, "", SearchMode::Lexical, 10).unwrap(); assert!(results.is_empty()); } #[test] fn test_execute_search_whitespace_only_returns_empty() { let conn = Connection::open_in_memory().unwrap(); let results = execute_search(&conn, " ", SearchMode::Lexical, 10).unwrap(); assert!(results.is_empty()); } }