//! Search command: lexical (FTS5) search with filter support and single-query hydration. use console::style; use serde::Serialize; use crate::Config; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, parse_since}; use crate::documents::SourceType; use crate::search::{ FtsQueryMode, PathFilter, SearchFilters, apply_filters, get_result_snippet, rank_rrf, search_fts, }; /// Display-ready search result with all fields hydrated. #[derive(Debug, Serialize)] pub struct SearchResultDisplay { pub document_id: i64, pub source_type: String, pub title: String, pub url: Option, pub author: Option, pub created_at: Option, pub updated_at: Option, pub project_path: String, pub labels: Vec, pub paths: Vec, pub snippet: String, pub score: f64, #[serde(skip_serializing_if = "Option::is_none")] pub explain: Option, } /// Ranking explanation for --explain output. #[derive(Debug, Serialize)] pub struct ExplainData { pub vector_rank: Option, pub fts_rank: Option, pub rrf_score: f64, } /// Search response wrapper. #[derive(Debug, Serialize)] pub struct SearchResponse { pub query: String, pub mode: String, pub total_results: usize, pub results: Vec, pub warnings: Vec, } /// Build SearchFilters from CLI args. pub struct SearchCliFilters { pub source_type: Option, pub author: Option, pub project: Option, pub labels: Vec, pub path: Option, pub after: Option, pub updated_after: Option, pub limit: usize, } /// Run a lexical search query. pub fn run_search( config: &Config, query: &str, cli_filters: SearchCliFilters, fts_mode: FtsQueryMode, explain: bool, ) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; // Check if any documents exist let doc_count: i64 = conn .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0)) .unwrap_or(0); if doc_count == 0 { return Ok(SearchResponse { query: query.to_string(), mode: "lexical".to_string(), total_results: 0, results: vec![], warnings: vec!["No documents indexed. Run 'lore generate-docs' first.".to_string()], }); } // Build filters let source_type = cli_filters .source_type .as_deref() .and_then(SourceType::parse); let project_id = cli_filters .project .as_deref() .map(|p| resolve_project(&conn, p)) .transpose()?; let after = cli_filters .after .as_deref() .map(|s| { parse_since(s).ok_or_else(|| { LoreError::Other(format!( "Invalid --after value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.", s )) }) }) .transpose()?; let updated_after = cli_filters .updated_after .as_deref() .map(|s| { parse_since(s).ok_or_else(|| { LoreError::Other(format!( "Invalid --updated-after value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.", s )) }) }) .transpose()?; let path = cli_filters.path.as_deref().map(|p| { if p.ends_with('/') { PathFilter::Prefix(p.to_string()) } else { PathFilter::Exact(p.to_string()) } }); let filters = SearchFilters { source_type, author: cli_filters.author, project_id, after, updated_after, labels: cli_filters.labels, path, limit: cli_filters.limit, }; // Adaptive recall: wider initial fetch when filters applied let requested = filters.clamp_limit(); let top_k = if filters.has_any_filter() { (requested * 50).clamp(200, 1500) } else { (requested * 10).clamp(50, 1500) }; // FTS search let fts_results = search_fts(&conn, query, top_k, fts_mode)?; let fts_tuples: Vec<(i64, f64)> = fts_results .iter() .map(|r| (r.document_id, r.bm25_score)) .collect(); // Build snippet map before ranking let snippet_map: std::collections::HashMap = fts_results .iter() .map(|r| (r.document_id, r.snippet.clone())) .collect(); // RRF ranking (single-list for lexical mode) let ranked = rank_rrf(&[], &fts_tuples); let ranked_ids: Vec = ranked.iter().map(|r| r.document_id).collect(); // Apply post-retrieval filters let filtered_ids = apply_filters(&conn, &ranked_ids, &filters)?; if filtered_ids.is_empty() { return Ok(SearchResponse { query: query.to_string(), mode: "lexical".to_string(), total_results: 0, results: vec![], warnings: vec![], }); } // Hydrate results in single round-trip let hydrated = hydrate_results(&conn, &filtered_ids)?; // Build display results preserving filter order let rrf_map: std::collections::HashMap = ranked.iter().map(|r| (r.document_id, r)).collect(); let mut results: Vec = Vec::with_capacity(hydrated.len()); for row in &hydrated { let rrf = rrf_map.get(&row.document_id); let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str()); let snippet = get_result_snippet(fts_snippet, &row.content_text); let explain_data = if explain { rrf.map(|r| ExplainData { vector_rank: r.vector_rank, fts_rank: r.fts_rank, rrf_score: r.rrf_score, }) } else { None }; results.push(SearchResultDisplay { document_id: row.document_id, source_type: row.source_type.clone(), title: row.title.clone().unwrap_or_default(), url: row.url.clone(), author: row.author.clone(), created_at: row.created_at.map(ms_to_iso), updated_at: row.updated_at.map(ms_to_iso), project_path: row.project_path.clone(), labels: row.labels.clone(), paths: row.paths.clone(), snippet, score: rrf.map(|r| r.normalized_score).unwrap_or(0.0), explain: explain_data, }); } Ok(SearchResponse { query: query.to_string(), mode: "lexical".to_string(), total_results: results.len(), results, warnings: vec![], }) } /// Raw row from hydration query. struct HydratedRow { document_id: i64, source_type: String, title: Option, url: Option, author: Option, created_at: Option, updated_at: Option, content_text: String, project_path: String, labels: Vec, paths: Vec, } /// Hydrate document IDs into full display rows in a single query. /// /// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key. /// Labels and paths fetched via correlated json_group_array subqueries. fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result> { if document_ids.is_empty() { return Ok(Vec::new()); } let ids_json = serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?; let sql = r#" SELECT d.id, d.source_type, d.title, d.url, d.author_username, d.created_at, d.updated_at, d.content_text, p.path_with_namespace AS project_path, (SELECT json_group_array(dl.label_name) FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json, (SELECT json_group_array(dp.path) FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json FROM json_each(?1) AS j JOIN documents d ON d.id = j.value JOIN projects p ON p.id = d.project_id ORDER BY j.key "#; let mut stmt = conn.prepare(sql)?; let rows = stmt .query_map([ids_json], |row| { let labels_json: String = row.get(9)?; let paths_json: String = row.get(10)?; Ok(HydratedRow { document_id: row.get(0)?, source_type: row.get(1)?, title: row.get(2)?, url: row.get(3)?, author: row.get(4)?, created_at: row.get(5)?, updated_at: row.get(6)?, content_text: row.get(7)?, project_path: row.get(8)?, labels: parse_json_array(&labels_json), paths: parse_json_array(&paths_json), }) })? .collect::, _>>()?; Ok(rows) } /// Parse a JSON array string into a Vec, filtering out null/empty. fn parse_json_array(json: &str) -> Vec { serde_json::from_str::>(json) .unwrap_or_default() .into_iter() .filter_map(|v| v.as_str().map(|s| s.to_string())) .filter(|s| !s.is_empty()) .collect() } /// Print human-readable search results. pub fn print_search_results(response: &SearchResponse) { if !response.warnings.is_empty() { for w in &response.warnings { eprintln!("{} {}", style("Warning:").yellow(), w); } } if response.results.is_empty() { println!("No results found for '{}'", style(&response.query).bold()); return; } println!( "{} results for '{}' ({})", response.total_results, style(&response.query).bold(), response.mode ); println!(); for (i, result) in response.results.iter().enumerate() { let type_prefix = match result.source_type.as_str() { "issue" => "Issue", "merge_request" => "MR", "discussion" => "Discussion", _ => &result.source_type, }; println!( "[{}] {} - {} (score: {:.2})", i + 1, style(type_prefix).cyan(), result.title, result.score ); if let Some(ref url) = result.url { println!(" {}", style(url).dim()); } println!( " {} | {}", style(&result.project_path).dim(), result .author .as_deref() .map(|a| format!("@{}", a)) .unwrap_or_default() ); if !result.labels.is_empty() { println!(" Labels: {}", result.labels.join(", ")); } // Strip HTML tags from snippet for terminal display let clean_snippet = result.snippet.replace("", "").replace("", ""); println!(" {}", style(clean_snippet).dim()); if let Some(ref explain) = result.explain { println!( " {} fts_rank={} rrf_score={:.6}", style("[explain]").magenta(), explain .fts_rank .map(|r| r.to_string()) .unwrap_or_else(|| "-".into()), explain.rrf_score ); } println!(); } } /// JSON output structures. #[derive(Serialize)] struct SearchJsonOutput<'a> { ok: bool, data: &'a SearchResponse, meta: SearchMeta, } #[derive(Serialize)] struct SearchMeta { elapsed_ms: u64, } /// Print JSON robot-mode output. pub fn print_search_results_json(response: &SearchResponse, elapsed_ms: u64) { let output = SearchJsonOutput { ok: true, data: response, meta: SearchMeta { elapsed_ms }, }; println!("{}", serde_json::to_string(&output).unwrap()); }