use std::collections::HashMap;

use console::style;
use serde::Serialize;

use crate::Config;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use crate::documents::SourceType;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::search::{
    FtsQueryMode, HybridResult, PathFilter, SearchFilters, SearchMode, get_result_snippet,
    search_fts, search_hybrid,
};

#[derive(Debug, Serialize)]
pub struct SearchResultDisplay {
    pub document_id: i64,
    pub source_type: String,
    pub title: String,
    pub url: Option<String>,
    pub author: Option<String>,
    pub created_at: Option<String>,
    pub updated_at: Option<String>,
    pub project_path: String,
    pub labels: Vec<String>,
    pub paths: Vec<String>,
    pub snippet: String,
    pub score: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explain: Option<ExplainData>,
}

#[derive(Debug, Serialize)]
pub struct ExplainData {
    // Rank fields mirror `HybridResult`; the integer rank type is assumed here.
    pub vector_rank: Option<usize>,
    pub fts_rank: Option<usize>,
    pub rrf_score: f64,
}

#[derive(Debug, Serialize)]
pub struct SearchResponse {
    pub query: String,
    pub mode: String,
    pub total_results: usize,
    pub results: Vec<SearchResultDisplay>,
    pub warnings: Vec<String>,
}

pub struct SearchCliFilters {
    pub source_type: Option<String>,
    pub author: Option<String>,
    pub project: Option<String>,
    pub labels: Vec<String>,
    pub path: Option<String>,
    pub since: Option<String>,
    pub updated_since: Option<String>,
    pub limit: usize,
}

pub async fn run_search(
    config: &Config,
    query: &str,
    cli_filters: SearchCliFilters,
    fts_mode: FtsQueryMode,
    requested_mode: &str,
    explain: bool,
) -> Result<SearchResponse> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;
    let mut warnings: Vec<String> = Vec::new();

    let actual_mode = SearchMode::parse(requested_mode).unwrap_or(SearchMode::Hybrid);

    // The embedding client is only needed when the vector leg participates.
    let client = if actual_mode != SearchMode::Lexical {
        let ollama_cfg = &config.embedding;
        Some(OllamaClient::new(OllamaConfig {
            base_url: ollama_cfg.base_url.clone(),
            model: ollama_cfg.model.clone(),
            ..OllamaConfig::default()
        }))
    } else {
        None
    };

    let doc_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap_or(0);
    if doc_count == 0 {
        warnings.push("No documents indexed. Run 'lore generate-docs' first.".to_string());
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    let source_type = cli_filters
        .source_type
        .as_deref()
        .and_then(SourceType::parse);
    let project_id = cli_filters
        .project
        .as_deref()
        .map(|p| resolve_project(&conn, p))
        .transpose()?;
    let since = cli_filters
        .since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;
    let updated_since = cli_filters
        .updated_since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --updated-since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;

    // A trailing slash means "everything under this directory"; otherwise match exactly.
    let path = cli_filters.path.as_deref().map(|p| {
        if p.ends_with('/') {
            PathFilter::Prefix(p.to_string())
        } else {
            PathFilter::Exact(p.to_string())
        }
    });

    let filters = SearchFilters {
        source_type,
        author: cli_filters.author,
        project_id,
        since,
        updated_since,
        labels: cli_filters.labels,
        path,
        limit: cli_filters.limit,
    };

    // Run FTS separately for snippet extraction (search_hybrid doesn't return snippets).
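    // Oversampling (~10x the display limit, clamped to 50..=1500) keeps a
    // highlighted snippet available for documents that rank below the cutoff
    // on the FTS leg but still make the final hybrid list; anything without
    // an FTS snippet falls back to one derived from `content_text` inside
    // `get_result_snippet`.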
    let snippet_top_k = filters
        .clamp_limit()
        .checked_mul(10)
        .unwrap_or(500)
        .clamp(50, 1500);
    let fts_results = search_fts(&conn, query, snippet_top_k, fts_mode)?;
    let snippet_map: HashMap<i64, String> = fts_results
        .iter()
        .map(|r| (r.document_id, r.snippet.clone()))
        .collect();

    // search_hybrid handles recall sizing, RRF ranking, and filter application internally.
    let (hybrid_results, mut hybrid_warnings) = search_hybrid(
        &conn,
        client.as_ref(),
        query,
        actual_mode,
        &filters,
        fts_mode,
    )
    .await?;
    warnings.append(&mut hybrid_warnings);

    if hybrid_results.is_empty() {
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    let ranked_ids: Vec<i64> = hybrid_results.iter().map(|r| r.document_id).collect();
    let hydrated = hydrate_results(&conn, &ranked_ids)?;
    let hybrid_map: HashMap<i64, &HybridResult> =
        hybrid_results.iter().map(|r| (r.document_id, r)).collect();

    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    for row in &hydrated {
        let hr = hybrid_map.get(&row.document_id);
        let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
        let snippet = get_result_snippet(fts_snippet, &row.content_text);
        let explain_data = if explain {
            hr.map(|r| ExplainData {
                vector_rank: r.vector_rank,
                fts_rank: r.fts_rank,
                rrf_score: r.rrf_score,
            })
        } else {
            None
        };
        results.push(SearchResultDisplay {
            document_id: row.document_id,
            source_type: row.source_type.clone(),
            title: row.title.clone().unwrap_or_default(),
            url: row.url.clone(),
            author: row.author.clone(),
            created_at: row.created_at.map(ms_to_iso),
            updated_at: row.updated_at.map(ms_to_iso),
            project_path: row.project_path.clone(),
            labels: row.labels.clone(),
            paths: row.paths.clone(),
            snippet,
            score: hr.map(|r| r.score).unwrap_or(0.0),
            explain: explain_data,
        });
    }

    Ok(SearchResponse {
        query: query.to_string(),
        mode: actual_mode.as_str().to_string(),
        total_results: results.len(),
        results,
        warnings,
    })
}
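// Hydration deliberately happens in one round trip: the ranked id list is
// serialized to JSON and joined through SQLite's `json_each`, and `ORDER BY
// j.key` returns rows in the same order as the RRF ranking (see the SQL in
// `hydrate_results` below).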
struct HydratedRow {
    document_id: i64,
    source_type: String,
    title: Option<String>,
    url: Option<String>,
    author: Option<String>,
    created_at: Option<i64>,
    updated_at: Option<i64>,
    content_text: String,
    project_path: String,
    labels: Vec<String>,
    paths: Vec<String>,
}

fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<Vec<HydratedRow>> {
    if document_ids.is_empty() {
        return Ok(Vec::new());
    }
    let ids_json =
        serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?;
    let sql = r#"
        SELECT
            d.id,
            d.source_type,
            d.title,
            d.url,
            d.author_username,
            d.created_at,
            d.updated_at,
            d.content_text,
            p.path_with_namespace AS project_path,
            (SELECT json_group_array(dl.label_name)
             FROM document_labels dl
             WHERE dl.document_id = d.id) AS labels_json,
            (SELECT json_group_array(dp.path)
             FROM document_paths dp
             WHERE dp.document_id = d.id) AS paths_json
        FROM json_each(?1) AS j
        JOIN documents d ON d.id = j.value
        JOIN projects p ON p.id = d.project_id
        ORDER BY j.key
    "#;
    let mut stmt = conn.prepare(sql)?;
    let rows = stmt
        .query_map([ids_json], |row| {
            let labels_json: String = row.get(9)?;
            let paths_json: String = row.get(10)?;
            Ok(HydratedRow {
                document_id: row.get(0)?,
                source_type: row.get(1)?,
                title: row.get(2)?,
                url: row.get(3)?,
                author: row.get(4)?,
                created_at: row.get(5)?,
                updated_at: row.get(6)?,
                content_text: row.get(7)?,
                project_path: row.get(8)?,
                labels: parse_json_array(&labels_json),
                paths: parse_json_array(&paths_json),
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    Ok(rows)
}

/// Parses a JSON array, keeping only non-empty string elements; anything
/// unparseable yields an empty list.
fn parse_json_array(json: &str) -> Vec<String> {
    serde_json::from_str::<Vec<serde_json::Value>>(json)
        .unwrap_or_default()
        .into_iter()
        .filter_map(|v| v.as_str().map(|s| s.to_string()))
        .filter(|s| !s.is_empty())
        .collect()
}

pub fn print_search_results(response: &SearchResponse) {
    if !response.warnings.is_empty() {
        for w in &response.warnings {
            eprintln!("{} {}", style("Warning:").yellow(), w);
        }
    }
    if response.results.is_empty() {
        println!("No results found for '{}'", style(&response.query).bold());
        return;
    }
    println!(
        "{} results for '{}' ({})",
        response.total_results,
        style(&response.query).bold(),
        response.mode
    );
    println!();
    for (i, result) in response.results.iter().enumerate() {
        let type_prefix = match result.source_type.as_str() {
            "issue" => "Issue",
            "merge_request" => "MR",
            "discussion" => "Discussion",
            "note" => "Note",
            _ => &result.source_type,
        };
        println!(
            "[{}] {} - {} (score: {:.2})",
            i + 1,
            style(type_prefix).cyan(),
            result.title,
            result.score
        );
        if let Some(ref url) = result.url {
            println!("  {}", style(url).dim());
        }
        println!(
            "  {} | {}",
            style(&result.project_path).dim(),
            result
                .author
                .as_deref()
                .map(|a| format!("@{}", a))
                .unwrap_or_default()
        );
        if !result.labels.is_empty() {
            println!("  Labels: {}", result.labels.join(", "));
        }
        // Strip the FTS highlight markers (assumed `<b>`/`</b>`, the FTS5
        // snippet() defaults) for plain-terminal output.
        let clean_snippet = result.snippet.replace("<b>", "").replace("</b>", "");
        println!("  {}", style(clean_snippet).dim());
        if let Some(ref explain) = result.explain {
            println!(
                "  {} vector_rank={} fts_rank={} rrf_score={:.6}",
                style("[explain]").magenta(),
                explain
                    .vector_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain
                    .fts_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain.rrf_score
            );
        }
        println!();
    }
}

#[derive(Serialize)]
struct SearchJsonOutput<'a> {
    ok: bool,
    data: &'a SearchResponse,
    meta: SearchMeta,
}

#[derive(Serialize)]
struct SearchMeta {
    elapsed_ms: u64,
}

pub fn print_search_results_json(
    response: &SearchResponse,
    elapsed_ms: u64,
    fields: Option<&[String]>,
) {
    let output = SearchJsonOutput {
        ok: true,
        data: response,
        meta: SearchMeta { elapsed_ms },
    };
    let mut value = serde_json::to_value(&output).unwrap();
    if let Some(f) = fields {
        let expanded = crate::cli::robot::expand_fields_preset(f, "search");
        crate::cli::robot::filter_fields(&mut value, "results", &expanded);
    }
    println!("{}", serde_json::to_string(&value).unwrap());
}
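
// A minimal test sketch for `parse_json_array`, the only pure helper in this
// module; the expected values follow directly from its filter chain above.
#[cfg(test)]
mod tests {
    use super::parse_json_array;

    #[test]
    fn keeps_non_empty_strings_in_order() {
        assert_eq!(
            parse_json_array(r#"["bug", "backend", ""]"#),
            vec!["bug".to_string(), "backend".to_string()]
        );
    }

    #[test]
    fn tolerates_invalid_json_and_non_string_elements() {
        // Unparseable input degrades to an empty list rather than an error.
        assert!(parse_json_array("not json").is_empty());
        // Non-string elements (numbers, nulls) are silently dropped.
        assert_eq!(parse_json_array(r#"[1, "x", null]"#), vec!["x".to_string()]);
    }
}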