feat(bd-1ksf): wire hybrid search (FTS5 + vector + RRF) to CLI
Make run_search async, replace the hardcoded lexical mode with SearchMode::parse(), wire search_hybrid() to OllamaClient for semantic/hybrid modes, and degrade gracefully when Ollama is unavailable. Closes: bd-1ksf
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use console::style;
|
||||
use serde::Serialize;
|
||||
|
||||
@@ -8,9 +10,10 @@ use crate::core::paths::get_db_path;
|
||||
use crate::core::project::resolve_project;
|
||||
use crate::core::time::{ms_to_iso, parse_since};
|
||||
use crate::documents::SourceType;
|
||||
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
||||
use crate::search::{
|
||||
FtsQueryMode, PathFilter, SearchFilters, apply_filters, get_result_snippet, rank_rrf,
|
||||
search_fts,
|
||||
FtsQueryMode, HybridResult, PathFilter, SearchFilters, SearchMode, get_result_snippet,
|
||||
search_fts, search_hybrid,
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -58,7 +61,7 @@ pub struct SearchCliFilters {
|
||||
pub limit: usize,
|
||||
}
|
||||
|
||||
pub fn run_search(
|
||||
pub async fn run_search(
|
||||
config: &Config,
|
||||
query: &str,
|
||||
cli_filters: SearchCliFilters,
|
||||
@@ -71,15 +74,18 @@ pub fn run_search(
|
||||
|
||||
let mut warnings: Vec<String> = Vec::new();
|
||||
|
||||
// Determine actual mode: vector search requires embeddings, which need async + Ollama.
|
||||
// Until hybrid/semantic are wired up, we run lexical and warn if the user asked for more.
|
||||
let actual_mode = "lexical";
|
||||
if requested_mode != "lexical" {
|
||||
warnings.push(format!(
|
||||
"Requested mode '{}' is not yet available; falling back to lexical search.",
|
||||
requested_mode
|
||||
));
|
||||
}
|
||||
let actual_mode = SearchMode::parse(requested_mode).unwrap_or(SearchMode::Hybrid);
|
||||
|
||||
let client = if actual_mode != SearchMode::Lexical {
|
||||
let ollama_cfg = &config.embedding;
|
||||
Some(OllamaClient::new(OllamaConfig {
|
||||
base_url: ollama_cfg.base_url.clone(),
|
||||
model: ollama_cfg.model.clone(),
|
||||
..OllamaConfig::default()
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let doc_count: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
|
||||
@@ -89,7 +95,7 @@ pub fn run_search(
|
||||
warnings.push("No documents indexed. Run 'lore generate-docs' first.".to_string());
|
||||
return Ok(SearchResponse {
|
||||
query: query.to_string(),
|
||||
mode: actual_mode.to_string(),
|
||||
mode: actual_mode.as_str().to_string(),
|
||||
total_results: 0,
|
||||
results: vec![],
|
||||
warnings,
|
||||
@@ -151,52 +157,54 @@ pub fn run_search(
|
||||
limit: cli_filters.limit,
|
||||
};
|
||||
|
||||
let requested = filters.clamp_limit();
|
||||
let top_k = if filters.has_any_filter() {
|
||||
(requested * 50).clamp(200, 1500)
|
||||
} else {
|
||||
(requested * 10).clamp(50, 1500)
|
||||
};
|
||||
|
||||
let fts_results = search_fts(&conn, query, top_k, fts_mode)?;
|
||||
let fts_tuples: Vec<(i64, f64)> = fts_results
|
||||
.iter()
|
||||
.map(|r| (r.document_id, r.bm25_score))
|
||||
.collect();
|
||||
|
||||
let snippet_map: std::collections::HashMap<i64, String> = fts_results
|
||||
// Run FTS separately for snippet extraction (search_hybrid doesn't return snippets).
|
||||
let snippet_top_k = filters
|
||||
.clamp_limit()
|
||||
.checked_mul(10)
|
||||
.unwrap_or(500)
|
||||
.clamp(50, 1500);
|
||||
let fts_results = search_fts(&conn, query, snippet_top_k, fts_mode)?;
|
||||
let snippet_map: HashMap<i64, String> = fts_results
|
||||
.iter()
|
||||
.map(|r| (r.document_id, r.snippet.clone()))
|
||||
.collect();
|
||||
|
||||
let ranked = rank_rrf(&[], &fts_tuples);
|
||||
let ranked_ids: Vec<i64> = ranked.iter().map(|r| r.document_id).collect();
|
||||
// search_hybrid handles recall sizing, RRF ranking, and filter application internally.
|
||||
let (hybrid_results, mut hybrid_warnings) = search_hybrid(
|
||||
&conn,
|
||||
client.as_ref(),
|
||||
query,
|
||||
actual_mode,
|
||||
&filters,
|
||||
fts_mode,
|
||||
)
|
||||
.await?;
|
||||
warnings.append(&mut hybrid_warnings);
|
||||
|
||||
let filtered_ids = apply_filters(&conn, &ranked_ids, &filters)?;
|
||||
|
||||
if filtered_ids.is_empty() {
|
||||
if hybrid_results.is_empty() {
|
||||
return Ok(SearchResponse {
|
||||
query: query.to_string(),
|
||||
mode: actual_mode.to_string(),
|
||||
mode: actual_mode.as_str().to_string(),
|
||||
total_results: 0,
|
||||
results: vec![],
|
||||
warnings,
|
||||
});
|
||||
}
|
||||
|
||||
let hydrated = hydrate_results(&conn, &filtered_ids)?;
|
||||
let ranked_ids: Vec<i64> = hybrid_results.iter().map(|r| r.document_id).collect();
|
||||
let hydrated = hydrate_results(&conn, &ranked_ids)?;
|
||||
|
||||
let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> =
|
||||
ranked.iter().map(|r| (r.document_id, r)).collect();
|
||||
let hybrid_map: HashMap<i64, &HybridResult> =
|
||||
hybrid_results.iter().map(|r| (r.document_id, r)).collect();
|
||||
|
||||
let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
|
||||
for row in &hydrated {
|
||||
let rrf = rrf_map.get(&row.document_id);
|
||||
let hr = hybrid_map.get(&row.document_id);
|
||||
let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
|
||||
let snippet = get_result_snippet(fts_snippet, &row.content_text);
|
||||
|
||||
let explain_data = if explain {
|
||||
rrf.map(|r| ExplainData {
|
||||
hr.map(|r| ExplainData {
|
||||
vector_rank: r.vector_rank,
|
||||
fts_rank: r.fts_rank,
|
||||
rrf_score: r.rrf_score,
|
||||
@@ -217,14 +225,14 @@ pub fn run_search(
|
||||
labels: row.labels.clone(),
|
||||
paths: row.paths.clone(),
|
||||
snippet,
|
||||
score: rrf.map(|r| r.normalized_score).unwrap_or(0.0),
|
||||
score: hr.map(|r| r.score).unwrap_or(0.0),
|
||||
explain: explain_data,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(SearchResponse {
|
||||
query: query.to_string(),
|
||||
mode: actual_mode.to_string(),
|
||||
mode: actual_mode.as_str().to_string(),
|
||||
total_results: results.len(),
|
||||
results,
|
||||
warnings,
|
||||
@@ -360,8 +368,12 @@ pub fn print_search_results(response: &SearchResponse) {
|
||||
|
||||
if let Some(ref explain) = result.explain {
|
||||
println!(
|
||||
" {} fts_rank={} rrf_score={:.6}",
|
||||
" {} vector_rank={} fts_rank={} rrf_score={:.6}",
|
||||
style("[explain]").magenta(),
|
||||
explain
|
||||
.vector_rank
|
||||
.map(|r| r.to_string())
|
||||
.unwrap_or_else(|| "-".into()),
|
||||
explain
|
||||
.fts_rank
|
||||
.map(|r| r.to_string())
|
||||
|
||||
Reference in New Issue
Block a user