perf(timeline): guard against overly broad seed queries

Add a pre-flight FTS count check before the expensive bm25-ranked search.
Queries matching more than 10,000 documents are rejected immediately with
a suggestion to use a more specific query or to narrow with --since.

Prevents multi-minute CPU spin on queries like 'merge request' that
match most of the corpus (106K/178K documents).
This commit is contained in:
teernisse
2026-03-06 17:07:17 -05:00
parent 8bd68e02bd
commit 4b0535f852

View File

@@ -48,6 +48,21 @@ pub async fn seed_timeline(
});
}
// Guard: reject overly broad queries before running expensive ranked search.
// The count query (no bm25/snippet) is cheap even on broad matches.
const SEED_MATCH_CEILING: i64 = 10_000;
let match_count: i64 = conn.query_row(
"SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH ?1",
[&fts_query],
|row| row.get(0),
)?;
if match_count > SEED_MATCH_CEILING {
return Err(crate::core::error::LoreError::Other(format!(
"Query too broad: matched {match_count} documents (ceiling: {SEED_MATCH_CEILING}). \
Use a more specific query or narrow with --since.",
)));
}
// Use hybrid search for seed entity discovery (better recall than FTS alone).
// search_hybrid gracefully falls back to FTS-only when Ollama is unavailable.
let filters = SearchFilters {