From 4b0535f85202bed3885a4988232f194c8c7d9a6c Mon Sep 17 00:00:00 2001 From: teernisse Date: Fri, 6 Mar 2026 17:07:17 -0500 Subject: [PATCH] perf(timeline): guard against overly broad seed queries Add pre-flight FTS count check before expensive bm25-ranked search. Queries matching >10,000 documents are rejected instantly with a suggestion to use a more specific query or --since filter. Prevents multi-minute CPU spin on queries like 'merge request' that match most of the corpus (106K/178K documents). --- src/timeline/seed.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/timeline/seed.rs b/src/timeline/seed.rs index caa44d0..fd26e31 100644 --- a/src/timeline/seed.rs +++ b/src/timeline/seed.rs @@ -48,6 +48,21 @@ pub async fn seed_timeline( }); } + // Guard: reject overly broad queries before running expensive ranked search. + // The count query (no bm25/snippet) is cheap even on broad matches. + const SEED_MATCH_CEILING: i64 = 10_000; + let match_count: i64 = conn.query_row( + "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH ?1", + [&fts_query], + |row| row.get(0), + )?; + if match_count > SEED_MATCH_CEILING { + return Err(crate::core::error::LoreError::Other(format!( + "Query too broad: matched {match_count} documents (ceiling: {SEED_MATCH_CEILING}). \ + Use a more specific query or narrow with --since.", + ))); + } + // Use hybrid search for seed entity discovery (better recall than FTS alone). // search_hybrid gracefully falls back to FTS-only when Ollama is unavailable. let filters = SearchFilters {