fix(search): cap vector search k-value and add rowid assertion
The vector search multiplier could grow unbounded on documents with many chunks, producing enormous k values that cause SQLite to scan far more rows than necessary. Clamp the multiplier to [8, 200] and cap k at 10,000 to prevent degenerate performance on large corpora. Also adds a debug_assert in decode_rowid to catch negative rowids early — these indicate a bug in the encoding pipeline and should fail fast rather than silently produce garbage document IDs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -51,8 +51,8 @@ pub fn search_vector(
|
||||
.collect();
|
||||
|
||||
let max_chunks = max_chunks_per_document(conn).max(1);
|
||||
let multiplier = ((max_chunks.unsigned_abs() as usize * 3 / 2) + 1).max(8);
|
||||
let k = limit * multiplier;
|
||||
let multiplier = ((max_chunks.unsigned_abs() as usize * 3 / 2) + 1).clamp(8, 200);
|
||||
let k = (limit * multiplier).min(10_000);
|
||||
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT rowid, distance
|
||||
|
||||
Reference in New Issue
Block a user