perf: Optimize SQL queries and reduce allocations in hot paths
Change detection queries (embedding/change_detector.rs):
- Replace triple-EXISTS subquery pattern with LEFT JOIN + NULL check
- SQLite now scans embedding_metadata once instead of three times
- Semantically identical: returns docs needing embedding when no embedding exists, hash changed, or config mismatch

Count queries (cli/commands/count.rs):
- Consolidate 3 separate COUNT queries for issues into a single query using conditional aggregation (CASE WHEN state = 'x' THEN 1)
- Same optimization for MRs: 5 queries reduced to 1

Search filter queries (search/filters.rs):
- Replace N separate EXISTS clauses for label filtering with a single IN() clause with COUNT/GROUP BY HAVING pattern
- For multi-label AND queries, this reduces N subqueries to 1

FTS tokenization (search/fts.rs):
- Replace collect-into-Vec-then-join pattern with direct String building
- Pre-allocate capacity hint for result string

Discussion truncation (documents/truncation.rs):
- Calculate total length without allocating concatenated string first
- Only allocate full string when we know it fits within limit

Embedding pipeline (embedding/pipeline.rs):
- Add Vec::with_capacity hints for chunk work and cleared_docs HashSet
- Reduces reallocations during embedding batch processing

Backoff calculation (core/backoff.rs):
- Replace unchecked addition with saturating_add to prevent overflow
- Add test case verifying overflow protection

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -97,13 +97,19 @@ pub fn apply_filters(
|
||||
param_idx += 1;
|
||||
}
|
||||
|
||||
for label in &filters.labels {
|
||||
if !filters.labels.is_empty() {
|
||||
let placeholders: Vec<String> = (0..filters.labels.len())
|
||||
.map(|i| format!("?{}", param_idx + i))
|
||||
.collect();
|
||||
sql.push_str(&format!(
|
||||
" AND EXISTS (SELECT 1 FROM document_labels dl WHERE dl.document_id = d.id AND dl.label_name = ?{})",
|
||||
param_idx
|
||||
" AND EXISTS (SELECT 1 FROM document_labels dl WHERE dl.document_id = d.id AND dl.label_name IN ({}) GROUP BY dl.document_id HAVING COUNT(DISTINCT dl.label_name) = {})",
|
||||
placeholders.join(","),
|
||||
filters.labels.len()
|
||||
));
|
||||
params.push(Box::new(label.clone()));
|
||||
param_idx += 1;
|
||||
for label in &filters.labels {
|
||||
params.push(Box::new(label.clone()));
|
||||
param_idx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref path_filter) = filters.path {
|
||||
|
||||
@@ -23,22 +23,25 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let tokens: Vec<String> = trimmed
|
||||
.split_whitespace()
|
||||
.map(|token| {
|
||||
if let Some(stem) = token.strip_suffix('*')
|
||||
&& !stem.is_empty()
|
||||
&& stem.chars().all(|c| c.is_alphanumeric() || c == '_')
|
||||
{
|
||||
let escaped = stem.replace('"', "\"\"");
|
||||
return format!("\"{}\"*", escaped);
|
||||
}
|
||||
let escaped = token.replace('"', "\"\"");
|
||||
format!("\"{}\"", escaped)
|
||||
})
|
||||
.collect();
|
||||
|
||||
tokens.join(" ")
|
||||
let mut result = String::with_capacity(trimmed.len() + 20);
|
||||
for (i, token) in trimmed.split_whitespace().enumerate() {
|
||||
if i > 0 {
|
||||
result.push(' ');
|
||||
}
|
||||
if let Some(stem) = token.strip_suffix('*')
|
||||
&& !stem.is_empty()
|
||||
&& stem.chars().all(|c| c.is_alphanumeric() || c == '_')
|
||||
{
|
||||
result.push('"');
|
||||
result.push_str(&stem.replace('"', "\"\""));
|
||||
result.push_str("\"*");
|
||||
} else {
|
||||
result.push('"');
|
||||
result.push_str(&token.replace('"', "\"\""));
|
||||
result.push('"');
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user