perf: Eliminate double serialization, add SQLite tuning, optimize hot paths
11 isomorphic performance fixes from deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -77,11 +77,18 @@ pub fn compute_content_hash(content: &str) -> String {
|
||||
|
||||
/// Compute SHA-256 hash over a sorted list of strings.
|
||||
/// Used for labels_hash and paths_hash to detect changes efficiently.
|
||||
/// Sorts by index reference to avoid cloning, hashes incrementally to avoid join allocation.
|
||||
pub fn compute_list_hash(items: &[String]) -> String {
|
||||
let mut sorted = items.to_vec();
|
||||
sorted.sort();
|
||||
let joined = sorted.join("\n");
|
||||
compute_content_hash(&joined)
|
||||
let mut indices: Vec<usize> = (0..items.len()).collect();
|
||||
indices.sort_by(|a, b| items[*a].cmp(&items[*b]));
|
||||
let mut hasher = Sha256::new();
|
||||
for (i, &idx) in indices.iter().enumerate() {
|
||||
if i > 0 {
|
||||
hasher.update(b"\n");
|
||||
}
|
||||
hasher.update(items[idx].as_bytes());
|
||||
}
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
/// Extract a searchable document from an issue.
|
||||
@@ -132,7 +139,7 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
|
||||
};
|
||||
|
||||
// Query labels via junction table
|
||||
let mut label_stmt = conn.prepare(
|
||||
let mut label_stmt = conn.prepare_cached(
|
||||
"SELECT l.name FROM issue_labels il
|
||||
JOIN labels l ON l.id = il.label_id
|
||||
WHERE il.issue_id = ?1
|
||||
@@ -245,7 +252,7 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
|
||||
};
|
||||
|
||||
// Query labels via junction table
|
||||
let mut label_stmt = conn.prepare(
|
||||
let mut label_stmt = conn.prepare_cached(
|
||||
"SELECT l.name FROM mr_labels ml
|
||||
JOIN labels l ON l.id = ml.label_id
|
||||
WHERE ml.merge_request_id = ?1
|
||||
@@ -373,7 +380,7 @@ pub fn extract_discussion_document(
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
// Query parent labels
|
||||
let mut label_stmt = conn.prepare(
|
||||
let mut label_stmt = conn.prepare_cached(
|
||||
"SELECT l.name FROM issue_labels il
|
||||
JOIN labels l ON l.id = il.label_id
|
||||
WHERE il.issue_id = ?1
|
||||
@@ -407,7 +414,7 @@ pub fn extract_discussion_document(
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
// Query parent labels
|
||||
let mut label_stmt = conn.prepare(
|
||||
let mut label_stmt = conn.prepare_cached(
|
||||
"SELECT l.name FROM mr_labels ml
|
||||
JOIN labels l ON l.id = ml.label_id
|
||||
WHERE ml.merge_request_id = ?1
|
||||
@@ -423,7 +430,7 @@ pub fn extract_discussion_document(
|
||||
};
|
||||
|
||||
// Query non-system notes in thread order
|
||||
let mut note_stmt = conn.prepare(
|
||||
let mut note_stmt = conn.prepare_cached(
|
||||
"SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
|
||||
n.note_type, n.position_old_path, n.position_new_path
|
||||
FROM notes n
|
||||
@@ -657,6 +664,7 @@ mod tests {
|
||||
updated_at INTEGER NOT NULL,
|
||||
last_seen_at INTEGER NOT NULL,
|
||||
discussions_synced_for_updated_at INTEGER,
|
||||
resource_events_synced_for_updated_at INTEGER,
|
||||
web_url TEXT,
|
||||
raw_payload_id INTEGER
|
||||
);
|
||||
@@ -899,6 +907,7 @@ mod tests {
|
||||
discussions_sync_last_attempt_at INTEGER,
|
||||
discussions_sync_attempts INTEGER DEFAULT 0,
|
||||
discussions_sync_last_error TEXT,
|
||||
resource_events_synced_for_updated_at INTEGER,
|
||||
web_url TEXT,
|
||||
raw_payload_id INTEGER
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user