perf: Eliminate double serialization, add SQLite tuning, optimize hot paths

11 isomorphic performance fixes from a deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas.
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs.
- Replace the INSERT + SELECT milestone upsert with RETURNING.
- Use prepare_cached for 5 hot-path queries in extractor.rs.
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone + sort + join + hash.
- Pre-allocate the embedding float-to-bytes buffer with Vec::with_capacity.
- Replace RandomState::new() in rand_jitter with an atomic counter XORed with nanos.
- Remove redundant per-note payload storage (the discussion payload already contains all notes).
- Change transform_issue to accept &GitLabIssue (avoids a full struct clone).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 08:12:37 -05:00
parent f5b4a765b7
commit ee5c5f9645
10 changed files with 172 additions and 157 deletions
--- a/src/documents/extractor.rs
+++ b/src/documents/extractor.rs
@@ -77,11 +77,18 @@ pub fn compute_content_hash(content: &str) -> String {

 /// Compute SHA-256 hash over a sorted list of strings.
 /// Used for labels_hash and paths_hash to detect changes efficiently.
+/// Sorts a vector of indices rather than cloning the strings, and feeds the hasher incrementally instead of allocating a joined string.
 pub fn compute_list_hash(items: &[String]) -> String {
-    let mut sorted = items.to_vec();
-    sorted.sort();
-    let joined = sorted.join("\n");
-    compute_content_hash(&joined)
+    let mut indices: Vec<usize> = (0..items.len()).collect();
+    indices.sort_by(|a, b| items[*a].cmp(&items[*b]));
+    let mut hasher = Sha256::new();
+    for (i, &idx) in indices.iter().enumerate() {
+        if i > 0 {
+            hasher.update(b"\n");
+        }
+        hasher.update(items[idx].as_bytes());
+    }
+    format!("{:x}", hasher.finalize())
 }

 /// Extract a searchable document from an issue.
@@ -132,7 +139,7 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
    };

    // Query labels via junction table
-    let mut label_stmt = conn.prepare(
+    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
         JOIN labels l ON l.id = il.label_id
         WHERE il.issue_id = ?1
@@ -245,7 +252,7 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
    };

    // Query labels via junction table
-    let mut label_stmt = conn.prepare(
+    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM mr_labels ml
         JOIN labels l ON l.id = ml.label_id
         WHERE ml.merge_request_id = ?1
@@ -373,7 +380,7 @@ pub fn extract_discussion_document(
                    Err(e) => return Err(e.into()),
                };
                // Query parent labels
-                let mut label_stmt = conn.prepare(
+                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM issue_labels il
                     JOIN labels l ON l.id = il.label_id
                     WHERE il.issue_id = ?1
@@ -407,7 +414,7 @@ pub fn extract_discussion_document(
                    Err(e) => return Err(e.into()),
                };
                // Query parent labels
-                let mut label_stmt = conn.prepare(
+                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM mr_labels ml
                     JOIN labels l ON l.id = ml.label_id
                     WHERE ml.merge_request_id = ?1
@@ -423,7 +430,7 @@ pub fn extract_discussion_document(
        };

    // Query non-system notes in thread order
-    let mut note_stmt = conn.prepare(
+    let mut note_stmt = conn.prepare_cached(
        "SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
                n.note_type, n.position_old_path, n.position_new_path
         FROM notes n
@@ -657,6 +664,7 @@ mod tests {
                updated_at INTEGER NOT NULL,
                last_seen_at INTEGER NOT NULL,
                discussions_synced_for_updated_at INTEGER,
+                resource_events_synced_for_updated_at INTEGER,
                web_url TEXT,
                raw_payload_id INTEGER
            );
@@ -899,6 +907,7 @@ mod tests {
                discussions_sync_last_attempt_at INTEGER,
                discussions_sync_attempts INTEGER DEFAULT 0,
                discussions_sync_last_error TEXT,
+                resource_events_synced_for_updated_at INTEGER,
                web_url TEXT,
                raw_payload_id INTEGER
            );