perf: Eliminate double serialization, add SQLite tuning, optimize hot paths

11 isomorphic performance fixes from a deep audit (no functional behavior
changes, except that per-note raw payloads are no longer stored separately):

- Eliminate double serialization: store_payload now accepts pre-serialized
  bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses
  Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT
  RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead
  of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains
  all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-04 08:12:37 -05:00
parent f5b4a765b7
commit ee5c5f9645
10 changed files with 172 additions and 157 deletions

View File

@@ -111,14 +111,14 @@ async fn ingest_discussions_for_issue(
result.discussions_fetched += 1;
// Store raw payload
let payload_json = serde_json::to_value(&gitlab_discussion)?;
let payload_bytes = serde_json::to_vec(&gitlab_discussion)?;
let payload_id = store_payload(
conn,
StorePayloadOptions {
project_id: Some(local_project_id),
resource_type: "discussion",
gitlab_id: &gitlab_discussion.id,
payload: &payload_json,
json_bytes: &payload_bytes,
compress: config.storage.compress_raw_payloads,
},
)?;
@@ -156,25 +156,10 @@ async fn ingest_discussions_for_issue(
)?;
for note in notes {
// Store raw note payload
let note_payload_json = serde_json::to_value(
gitlab_discussion
.notes
.iter()
.find(|n| n.id == note.gitlab_id),
)?;
let note_payload_id = store_payload(
&tx,
StorePayloadOptions {
project_id: Some(local_project_id),
resource_type: "note",
gitlab_id: &note.gitlab_id.to_string(),
payload: &note_payload_json,
compress: config.storage.compress_raw_payloads,
},
)?;
insert_note(&tx, local_discussion_id, &note, note_payload_id)?;
// Note: per-note raw payload storage is skipped because the discussion
// payload (already stored above) contains all notes. The full note
// content is also stored in the notes table itself.
insert_note(&tx, local_discussion_id, &note, None)?;
}
tx.commit()?;
@@ -246,7 +231,7 @@ fn insert_note(
conn: &Connection,
discussion_id: i64,
note: &crate::gitlab::transformers::NormalizedNote,
payload_id: i64,
payload_id: Option<i64>,
) -> Result<()> {
conn.execute(
"INSERT INTO notes (