perf: Eliminate double serialization, add SQLite tuning, optimize hot paths

11 isomorphic performance fixes from a deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized
  bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses
  Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT
  RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead
  of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains
  all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-04 08:12:37 -05:00
parent f5b4a765b7
commit ee5c5f9645
10 changed files with 172 additions and 157 deletions

View File

@@ -58,7 +58,7 @@ fn parse_timestamp(ts: &str) -> Result<i64, TransformError> {
}
/// Transform a GitLab issue into local schema format.
pub fn transform_issue(issue: GitLabIssue) -> Result<IssueWithMetadata, TransformError> {
pub fn transform_issue(issue: &GitLabIssue) -> Result<IssueWithMetadata, TransformError> {
let created_at = parse_timestamp(&issue.created_at)?;
let updated_at = parse_timestamp(&issue.updated_at)?;
@@ -83,17 +83,17 @@ pub fn transform_issue(issue: GitLabIssue) -> Result<IssueWithMetadata, Transfor
gitlab_id: issue.id,
iid: issue.iid,
project_id: issue.project_id,
title: issue.title,
description: issue.description,
state: issue.state,
author_username: issue.author.username,
title: issue.title.clone(),
description: issue.description.clone(),
state: issue.state.clone(),
author_username: issue.author.username.clone(),
created_at,
updated_at,
web_url: issue.web_url,
due_date: issue.due_date,
web_url: issue.web_url.clone(),
due_date: issue.due_date.clone(),
milestone_title,
},
label_names: issue.labels,
label_names: issue.labels.clone(),
assignee_usernames,
milestone,
})
@@ -131,7 +131,7 @@ mod tests {
#[test]
fn transforms_issue_with_all_fields() {
let issue = make_test_issue();
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert_eq!(result.issue.gitlab_id, 12345);
assert_eq!(result.issue.iid, 42);
@@ -154,14 +154,14 @@ mod tests {
let mut issue = make_test_issue();
issue.description = None;
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert!(result.issue.description.is_none());
}
#[test]
fn extracts_label_names() {
let issue = make_test_issue();
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert_eq!(result.label_names.len(), 2);
assert_eq!(result.label_names[0], "bug");
@@ -173,14 +173,14 @@ mod tests {
let mut issue = make_test_issue();
issue.labels = vec![];
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert!(result.label_names.is_empty());
}
#[test]
fn parses_timestamps_to_ms_epoch() {
let issue = make_test_issue();
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
// 2024-01-15T10:00:00.000Z = 1705312800000 ms
assert_eq!(result.issue.created_at, 1705312800000);
@@ -194,7 +194,7 @@ mod tests {
// GitLab can return timestamps with timezone offset
issue.created_at = "2024-01-15T05:00:00-05:00".to_string();
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
// 05:00 EST = 10:00 UTC = same as original test
assert_eq!(result.issue.created_at, 1705312800000);
}
@@ -215,7 +215,7 @@ mod tests {
},
];
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert_eq!(result.assignee_usernames.len(), 2);
assert_eq!(result.assignee_usernames[0], "alice");
assert_eq!(result.assignee_usernames[1], "bob");
@@ -235,7 +235,7 @@ mod tests {
web_url: Some("https://gitlab.example.com/-/milestones/5".to_string()),
});
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
// Denormalized title on issue for quick display
assert_eq!(result.issue.milestone_title, Some("v1.0".to_string()));
@@ -258,7 +258,7 @@ mod tests {
#[test]
fn handles_missing_milestone() {
let issue = make_test_issue();
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert!(result.issue.milestone_title.is_none());
assert!(result.milestone.is_none());
@@ -269,7 +269,7 @@ mod tests {
let mut issue = make_test_issue();
issue.due_date = Some("2024-02-15".to_string());
let result = transform_issue(issue).unwrap();
let result = transform_issue(&issue).unwrap();
assert_eq!(result.issue.due_date, Some("2024-02-15".to_string()));
}
}