perf: Eliminate double serialization, add SQLite tuning, optimize hot paths
11 isomorphic performance fixes from a deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -196,8 +196,8 @@ fn process_single_issue(
|
||||
let now = now_ms();
|
||||
|
||||
// Transform issue first (outside transaction - no DB access)
|
||||
let payload_json = serde_json::to_value(issue)?;
|
||||
let transformed = transform_issue(issue.clone())?;
|
||||
let payload_bytes = serde_json::to_vec(issue)?;
|
||||
let transformed = transform_issue(issue)?;
|
||||
let issue_row = &transformed.issue;
|
||||
|
||||
// Wrap all DB operations in a transaction for atomicity
|
||||
@@ -207,7 +207,7 @@ fn process_single_issue(
|
||||
config,
|
||||
project_id,
|
||||
issue,
|
||||
&payload_json,
|
||||
&payload_bytes,
|
||||
issue_row,
|
||||
&transformed.label_names,
|
||||
&transformed.assignee_usernames,
|
||||
@@ -226,7 +226,7 @@ fn process_issue_in_transaction(
|
||||
config: &Config,
|
||||
project_id: i64,
|
||||
issue: &GitLabIssue,
|
||||
payload_json: &serde_json::Value,
|
||||
payload_bytes: &[u8],
|
||||
issue_row: &crate::gitlab::transformers::IssueRow,
|
||||
label_names: &[String],
|
||||
assignee_usernames: &[String],
|
||||
@@ -242,7 +242,7 @@ fn process_issue_in_transaction(
|
||||
project_id: Some(project_id),
|
||||
resource_type: "issue",
|
||||
gitlab_id: &issue.id.to_string(),
|
||||
payload: payload_json,
|
||||
json_bytes: payload_bytes,
|
||||
compress: config.storage.compress_raw_payloads,
|
||||
},
|
||||
)?;
|
||||
@@ -332,33 +332,27 @@ fn process_issue_in_transaction(
|
||||
}
|
||||
|
||||
/// Upsert a label within a transaction, returning its ID.
|
||||
/// Uses INSERT...ON CONFLICT...RETURNING for a single round-trip.
|
||||
fn upsert_label_tx(
|
||||
tx: &Transaction<'_>,
|
||||
project_id: i64,
|
||||
name: &str,
|
||||
created_count: &mut usize,
|
||||
) -> Result<i64> {
|
||||
// Try to get existing
|
||||
let existing: Option<i64> = tx
|
||||
.query_row(
|
||||
"SELECT id FROM labels WHERE project_id = ? AND name = ?",
|
||||
(project_id, name),
|
||||
|row| row.get(0),
|
||||
)
|
||||
.ok();
|
||||
let id: i64 = tx.query_row(
|
||||
"INSERT INTO labels (project_id, name) VALUES (?1, ?2)
|
||||
ON CONFLICT(project_id, name) DO UPDATE SET name = excluded.name
|
||||
RETURNING id",
|
||||
(project_id, name),
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
if let Some(id) = existing {
|
||||
return Ok(id);
|
||||
// If the rowid matches last_insert_rowid, this was a new insert
|
||||
if tx.last_insert_rowid() == id {
|
||||
*created_count += 1;
|
||||
}
|
||||
|
||||
// Insert new
|
||||
tx.execute(
|
||||
"INSERT INTO labels (project_id, name) VALUES (?, ?)",
|
||||
(project_id, name),
|
||||
)?;
|
||||
*created_count += 1;
|
||||
|
||||
Ok(tx.last_insert_rowid())
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Link an issue to a label within a transaction.
|
||||
@@ -371,12 +365,13 @@ fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Re
|
||||
}
|
||||
|
||||
/// Upsert a milestone within a transaction, returning its local ID.
|
||||
/// Uses RETURNING to avoid a separate SELECT round-trip.
|
||||
fn upsert_milestone_tx(
|
||||
tx: &Transaction<'_>,
|
||||
project_id: i64,
|
||||
milestone: &MilestoneRow,
|
||||
) -> Result<i64> {
|
||||
tx.execute(
|
||||
let local_id: i64 = tx.query_row(
|
||||
"INSERT INTO milestones (gitlab_id, project_id, iid, title, description, state, due_date, web_url)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
|
||||
ON CONFLICT(project_id, gitlab_id) DO UPDATE SET
|
||||
@@ -385,7 +380,8 @@ fn upsert_milestone_tx(
|
||||
description = excluded.description,
|
||||
state = excluded.state,
|
||||
due_date = excluded.due_date,
|
||||
web_url = excluded.web_url",
|
||||
web_url = excluded.web_url
|
||||
RETURNING id",
|
||||
(
|
||||
milestone.gitlab_id,
|
||||
project_id,
|
||||
@@ -396,12 +392,6 @@ fn upsert_milestone_tx(
|
||||
&milestone.due_date,
|
||||
&milestone.web_url,
|
||||
),
|
||||
)?;
|
||||
|
||||
// Get the local ID (whether inserted or updated)
|
||||
let local_id: i64 = tx.query_row(
|
||||
"SELECT id FROM milestones WHERE project_id = ? AND gitlab_id = ?",
|
||||
(project_id, milestone.gitlab_id),
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user