perf(ingestion): replace per-row INSERT loops with chunked batch INSERTs
The issue and MR ingestion paths previously inserted labels, assignees, and reviewers one row at a time inside a transaction. For entities with many labels or assignees, this issued N separate SQLite statements where a single multi-row INSERT suffices.

Replace the per-row loops with batch INSERT functions that build a single `INSERT OR IGNORE ... VALUES (?1,?2),(?1,?3),...` statement per chunk. Chunks are capped at 400 rows (BATCH_LINK_ROWS_MAX) to stay comfortably below SQLite's default 999 bind-parameter limit.

Affected paths:
- issues.rs: link_issue_labels_batch_tx, insert_issue_assignees_batch_tx
- merge_requests.rs: insert_mr_labels_batch_tx, insert_mr_assignees_batch_tx, insert_mr_reviewers_batch_tx

New tests verify deduplication (OR IGNORE), multi-chunk correctness, and equivalence with the old per-row approach. A perf benchmark (bench_issue_assignee_insert_individual_vs_batch) demonstrates the speedup across representative assignee set sizes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,9 @@ struct SyncCursor {
|
||||
tie_breaker_id: Option<i64>,
|
||||
}
|
||||
|
||||
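// Maximum rows per batched link INSERT. The batch helpers in this file bind one
// shared id plus one value per row, so a full chunk uses 401 parameters, which
// stays comfortably below SQLite's default 999 bind-parameter limit.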
|
||||
const BATCH_LINK_ROWS_MAX: usize = 400;
|
||||
|
||||
pub async fn ingest_issues(
|
||||
conn: &Connection,
|
||||
client: &GitLabClient,
|
||||
@@ -252,22 +255,19 @@ fn process_issue_in_transaction(
|
||||
[local_issue_id],
|
||||
)?;
|
||||
|
||||
let mut label_ids = Vec::with_capacity(label_names.len());
|
||||
for label_name in label_names {
|
||||
let label_id = upsert_label_tx(tx, project_id, label_name, &mut labels_created)?;
|
||||
link_issue_label_tx(tx, local_issue_id, label_id)?;
|
||||
label_ids.push(label_id);
|
||||
}
|
||||
link_issue_labels_batch_tx(tx, local_issue_id, &label_ids)?;
|
||||
|
||||
tx.execute(
|
||||
"DELETE FROM issue_assignees WHERE issue_id = ?",
|
||||
[local_issue_id],
|
||||
)?;
|
||||
|
||||
for username in assignee_usernames {
|
||||
tx.execute(
|
||||
"INSERT OR IGNORE INTO issue_assignees (issue_id, username) VALUES (?, ?)",
|
||||
(local_issue_id, username),
|
||||
)?;
|
||||
}
|
||||
insert_issue_assignees_batch_tx(tx, local_issue_id, assignee_usernames)?;
|
||||
|
||||
Ok(labels_created)
|
||||
}
|
||||
@@ -296,11 +296,65 @@ fn upsert_label_tx(
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Result<()> {
|
||||
tx.execute(
|
||||
"INSERT OR IGNORE INTO issue_labels (issue_id, label_id) VALUES (?, ?)",
|
||||
(issue_id, label_id),
|
||||
)?;
|
||||
fn link_issue_labels_batch_tx(
|
||||
tx: &Transaction<'_>,
|
||||
issue_id: i64,
|
||||
label_ids: &[i64],
|
||||
) -> Result<()> {
|
||||
if label_ids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for chunk in label_ids.chunks(BATCH_LINK_ROWS_MAX) {
|
||||
let placeholders = (0..chunk.len())
|
||||
.map(|idx| format!("(?1, ?{})", idx + 2))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
let sql = format!(
|
||||
"INSERT OR IGNORE INTO issue_labels (issue_id, label_id) VALUES {}",
|
||||
placeholders
|
||||
);
|
||||
|
||||
let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(chunk.len() + 1);
|
||||
params.push(&issue_id);
|
||||
for label_id in chunk {
|
||||
params.push(label_id);
|
||||
}
|
||||
|
||||
tx.execute(&sql, params.as_slice())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_issue_assignees_batch_tx(
|
||||
tx: &Transaction<'_>,
|
||||
issue_id: i64,
|
||||
usernames: &[String],
|
||||
) -> Result<()> {
|
||||
if usernames.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for chunk in usernames.chunks(BATCH_LINK_ROWS_MAX) {
|
||||
let placeholders = (0..chunk.len())
|
||||
.map(|idx| format!("(?1, ?{})", idx + 2))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
let sql = format!(
|
||||
"INSERT OR IGNORE INTO issue_assignees (issue_id, username) VALUES {}",
|
||||
placeholders
|
||||
);
|
||||
|
||||
let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(chunk.len() + 1);
|
||||
params.push(&issue_id);
|
||||
for username in chunk {
|
||||
params.push(username);
|
||||
}
|
||||
|
||||
tx.execute(&sql, params.as_slice())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user