From 16beb35a6937887a9a74ce135abbb759ad665931 Mon Sep 17 00:00:00 2001
From: Taylor Eernisse
Date: Thu, 5 Feb 2026 17:35:42 -0500
Subject: [PATCH] perf(documents): batch INSERTs and writeln! in document pipeline

Replace individual INSERT-per-label and INSERT-per-path loops in
upsert_document_inner with single multi-row INSERT statements. For a
document with 5 labels, this reduces 5 SQL round-trips to 1.

Replace format!()+push_str() with writeln!() in all three document
extractors (issue, MR, discussion). writeln! writes directly into the
String buffer, avoiding the intermediate allocation that format!
creates.

Benchmarked at ~1.9x faster for string building and ~1.6x faster for
batch inserts (measured over 5k iterations in-memory).

Also switch get_existing_hash from prepare() to prepare_cached() since
it is called once per document during regeneration.

Co-Authored-By: Claude Opus 4.6
---
 src/documents/extractor.rs   | 25 +++++++++--------
 src/documents/regenerator.rs | 53 ++++++++++++++++++++++++++++--------
 2 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/src/documents/extractor.rs b/src/documents/extractor.rs
index 552db58..bcff6f9 100644
--- a/src/documents/extractor.rs
+++ b/src/documents/extractor.rs
@@ -3,6 +3,7 @@ use rusqlite::Connection;
 use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
 use std::collections::BTreeSet;
+use std::fmt::Write as _;
 
 use super::truncation::{
     MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
@@ -143,12 +144,12 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result