From 2bfa4f1f8c3ac454426876768c9e2dd7760266d8 Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Fri, 6 Feb 2026 22:42:26 -0500 Subject: [PATCH] perf(documents): eliminate redundant hash query in regeneration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The document regenerator was making two queries per document: 1. get_existing_hash() — SELECT content_hash 2. upsert_document_inner() — SELECT id, content_hash, labels_hash, paths_hash Query 2 already returns the content_hash needed for change detection. Remove get_existing_hash() entirely and compute content_changed inside upsert_document_inner() from the existing row data. upsert_document_inner now returns Result<bool> (true = content changed) which propagates up through upsert_document and regenerate_one, replacing the separate pre-check. The triple-hash fast-path (all three hashes match → return Ok(false) with no writes) is preserved. This halves the query count for unchanged documents, which dominate incremental syncs. 
Co-Authored-By: Claude Opus 4.6 --- src/documents/regenerator.rs | 42 +++++++++++------------------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs index f43e15d..2a71749 100644 --- a/src/documents/regenerator.rs +++ b/src/documents/regenerator.rs @@ -95,38 +95,15 @@ fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) -> return Ok(true); }; - let existing_hash = get_existing_hash(conn, source_type, source_id)?; - let changed = existing_hash.as_ref() != Some(&doc.content_hash); - - upsert_document(conn, &doc)?; - - Ok(changed) + upsert_document(conn, &doc) } -fn get_existing_hash( - conn: &Connection, - source_type: SourceType, - source_id: i64, -) -> Result<Option<String>> { - let mut stmt = conn.prepare_cached( - "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2", - )?; - - let hash: Option<String> = stmt - .query_row(rusqlite::params![source_type.as_str(), source_id], |row| { - row.get(0) - }) - .optional()?; - - Ok(hash) - } - -fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { +fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<bool> { conn.execute_batch("SAVEPOINT upsert_doc")?; match upsert_document_inner(conn, doc) { - Ok(()) => { + Ok(changed) => { conn.execute_batch("RELEASE upsert_doc")?; - Ok(()) + Ok(changed) } Err(e) => { let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc"); @@ -135,7 +112,7 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { } } -fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> { +fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool> { let existing: Option<(i64, String, String, String)> = conn .query_row( "SELECT id, content_hash, labels_hash, paths_hash FROM documents @@ -145,12 +122,17 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> { ) .optional()?; + let 
content_changed = match &existing { + Some((_, old_content_hash, _, _)) => old_content_hash != &doc.content_hash, + None => true, + }; + if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing && old_content_hash == &doc.content_hash && old_labels_hash == &doc.labels_hash && old_paths_hash == &doc.paths_hash { - return Ok(()); + return Ok(false); } let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string()); @@ -260,7 +242,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> { } } - Ok(()) + Ok(content_changed) } fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {