perf(documents): eliminate redundant hash query in regeneration
The document regenerator was making two queries per document:

1. get_existing_hash() — SELECT content_hash
2. upsert_document_inner() — SELECT id, content_hash, labels_hash, paths_hash

Query 2 already returns the content_hash needed for change detection. Remove get_existing_hash() entirely and compute content_changed inside upsert_document_inner() from the existing row data.

upsert_document_inner now returns Result<bool> (true = content changed), which propagates up through upsert_document and regenerate_one, replacing the separate pre-check. The triple-hash fast path (all three hashes match → return Ok(false) with no writes) is preserved.

This halves the query count for unchanged documents, which dominate incremental syncs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -95,38 +95,15 @@ fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) ->
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
let existing_hash = get_existing_hash(conn, source_type, source_id)?;
|
||||
let changed = existing_hash.as_ref() != Some(&doc.content_hash);
|
||||
|
||||
upsert_document(conn, &doc)?;
|
||||
|
||||
Ok(changed)
|
||||
upsert_document(conn, &doc)
|
||||
}
|
||||
|
||||
fn get_existing_hash(
|
||||
conn: &Connection,
|
||||
source_type: SourceType,
|
||||
source_id: i64,
|
||||
) -> Result<Option<String>> {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2",
|
||||
)?;
|
||||
|
||||
let hash: Option<String> = stmt
|
||||
.query_row(rusqlite::params![source_type.as_str(), source_id], |row| {
|
||||
row.get(0)
|
||||
})
|
||||
.optional()?;
|
||||
|
||||
Ok(hash)
|
||||
}
|
||||
|
||||
fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<bool> {
|
||||
conn.execute_batch("SAVEPOINT upsert_doc")?;
|
||||
match upsert_document_inner(conn, doc) {
|
||||
Ok(()) => {
|
||||
Ok(changed) => {
|
||||
conn.execute_batch("RELEASE upsert_doc")?;
|
||||
Ok(())
|
||||
Ok(changed)
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc");
|
||||
@@ -135,7 +112,7 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool> {
|
||||
let existing: Option<(i64, String, String, String)> = conn
|
||||
.query_row(
|
||||
"SELECT id, content_hash, labels_hash, paths_hash FROM documents
|
||||
@@ -145,12 +122,17 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
)
|
||||
.optional()?;
|
||||
|
||||
let content_changed = match &existing {
|
||||
Some((_, old_content_hash, _, _)) => old_content_hash != &doc.content_hash,
|
||||
None => true,
|
||||
};
|
||||
|
||||
if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing
|
||||
&& old_content_hash == &doc.content_hash
|
||||
&& old_labels_hash == &doc.labels_hash
|
||||
&& old_paths_hash == &doc.paths_hash
|
||||
{
|
||||
return Ok(());
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
|
||||
@@ -260,7 +242,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(content_changed)
|
||||
}
|
||||
|
||||
fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user