perf: eliminate unnecessary clones and pre-allocate collections

Three micro-optimizations with zero behavioral change:

1. timeline_collect.rs: Reorder format!() before enum construction so
   the owned String moves into the variant directly, eliminating
   .clone() on state, label, and milestone strings in StateChanged,
   LabelAdded/Removed, and MilestoneSet/Removed event paths.

2. pipeline.rs: Use Arc<str> for doc_hash shared across a document's
   chunks instead of cloning the full String per chunk. Also remove
   redundant embed_buf.reserve(): the reused buffer keeps its capacity
   across iterations (clear() empties it without shrinking), so after
   the first document extend_from_slice never needs to reallocate.

3. rrf.rs: Pre-allocate HashMap with combined vector+fts result count
   via with_capacity() to avoid rehashing during RRF score accumulation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-08 08:08:14 -05:00
parent cc11d3e5a0
commit 435a208c93
3 changed files with 25 additions and 32 deletions

View File

@@ -1,4 +1,5 @@
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use futures::future::join_all;
use rusqlite::Connection;
@@ -28,7 +29,7 @@ struct ChunkWork {
doc_id: i64,
chunk_index: usize,
total_chunks: usize,
doc_hash: String,
doc_hash: Arc<str>,
chunk_hash: String,
text: String,
}
@@ -212,12 +213,13 @@ async fn embed_page(
chunks_needed.insert(doc.document_id, total_chunks);
let doc_hash: Arc<str> = Arc::from(doc.content_hash.as_str());
for (chunk_index, text) in chunks {
all_chunks.push(ChunkWork {
doc_id: doc.document_id,
chunk_index,
total_chunks,
doc_hash: doc.content_hash.clone(),
doc_hash: Arc::clone(&doc_hash),
chunk_hash: sha256_hash(&text),
text,
});
@@ -501,7 +503,6 @@ fn store_embedding(
let rowid = encode_rowid(doc_id, chunk_index as i64);
embed_buf.clear();
embed_buf.reserve(embedding.len() * 4);
for f in embedding {
embed_buf.extend_from_slice(&f.to_le_bytes());
}