perf: eliminate unnecessary clones and pre-allocate collections

Three micro-optimizations with zero behavioral change:

1. timeline_collect.rs: Reorder format!() before enum construction so the
   owned String moves into the variant directly, eliminating .clone() on
   state, label, and milestone strings in the StateChanged,
   LabelAdded/Removed, and MilestoneSet/Removed event paths.

2. pipeline.rs: Use Arc<str> for doc_hash shared across a document's chunks
   instead of cloning the full String per chunk. Also remove the redundant
   embed_buf.reserve() call, since extend_from_slice already handles growth
   and the buffer is reused across iterations.

3. rrf.rs: Pre-allocate the HashMap via with_capacity() using the combined
   vector + FTS result count to avoid rehashing during RRF score
   accumulation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 08:08:14 -05:00
parent cc11d3e5a0
commit 435a208c93
3 changed files with 25 additions and 32 deletions
--- a/src/core/timeline_collect.rs
+++ b/src/core/timeline_collect.rs
@@ -146,16 +146,15 @@ fn collect_state_events(
            continue;
        }

+        let summary = format!("State changed to {state}");
        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
-            event_type: TimelineEventType::StateChanged {
-                state: state.clone(),
-            },
-            summary: format!("State changed to {state}"),
+            event_type: TimelineEventType::StateChanged { state },
+            summary,
            actor,
            url: None,
            is_seed,
@@ -195,18 +194,14 @@ fn collect_label_events(
        let label = label_name.unwrap_or_else(|| "[deleted label]".to_owned());

        let (event_type, summary) = match action.as_str() {
-            "add" => (
-                TimelineEventType::LabelAdded {
-                    label: label.clone(),
-                },
-                format!("Label added: {label}"),
-            ),
-            "remove" => (
-                TimelineEventType::LabelRemoved {
-                    label: label.clone(),
-                },
-                format!("Label removed: {label}"),
-            ),
+            "add" => {
+                let summary = format!("Label added: {label}");
+                (TimelineEventType::LabelAdded { label }, summary)
+            }
+            "remove" => {
+                let summary = format!("Label removed: {label}");
+                (TimelineEventType::LabelRemoved { label }, summary)
+            }
            _ => continue,
        };

@@ -257,18 +252,14 @@ fn collect_milestone_events(
        let milestone = milestone_title.unwrap_or_else(|| "[deleted milestone]".to_owned());

        let (event_type, summary) = match action.as_str() {
-            "add" => (
-                TimelineEventType::MilestoneSet {
-                    milestone: milestone.clone(),
-                },
-                format!("Milestone set: {milestone}"),
-            ),
-            "remove" => (
-                TimelineEventType::MilestoneRemoved {
-                    milestone: milestone.clone(),
-                },
-                format!("Milestone removed: {milestone}"),
-            ),
+            "add" => {
+                let summary = format!("Milestone set: {milestone}");
+                (TimelineEventType::MilestoneSet { milestone }, summary)
+            }
+            "remove" => {
+                let summary = format!("Milestone removed: {milestone}");
+                (TimelineEventType::MilestoneRemoved { milestone }, summary)
+            }
            _ => continue,
        };

--- a/src/embedding/pipeline.rs
+++ b/src/embedding/pipeline.rs
@@ -1,4 +1,5 @@
 use std::collections::{HashMap, HashSet};
+use std::sync::Arc;

 use futures::future::join_all;
 use rusqlite::Connection;
@@ -28,7 +29,7 @@ struct ChunkWork {
    doc_id: i64,
    chunk_index: usize,
    total_chunks: usize,
-    doc_hash: String,
+    doc_hash: Arc<str>,
    chunk_hash: String,
    text: String,
 }
@@ -212,12 +213,13 @@ async fn embed_page(

        chunks_needed.insert(doc.document_id, total_chunks);

+        let doc_hash: Arc<str> = Arc::from(doc.content_hash.as_str());
        for (chunk_index, text) in chunks {
            all_chunks.push(ChunkWork {
                doc_id: doc.document_id,
                chunk_index,
                total_chunks,
-                doc_hash: doc.content_hash.clone(),
+                doc_hash: Arc::clone(&doc_hash),
                chunk_hash: sha256_hash(&text),
                text,
            });
@@ -501,7 +503,6 @@ fn store_embedding(
    let rowid = encode_rowid(doc_id, chunk_index as i64);

    embed_buf.clear();
-    embed_buf.reserve(embedding.len() * 4);
    for f in embedding {
        embed_buf.extend_from_slice(&f.to_le_bytes());
    }
--- a/src/search/rrf.rs
+++ b/src/search/rrf.rs
@@ -15,7 +15,8 @@ pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Ve
        return Vec::new();
    }

-    let mut scores: HashMap<i64, (f64, Option<usize>, Option<usize>)> = HashMap::new();
+    let mut scores: HashMap<i64, (f64, Option<usize>, Option<usize>)> =
+        HashMap::with_capacity(vector_results.len() + fts_results.len());

    for (i, &(doc_id, _)) in vector_results.iter().enumerate() {
        let rank = i + 1;