style: Apply cargo fmt and clippy fixes across codebase

Automated formatting and lint corrections from parallel agent work:

- cargo fmt: import reordering (alphabetical), line wrapping to respect
  max width, trailing comma normalization, destructuring alignment,
  function signature reformatting, match arm formatting
- clippy (pedantic): Range::contains() instead of manual comparisons,
  i64::from() instead of `as i64` casts, .clamp() instead of
  .max().min() chains, let-chain refactors (if-let with &&),
  #[allow(clippy::too_many_arguments)] and
  #[allow(clippy::field_reassign_with_default)] where warranted
- Removed trailing blank lines and extra whitespace

No behavioral changes. All existing tests pass unmodified.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 13:01:59 -05:00
parent ff94f24702
commit a50fc78823
42 changed files with 1431 additions and 623 deletions
--- a/src/embedding/chunking.rs
+++ b/src/embedding/chunking.rs
@@ -78,7 +78,9 @@ pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
 fn find_paragraph_break(window: &str) -> Option<usize> {
    // Search backward from 2/3 of the way through to find a good split
    let search_start = window.len() * 2 / 3;
-    window[search_start..].rfind("\n\n").map(|pos| search_start + pos + 2)
+    window[search_start..]
+        .rfind("\n\n")
+        .map(|pos| search_start + pos + 2)
        .or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
 }

@@ -102,7 +104,9 @@ fn find_sentence_break(window: &str) -> Option<usize> {
 /// Find the last word boundary (space) in the window.
 fn find_word_break(window: &str) -> Option<usize> {
    let search_start = window.len() / 2;
-    window[search_start..].rfind(' ').map(|pos| search_start + pos + 1)
+    window[search_start..]
+        .rfind(' ')
+        .map(|pos| search_start + pos + 1)
        .or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
 }

@@ -155,7 +159,11 @@ mod tests {
        }

        let chunks = split_into_chunks(&content);
-        assert!(chunks.len() >= 2, "Expected multiple chunks, got {}", chunks.len());
+        assert!(
+            chunks.len() >= 2,
+            "Expected multiple chunks, got {}",
+            chunks.len()
+        );

        // Verify indices are sequential
        for (i, (idx, _)) in chunks.iter().enumerate() {
@@ -183,7 +191,8 @@ mod tests {
            let end_of_first = &chunks[0].1;
            let start_of_second = &chunks[1].1;
            // The end of first chunk should overlap with start of second
-            let overlap_region = &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
+            let overlap_region =
+                &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
            assert!(
                start_of_second.starts_with(overlap_region)
                    || overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]),
--- a/src/embedding/mod.rs
+++ b/src/embedding/mod.rs
@@ -4,6 +4,6 @@ pub mod chunking;
 pub mod ollama;
 pub mod pipeline;

-pub use change_detector::{count_pending_documents, find_pending_documents, PendingDocument};
-pub use chunking::{split_into_chunks, CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS};
-pub use pipeline::{embed_documents, EmbedResult};
+pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
+pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
+pub use pipeline::{EmbedResult, embed_documents};
--- a/src/embedding/ollama.rs
+++ b/src/embedding/ollama.rs
@@ -67,15 +67,15 @@ impl OllamaClient {
    pub async fn health_check(&self) -> Result<()> {
        let url = format!("{}/api/tags", self.config.base_url);

-        let response = self
-            .client
-            .get(&url)
-            .send()
-            .await
-            .map_err(|e| LoreError::OllamaUnavailable {
-                base_url: self.config.base_url.clone(),
-                source: Some(e),
-            })?;
+        let response =
+            self.client
+                .get(&url)
+                .send()
+                .await
+                .map_err(|e| LoreError::OllamaUnavailable {
+                    base_url: self.config.base_url.clone(),
+                    source: Some(e),
+                })?;

        let tags: TagsResponse =
            response
@@ -111,12 +111,16 @@ impl OllamaClient {
            input: texts,
        };

-        let response = self.client.post(&url).json(&request).send().await.map_err(
-            |e| LoreError::OllamaUnavailable {
+        let response = self
+            .client
+            .post(&url)
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| LoreError::OllamaUnavailable {
                base_url: self.config.base_url.clone(),
                source: Some(e),
-            },
-        )?;
+            })?;

        let status = response.status();
        if !status.is_success() {
--- a/src/embedding/pipeline.rs
+++ b/src/embedding/pipeline.rs
@@ -8,8 +8,8 @@ use tracing::{info, warn};

 use crate::core::error::Result;
 use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
-use crate::embedding::chunk_ids::{encode_rowid, CHUNK_ROWID_MULTIPLIER};
-use crate::embedding::chunking::{split_into_chunks, CHUNK_MAX_BYTES, EXPECTED_DIMS};
+use crate::embedding::chunk_ids::{CHUNK_ROWID_MULTIPLIER, encode_rowid};
+use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS, split_into_chunks};
 use crate::embedding::ollama::OllamaClient;

 const BATCH_SIZE: usize = 32;
@@ -211,11 +211,14 @@ pub async fn embed_documents(
                        || (err_lower.contains("413") && err_lower.contains("http"));

                    if is_context_error && batch.len() > 1 {
-                        warn!("Batch failed with context length error, retrying chunks individually");
+                        warn!(
+                            "Batch failed with context length error, retrying chunks individually"
+                        );
                        for chunk in batch {
                            match client.embed_batch(vec![chunk.text.clone()]).await {
-                                Ok(embeddings) if !embeddings.is_empty()
-                                    && embeddings[0].len() == EXPECTED_DIMS =>
+                                Ok(embeddings)
+                                    if !embeddings.is_empty()
+                                        && embeddings[0].len() == EXPECTED_DIMS =>
                                {
                                    // Clear old embeddings on first successful chunk
                                    if !cleared_docs.contains(&chunk.doc_id) {
@@ -272,7 +275,6 @@ pub async fn embed_documents(
                    }
                }
            }
-
        }

        // Fire progress for all normal documents after embedding completes.
@@ -314,6 +316,7 @@ fn clear_document_embeddings(conn: &Connection, document_id: i64) -> Result<()>
 }

 /// Store an embedding vector and its metadata.
+#[allow(clippy::too_many_arguments)]
 fn store_embedding(
    conn: &Connection,
    doc_id: i64,
@@ -347,8 +350,15 @@ fn store_embedding(
          created_at, attempt_count, last_error, chunk_max_bytes, chunk_count)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)",
        rusqlite::params![
-            doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
-            doc_hash, chunk_hash, now, CHUNK_MAX_BYTES as i64, chunk_count
+            doc_id,
+            chunk_index as i64,
+            model_name,
+            EXPECTED_DIMS as i64,
+            doc_hash,
+            chunk_hash,
+            now,
+            CHUNK_MAX_BYTES as i64,
+            chunk_count
        ],
    )?;

@@ -377,8 +387,15 @@ fn record_embedding_error(
           last_attempt_at = ?7,
           chunk_max_bytes = ?9",
        rusqlite::params![
-            doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
-            doc_hash, chunk_hash, now, error, CHUNK_MAX_BYTES as i64
+            doc_id,
+            chunk_index as i64,
+            model_name,
+            EXPECTED_DIMS as i64,
+            doc_hash,
+            chunk_hash,
+            now,
+            error,
+            CHUNK_MAX_BYTES as i64
        ],
    )?;
    Ok(())