style: Apply cargo fmt and clippy fixes across codebase

Automated formatting and lint corrections from parallel agent work:

- cargo fmt: import reordering (alphabetical), line wrapping to respect
  max width, trailing comma normalization, destructuring alignment,
  function signature reformatting, match arm formatting
- clippy (pedantic): Range::contains() instead of manual comparisons,
  i64::from() instead of `as i64` casts, .clamp() instead of
  .max().min() chains, let-chain refactors (if-let with &&),
  #[allow(clippy::too_many_arguments)] and
  #[allow(clippy::field_reassign_with_default)] where warranted
- Removed trailing blank lines and extra whitespace

No behavioral changes. All existing tests pass unmodified.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-03 13:01:59 -05:00
parent ff94f24702
commit a50fc78823
42 changed files with 1431 additions and 623 deletions

View File

@@ -78,7 +78,9 @@ pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
fn find_paragraph_break(window: &str) -> Option<usize> {
// Search backward from 2/3 of the way through to find a good split
let search_start = window.len() * 2 / 3;
window[search_start..].rfind("\n\n").map(|pos| search_start + pos + 2)
window[search_start..]
.rfind("\n\n")
.map(|pos| search_start + pos + 2)
.or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
}
@@ -102,7 +104,9 @@ fn find_sentence_break(window: &str) -> Option<usize> {
/// Find the last word boundary (space) in the window.
fn find_word_break(window: &str) -> Option<usize> {
let search_start = window.len() / 2;
window[search_start..].rfind(' ').map(|pos| search_start + pos + 1)
window[search_start..]
.rfind(' ')
.map(|pos| search_start + pos + 1)
.or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
}
@@ -155,7 +159,11 @@ mod tests {
}
let chunks = split_into_chunks(&content);
assert!(chunks.len() >= 2, "Expected multiple chunks, got {}", chunks.len());
assert!(
chunks.len() >= 2,
"Expected multiple chunks, got {}",
chunks.len()
);
// Verify indices are sequential
for (i, (idx, _)) in chunks.iter().enumerate() {
@@ -183,7 +191,8 @@ mod tests {
let end_of_first = &chunks[0].1;
let start_of_second = &chunks[1].1;
// The end of first chunk should overlap with start of second
let overlap_region = &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
let overlap_region =
&end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
assert!(
start_of_second.starts_with(overlap_region)
|| overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]),

View File

@@ -4,6 +4,6 @@ pub mod chunking;
pub mod ollama;
pub mod pipeline;
pub use change_detector::{count_pending_documents, find_pending_documents, PendingDocument};
pub use chunking::{split_into_chunks, CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS};
pub use pipeline::{embed_documents, EmbedResult};
pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use pipeline::{EmbedResult, embed_documents};

View File

@@ -67,15 +67,15 @@ impl OllamaClient {
pub async fn health_check(&self) -> Result<()> {
let url = format!("{}/api/tags", self.config.base_url);
let response = self
.client
.get(&url)
.send()
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
})?;
let response =
self.client
.get(&url)
.send()
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
})?;
let tags: TagsResponse =
response
@@ -111,12 +111,16 @@ impl OllamaClient {
input: texts,
};
let response = self.client.post(&url).json(&request).send().await.map_err(
|e| LoreError::OllamaUnavailable {
let response = self
.client
.post(&url)
.json(&request)
.send()
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
},
)?;
})?;
let status = response.status();
if !status.is_success() {

View File

@@ -8,8 +8,8 @@ use tracing::{info, warn};
use crate::core::error::Result;
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
use crate::embedding::chunk_ids::{encode_rowid, CHUNK_ROWID_MULTIPLIER};
use crate::embedding::chunking::{split_into_chunks, CHUNK_MAX_BYTES, EXPECTED_DIMS};
use crate::embedding::chunk_ids::{CHUNK_ROWID_MULTIPLIER, encode_rowid};
use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS, split_into_chunks};
use crate::embedding::ollama::OllamaClient;
const BATCH_SIZE: usize = 32;
@@ -211,11 +211,14 @@ pub async fn embed_documents(
|| (err_lower.contains("413") && err_lower.contains("http"));
if is_context_error && batch.len() > 1 {
warn!("Batch failed with context length error, retrying chunks individually");
warn!(
"Batch failed with context length error, retrying chunks individually"
);
for chunk in batch {
match client.embed_batch(vec![chunk.text.clone()]).await {
Ok(embeddings) if !embeddings.is_empty()
&& embeddings[0].len() == EXPECTED_DIMS =>
Ok(embeddings)
if !embeddings.is_empty()
&& embeddings[0].len() == EXPECTED_DIMS =>
{
// Clear old embeddings on first successful chunk
if !cleared_docs.contains(&chunk.doc_id) {
@@ -272,7 +275,6 @@ pub async fn embed_documents(
}
}
}
}
// Fire progress for all normal documents after embedding completes.
@@ -314,6 +316,7 @@ fn clear_document_embeddings(conn: &Connection, document_id: i64) -> Result<()>
}
/// Store an embedding vector and its metadata.
#[allow(clippy::too_many_arguments)]
fn store_embedding(
conn: &Connection,
doc_id: i64,
@@ -347,8 +350,15 @@ fn store_embedding(
created_at, attempt_count, last_error, chunk_max_bytes, chunk_count)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)",
rusqlite::params![
doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
doc_hash, chunk_hash, now, CHUNK_MAX_BYTES as i64, chunk_count
doc_id,
chunk_index as i64,
model_name,
EXPECTED_DIMS as i64,
doc_hash,
chunk_hash,
now,
CHUNK_MAX_BYTES as i64,
chunk_count
],
)?;
@@ -377,8 +387,15 @@ fn record_embedding_error(
last_attempt_at = ?7,
chunk_max_bytes = ?9",
rusqlite::params![
doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
doc_hash, chunk_hash, now, error, CHUNK_MAX_BYTES as i64
doc_id,
chunk_index as i64,
model_name,
EXPECTED_DIMS as i64,
doc_hash,
chunk_hash,
now,
error,
CHUNK_MAX_BYTES as i64
],
)?;
Ok(())