From 45126f04a6d00204645c00ba62082723bade1a8c Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Mon, 9 Feb 2026 10:16:14 -0500 Subject: [PATCH] fix: document upsert project_id, truncation budget, and Ollama model matching - regenerator: Include project_id in the ON CONFLICT ... DO UPDATE SET clause for document upserts. Previously, if a document moved between projects (e.g., during re-ingestion), the project_id would remain stale. - truncation: Compute the omission marker ("N notes omitted") before checking whether the first note, the marker, and the last note together fit in the budget. The old order computed the marker after the budget check, meaning the marker's byte cost was unaccounted for and could cause over-budget output. - ollama: Tighten model name matching to require either an exact match or a colon-delimited tag prefix (model == name or name starts with "model:"). The prior starts_with check would produce a false positive on "nomic-embed-text-v2" when looking for "nomic-embed-text". Tests updated to cover exact match, tagged, wrong model, and prefix false-positive cases. 
Co-Authored-By: Claude Opus 4.6 --- src/documents/regenerator.rs | 1 + src/documents/truncation.rs | 6 +++--- src/embedding/ollama.rs | 36 +++++++++++++++++++++++++++--------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs index 2a71749..c19c8d1 100644 --- a/src/documents/regenerator.rs +++ b/src/documents/regenerator.rs @@ -145,6 +145,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result is_truncated, truncated_reason) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15) ON CONFLICT(source_type, source_id) DO UPDATE SET + project_id = excluded.project_id, author_username = excluded.author_username, label_names = excluded.label_names, labels_hash = excluded.labels_hash, diff --git a/src/documents/truncation.rs b/src/documents/truncation.rs index 5c08359..255f232 100644 --- a/src/documents/truncation.rs +++ b/src/documents/truncation.rs @@ -110,7 +110,9 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio } let first_note = &formatted[0]; - if first_note.len() + last_note.len() > max_bytes { + let omitted = formatted.len() - 2; + let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted); + if first_note.len() + marker.len() + last_note.len() > max_bytes { let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11)); let content = format!("{}[truncated]", truncated); return TruncationResult { @@ -120,8 +122,6 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio }; } - let omitted = formatted.len() - 2; - let marker = format!("\n\n[... 
{} notes omitted for length ...]\n\n", omitted); let content = format!("{}{}{}", formatted[0], marker, last_note); TruncationResult { content, diff --git a/src/embedding/ollama.rs b/src/embedding/ollama.rs index 8f2eafb..daa0903 100644 --- a/src/embedding/ollama.rs +++ b/src/embedding/ollama.rs @@ -87,10 +87,9 @@ impl OllamaClient { source: Some(e), })?; - let model_found = tags - .models - .iter() - .any(|m| m.name.starts_with(&self.config.model)); + let model_found = tags.models.iter().any(|m| { + m.name == self.config.model || m.name.starts_with(&format!("{}:", self.config.model)) + }); if !model_found { return Err(LoreError::OllamaModelNotFound { @@ -169,13 +168,32 @@ mod tests { } #[test] - fn test_health_check_model_starts_with() { + fn test_health_check_model_matching() { let model = "nomic-embed-text"; - let tag_name = "nomic-embed-text:latest"; - assert!(tag_name.starts_with(model)); - let wrong_model = "llama2"; - assert!(!tag_name.starts_with(wrong_model)); + let tag_name = "nomic-embed-text:latest"; + assert!( + tag_name == model || tag_name.starts_with(&format!("{model}:")), + "should match model with tag" + ); + + let exact_name = "nomic-embed-text"; + assert!( + exact_name == model || exact_name.starts_with(&format!("{model}:")), + "should match exact model name" + ); + + let wrong_model = "llama2:latest"; + assert!( + !(wrong_model == model || wrong_model.starts_with(&format!("{model}:"))), + "should not match wrong model" + ); + + let similar_model = "nomic-embed-text-v2:latest"; + assert!( + !(similar_model == model || similar_model.starts_with(&format!("{model}:"))), + "should not false-positive on model name prefix" + ); } #[test]