From 45126f04a6d00204645c00ba62082723bade1a8c Mon Sep 17 00:00:00 2001
From: Taylor Eernisse <teernisse@visiostack.com>
Date: Mon, 9 Feb 2026 10:16:14 -0500
Subject: [PATCH] fix: document upsert project_id, truncation budget, and
 Ollama model matching

- regenerator: Include project_id in the ON CONFLICT ... DO UPDATE SET
  clause for document upserts. Previously, if a document moved between
  projects (e.g., during re-ingestion), the project_id would remain
  stale.

- truncation: Compute the omission marker ("N notes omitted") before
  checking whether first+last notes fit in the budget, and include the
  marker's length in that check. The old order computed the marker
  after the budget check, meaning the marker's byte cost was
  unaccounted for and could cause over-budget output.

- ollama: Tighten model name matching to require either an exact match
  or the configured model followed by a colon-delimited tag (name ==
  model, or name starts with "<model>:"). The prior starts_with check
  produced a false positive on "nomic-embed-text-v2" when looking for
  "nomic-embed-text". Tests updated to cover the exact-match, tagged,
  wrong-model, and prefix false-positive cases.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/documents/regenerator.rs |  1 +
 src/documents/truncation.rs  |  6 +++---
 src/embedding/ollama.rs      | 36 +++++++++++++++++++++++++++---------
 3 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs
index 2a71749..c19c8d1 100644
--- a/src/documents/regenerator.rs
+++ b/src/documents/regenerator.rs
@@ -145,6 +145,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
           is_truncated, truncated_reason)
          VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)
          ON CONFLICT(source_type, source_id) DO UPDATE SET
+           project_id = excluded.project_id,
            author_username = excluded.author_username,
            label_names = excluded.label_names,
            labels_hash = excluded.labels_hash,
diff --git a/src/documents/truncation.rs b/src/documents/truncation.rs
index 5c08359..255f232 100644
--- a/src/documents/truncation.rs
+++ b/src/documents/truncation.rs
@@ -110,7 +110,9 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
     }
 
     let first_note = &formatted[0];
-    if first_note.len() + last_note.len() > max_bytes {
+    let omitted = formatted.len() - 2;
+    let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
+    if first_note.len() + marker.len() + last_note.len() > max_bytes {
         let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11));
         let content = format!("{}[truncated]", truncated);
         return TruncationResult {
@@ -120,8 +122,6 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
         };
     }
 
-    let omitted = formatted.len() - 2;
-    let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
     let content = format!("{}{}{}", formatted[0], marker, last_note);
     TruncationResult {
         content,
diff --git a/src/embedding/ollama.rs b/src/embedding/ollama.rs
index 8f2eafb..daa0903 100644
--- a/src/embedding/ollama.rs
+++ b/src/embedding/ollama.rs
@@ -87,10 +87,9 @@ impl OllamaClient {
                     source: Some(e),
                 })?;
 
-        let model_found = tags
-            .models
-            .iter()
-            .any(|m| m.name.starts_with(&self.config.model));
+        let model_found = tags.models.iter().any(|m| {
+            m.name == self.config.model || m.name.starts_with(&format!("{}:", self.config.model))
+        });
 
         if !model_found {
             return Err(LoreError::OllamaModelNotFound {
@@ -169,13 +168,32 @@ mod tests {
     }
 
     #[test]
-    fn test_health_check_model_starts_with() {
+    fn test_health_check_model_matching() {
         let model = "nomic-embed-text";
-        let tag_name = "nomic-embed-text:latest";
-        assert!(tag_name.starts_with(model));
 
-        let wrong_model = "llama2";
-        assert!(!tag_name.starts_with(wrong_model));
+        let tag_name = "nomic-embed-text:latest";
+        assert!(
+            tag_name == model || tag_name.starts_with(&format!("{model}:")),
+            "should match model with tag"
+        );
+
+        let exact_name = "nomic-embed-text";
+        assert!(
+            exact_name == model || exact_name.starts_with(&format!("{model}:")),
+            "should match exact model name"
+        );
+
+        let wrong_model = "llama2:latest";
+        assert!(
+            !(wrong_model == model || wrong_model.starts_with(&format!("{model}:"))),
+            "should not match wrong model"
+        );
+
+        let similar_model = "nomic-embed-text-v2:latest";
+        assert!(
+            !(similar_model == model || similar_model.starts_with(&format!("{model}:"))),
+            "should not false-positive on model name prefix"
+        );
     }
 
     #[test]