fix: document upsert project_id, truncation budget, and Ollama model matching

- regenerator: Include project_id in the ON CONFLICT UPDATE clause for document upserts. Previously, if a document moved between projects (e.g., during re-ingestion), the project_id would remain stale.
- truncation: Compute the omission marker ("N notes omitted") before checking whether first+last notes fit in the budget. The old order computed the marker after the budget check, meaning the marker's byte cost was unaccounted for and could cause over-budget output.
- ollama: Tighten model name matching to require either an exact match or a colon-delimited tag prefix (model == name or name starts with "model:"). The prior starts_with check would false-positive on "nomic-embed-text-v2" when looking for "nomic-embed-text". Tests updated to cover exact match, tagged, wrong model, and prefix false-positive cases.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 10:16:14 -05:00
parent dfa44e5bcd
commit 45126f04a6
3 changed files with 31 additions and 12 deletions
--- a/src/documents/regenerator.rs
+++ b/src/documents/regenerator.rs
@@ -145,6 +145,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
          is_truncated, truncated_reason)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)
         ON CONFLICT(source_type, source_id) DO UPDATE SET
           project_id = excluded.project_id,
           author_username = excluded.author_username,
           label_names = excluded.label_names,
           labels_hash = excluded.labels_hash,
--- a/src/documents/truncation.rs
+++ b/src/documents/truncation.rs
@@ -110,7 +110,9 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
    }
    let first_note = &formatted[0];
-    if first_note.len() + last_note.len() > max_bytes {
+    let omitted = formatted.len() - 2;
    let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
    if first_note.len() + marker.len() + last_note.len() > max_bytes {
        let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11));
        let content = format!("{}[truncated]", truncated);
        return TruncationResult {
@@ -120,8 +122,6 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
        };
    }
    let omitted = formatted.len() - 2;
    let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
    let content = format!("{}{}{}", formatted[0], marker, last_note);
    TruncationResult {
        content,
--- a/src/embedding/ollama.rs
+++ b/src/embedding/ollama.rs
@@ -87,10 +87,9 @@ impl OllamaClient {
                    source: Some(e),
                })?;
-        let model_found = tags
+        let model_found = tags.models.iter().any(|m| {
-            .models
+            m.name == self.config.model || m.name.starts_with(&format!("{}:", self.config.model))
-            .iter()
+        });
            .any(|m| m.name.starts_with(&self.config.model));
        if !model_found {
            return Err(LoreError::OllamaModelNotFound {
@@ -169,13 +168,32 @@ mod tests {
    }
    #[test]
-    fn test_health_check_model_starts_with() {
+    fn test_health_check_model_matching() {
        let model = "nomic-embed-text";
        let tag_name = "nomic-embed-text:latest";
        assert!(tag_name.starts_with(model));
-        let wrong_model = "llama2";
+        let tag_name = "nomic-embed-text:latest";
-        assert!(!tag_name.starts_with(wrong_model));
+        assert!(
            tag_name == model || tag_name.starts_with(&format!("{model}:")),
            "should match model with tag"
        );
        let exact_name = "nomic-embed-text";
        assert!(
            exact_name == model || exact_name.starts_with(&format!("{model}:")),
            "should match exact model name"
        );
        let wrong_model = "llama2:latest";
        assert!(
            !(wrong_model == model || wrong_model.starts_with(&format!("{model}:"))),
            "should not match wrong model"
        );
        let similar_model = "nomic-embed-text-v2:latest";
        assert!(
            !(similar_model == model || similar_model.starts_with(&format!("{model}:"))),
            "should not false-positive on model name prefix"
        );
    }
    #[test]