fix: document upsert project_id, truncation budget, and Ollama model matching

- regenerator: Include project_id in the ON CONFLICT UPDATE clause for
  document upserts. Previously, if a document moved between projects
  (e.g., during re-ingestion), the project_id would remain stale.

- truncation: Compute the omission marker ("N notes omitted") before
  the budget check and include its length in that check (first note +
  marker + last note must fit). The old order computed the marker after
  the budget check, meaning the marker's byte cost was unaccounted for
  and could cause over-budget output.

- ollama: Tighten model name matching to require either an exact match
  or the configured model name followed by a colon-delimited tag
  (name == model, or name starts with "<model>:"). The prior
  starts_with check would false-positive on "nomic-embed-text-v2" when
  looking for "nomic-embed-text". Tests updated to cover exact match,
  tagged, wrong model, and prefix false-positive cases.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-09 10:16:14 -05:00
parent dfa44e5bcd
commit 45126f04a6
3 changed files with 31 additions and 12 deletions

View File

@@ -145,6 +145,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
is_truncated, truncated_reason) is_truncated, truncated_reason)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)
ON CONFLICT(source_type, source_id) DO UPDATE SET ON CONFLICT(source_type, source_id) DO UPDATE SET
project_id = excluded.project_id,
author_username = excluded.author_username, author_username = excluded.author_username,
label_names = excluded.label_names, label_names = excluded.label_names,
labels_hash = excluded.labels_hash, labels_hash = excluded.labels_hash,

View File

@@ -110,7 +110,9 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
} }
let first_note = &formatted[0]; let first_note = &formatted[0];
if first_note.len() + last_note.len() > max_bytes { let omitted = formatted.len() - 2;
let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
if first_note.len() + marker.len() + last_note.len() > max_bytes {
let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11)); let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11));
let content = format!("{}[truncated]", truncated); let content = format!("{}[truncated]", truncated);
return TruncationResult { return TruncationResult {
@@ -120,8 +122,6 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
}; };
} }
let omitted = formatted.len() - 2;
let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
let content = format!("{}{}{}", formatted[0], marker, last_note); let content = format!("{}{}{}", formatted[0], marker, last_note);
TruncationResult { TruncationResult {
content, content,

View File

@@ -87,10 +87,9 @@ impl OllamaClient {
source: Some(e), source: Some(e),
})?; })?;
let model_found = tags let model_found = tags.models.iter().any(|m| {
.models m.name == self.config.model || m.name.starts_with(&format!("{}:", self.config.model))
.iter() });
.any(|m| m.name.starts_with(&self.config.model));
if !model_found { if !model_found {
return Err(LoreError::OllamaModelNotFound { return Err(LoreError::OllamaModelNotFound {
@@ -169,13 +168,32 @@ mod tests {
} }
#[test] #[test]
fn test_health_check_model_starts_with() { fn test_health_check_model_matching() {
let model = "nomic-embed-text"; let model = "nomic-embed-text";
let tag_name = "nomic-embed-text:latest";
assert!(tag_name.starts_with(model));
let wrong_model = "llama2"; let tag_name = "nomic-embed-text:latest";
assert!(!tag_name.starts_with(wrong_model)); assert!(
tag_name == model || tag_name.starts_with(&format!("{model}:")),
"should match model with tag"
);
let exact_name = "nomic-embed-text";
assert!(
exact_name == model || exact_name.starts_with(&format!("{model}:")),
"should match exact model name"
);
let wrong_model = "llama2:latest";
assert!(
!(wrong_model == model || wrong_model.starts_with(&format!("{model}:"))),
"should not match wrong model"
);
let similar_model = "nomic-embed-text-v2:latest";
assert!(
!(similar_model == model || similar_model.starts_with(&format!("{model}:"))),
"should not false-positive on model name prefix"
);
} }
#[test] #[test]