fix: document upsert project_id, truncation budget, and Ollama model matching
- regenerator: Include project_id in the ON CONFLICT UPDATE clause for
document upserts. Previously, if a document moved between projects
(e.g., during re-ingestion), the project_id would remain stale.
- truncation: Compute the omission marker ("N notes omitted") before
the budget check and include its length in that check, so the test is
now first + marker + last <= max_bytes. The old order compared only
first + last against the budget and built the marker afterwards, so the
marker's byte cost was unaccounted for and could cause over-budget output.
- ollama: Tighten model name matching to require either an exact match
or the configured model name followed by a colon-delimited tag
(name == model, or name starts with "<model>:"). The prior starts_with
check produced a false positive on "nomic-embed-text-v2" when looking
for "nomic-embed-text". Tests updated to cover exact match, tagged,
wrong model, and prefix false-positive cases.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -145,6 +145,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
|
||||
is_truncated, truncated_reason)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)
|
||||
ON CONFLICT(source_type, source_id) DO UPDATE SET
|
||||
project_id = excluded.project_id,
|
||||
author_username = excluded.author_username,
|
||||
label_names = excluded.label_names,
|
||||
labels_hash = excluded.labels_hash,
|
||||
|
||||
@@ -110,7 +110,9 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
|
||||
}
|
||||
|
||||
let first_note = &formatted[0];
|
||||
if first_note.len() + last_note.len() > max_bytes {
|
||||
let omitted = formatted.len() - 2;
|
||||
let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
|
||||
if first_note.len() + marker.len() + last_note.len() > max_bytes {
|
||||
let truncated = truncate_utf8(first_note, max_bytes.saturating_sub(11));
|
||||
let content = format!("{}[truncated]", truncated);
|
||||
return TruncationResult {
|
||||
@@ -120,8 +122,6 @@ pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> Truncatio
|
||||
};
|
||||
}
|
||||
|
||||
let omitted = formatted.len() - 2;
|
||||
let marker = format!("\n\n[... {} notes omitted for length ...]\n\n", omitted);
|
||||
let content = format!("{}{}{}", formatted[0], marker, last_note);
|
||||
TruncationResult {
|
||||
content,
|
||||
|
||||
@@ -87,10 +87,9 @@ impl OllamaClient {
|
||||
source: Some(e),
|
||||
})?;
|
||||
|
||||
let model_found = tags
|
||||
.models
|
||||
.iter()
|
||||
.any(|m| m.name.starts_with(&self.config.model));
|
||||
let model_found = tags.models.iter().any(|m| {
|
||||
m.name == self.config.model || m.name.starts_with(&format!("{}:", self.config.model))
|
||||
});
|
||||
|
||||
if !model_found {
|
||||
return Err(LoreError::OllamaModelNotFound {
|
||||
@@ -169,13 +168,32 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_health_check_model_starts_with() {
|
||||
fn test_health_check_model_matching() {
|
||||
let model = "nomic-embed-text";
|
||||
let tag_name = "nomic-embed-text:latest";
|
||||
assert!(tag_name.starts_with(model));
|
||||
|
||||
let wrong_model = "llama2";
|
||||
assert!(!tag_name.starts_with(wrong_model));
|
||||
let tag_name = "nomic-embed-text:latest";
|
||||
assert!(
|
||||
tag_name == model || tag_name.starts_with(&format!("{model}:")),
|
||||
"should match model with tag"
|
||||
);
|
||||
|
||||
let exact_name = "nomic-embed-text";
|
||||
assert!(
|
||||
exact_name == model || exact_name.starts_with(&format!("{model}:")),
|
||||
"should match exact model name"
|
||||
);
|
||||
|
||||
let wrong_model = "llama2:latest";
|
||||
assert!(
|
||||
!(wrong_model == model || wrong_model.starts_with(&format!("{model}:"))),
|
||||
"should not match wrong model"
|
||||
);
|
||||
|
||||
let similar_model = "nomic-embed-text-v2:latest";
|
||||
assert!(
|
||||
!(similar_model == model || similar_model.starts_with(&format!("{model}:"))),
|
||||
"should not false-positive on model name prefix"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user