fix: Retry loop safety, doctor model matching, regenerator robustness

Three defensive improvements from peer code review:

Replace unreachable!() in GitLab client retry loops:
Both request() and request_with_headers() had unreachable!() after
their for loops. While the logic was sound (the final iteration always
reaches the return/break), any refactor to the loop condition could
turn this into a runtime panic. Restructured both to store
last_response with an explicit break, making the control flow
self-documenting and the .expect() message useful if the invariant is
ever violated.

Doctor model name comparison asymmetry:
Ollama model names were stripped of their tag (:latest, :v1.5) for
comparison, but the configured model name was compared as-is. A config
value like "nomic-embed-text:v1.5" would never match. Now strips the
tag from both sides before comparing.

Regenerator savepoint cleanup and progress accuracy:
- upsert_document's error path did ROLLBACK TO but never RELEASE,
  leaving a dangling savepoint that could nest on the next call. Added
  RELEASE after rollback so the connection is clean.
- estimated_total for progress reporting was computed once at start but
  the dirty queue can grow during processing. Now recounts each loop
  iteration with max() so the progress fraction never goes backwards.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-04 14:16:54 -05:00
parent 1fdc6d03cc
commit 925ec9f574
3 changed files with 59 additions and 18 deletions

View File

@@ -418,7 +418,11 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
.map(|m| m.name.split(':').next().unwrap_or(&m.name)) .map(|m| m.name.split(':').next().unwrap_or(&m.name))
.collect(); .collect();
if !model_names.iter().any(|m| *m == model) { // Strip tag from configured model name too (e.g.
// "nomic-embed-text:v1.5" → "nomic-embed-text") so both
// sides are compared at the same granularity.
let model_base = model.split(':').next().unwrap_or(model);
if !model_names.contains(&model_base) {
return OllamaCheck { return OllamaCheck {
result: CheckResult { result: CheckResult {
status: CheckStatus::Warning, status: CheckStatus::Warning,

View File

@@ -21,16 +21,37 @@ pub struct RegenerateResult {
/// ///
/// Uses per-item error handling (fail-soft) and drains the queue completely /// Uses per-item error handling (fail-soft) and drains the queue completely
/// via a bounded batch loop. Each dirty item is processed independently. /// via a bounded batch loop. Each dirty item is processed independently.
#[instrument(skip(conn), fields(items_processed, items_skipped, errors))] ///
pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult> { /// `progress_callback` reports `(processed, estimated_total)` after each item.
#[instrument(
skip(conn, progress_callback),
fields(items_processed, items_skipped, errors)
)]
pub fn regenerate_dirty_documents(
conn: &Connection,
progress_callback: Option<&dyn Fn(usize, usize)>,
) -> Result<RegenerateResult> {
let mut result = RegenerateResult::default(); let mut result = RegenerateResult::default();
// Estimated total for progress reporting. Recount each loop iteration
// so the denominator grows if new items are enqueued during processing
// (the queue can grow while we drain it). We use max() so the value
// never shrinks — preventing the progress fraction from going backwards.
let mut estimated_total: usize = 0;
loop { loop {
let dirty = get_dirty_sources(conn)?; let dirty = get_dirty_sources(conn)?;
if dirty.is_empty() { if dirty.is_empty() {
break; break;
} }
// Recount remaining + already-processed to get the true total.
let remaining: usize = conn
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0))
.unwrap_or(0_i64) as usize;
let processed_so_far = result.regenerated + result.unchanged + result.errored;
estimated_total = estimated_total.max(processed_so_far + remaining);
for (source_type, source_id) in &dirty { for (source_type, source_id) in &dirty {
match regenerate_one(conn, *source_type, *source_id) { match regenerate_one(conn, *source_type, *source_id) {
Ok(changed) => { Ok(changed) => {
@@ -52,6 +73,11 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
result.errored += 1; result.errored += 1;
} }
} }
let processed = result.regenerated + result.unchanged + result.errored;
if let Some(cb) = progress_callback {
cb(processed, estimated_total);
}
} }
} }
@@ -123,7 +149,9 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> {
Ok(()) Ok(())
} }
Err(e) => { Err(e) => {
let _ = conn.execute_batch("ROLLBACK TO upsert_doc"); // ROLLBACK TO restores the savepoint but leaves it active.
// RELEASE removes it so the connection is clean for the next call.
let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc");
Err(e) Err(e)
} }
} }
@@ -358,7 +386,7 @@ mod tests {
).unwrap(); ).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 1); assert_eq!(result.regenerated, 1);
assert_eq!(result.unchanged, 0); assert_eq!(result.unchanged, 0);
assert_eq!(result.errored, 0); assert_eq!(result.errored, 0);
@@ -385,12 +413,12 @@ mod tests {
// First regeneration creates the document // First regeneration creates the document
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let r1 = regenerate_dirty_documents(&conn).unwrap(); let r1 = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(r1.regenerated, 1); assert_eq!(r1.regenerated, 1);
// Second regeneration — same data, should be unchanged // Second regeneration — same data, should be unchanged
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let r2 = regenerate_dirty_documents(&conn).unwrap(); let r2 = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(r2.unchanged, 1); assert_eq!(r2.unchanged, 1);
assert_eq!(r2.regenerated, 0); assert_eq!(r2.regenerated, 0);
} }
@@ -403,7 +431,7 @@ mod tests {
[], [],
).unwrap(); ).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
regenerate_dirty_documents(&conn).unwrap(); regenerate_dirty_documents(&conn, None).unwrap();
// Delete the issue and re-mark dirty // Delete the issue and re-mark dirty
conn.execute("PRAGMA foreign_keys = OFF", []).unwrap(); conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
@@ -411,7 +439,7 @@ mod tests {
conn.execute("PRAGMA foreign_keys = ON", []).unwrap(); conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 1); // Deletion counts as "changed" assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
let count: i64 = conn let count: i64 = conn
@@ -431,7 +459,7 @@ mod tests {
mark_dirty(&conn, SourceType::Issue, i).unwrap(); mark_dirty(&conn, SourceType::Issue, i).unwrap();
} }
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 10); assert_eq!(result.regenerated, 10);
// Queue should be empty // Queue should be empty
@@ -459,11 +487,11 @@ mod tests {
// First run creates document // First run creates document
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
regenerate_dirty_documents(&conn).unwrap(); regenerate_dirty_documents(&conn, None).unwrap();
// Second run — triple hash match, should skip ALL writes // Second run — triple hash match, should skip ALL writes
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.unchanged, 1); assert_eq!(result.unchanged, 1);
// Labels should still be present (not deleted and re-inserted) // Labels should still be present (not deleted and re-inserted)

View File

@@ -122,6 +122,7 @@ impl GitLabClient {
/// Make an authenticated API request with automatic 429 retry. /// Make an authenticated API request with automatic 429 retry.
async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> { async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
let url = format!("{}{}", self.base_url, path); let url = format!("{}{}", self.base_url, path);
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES { for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay(); let delay = self.rate_limiter.lock().await.check_delay();
@@ -155,10 +156,15 @@ impl GitLabClient {
continue; continue;
} }
return self.handle_response(response, path).await; last_response = Some(response);
break;
} }
unreachable!("loop always returns") // Safety: the loop always executes at least once (0..=MAX_RETRIES)
// and either sets last_response+break, or continues (only when
// attempt < MAX_RETRIES). The final iteration always reaches break.
self.handle_response(last_response.expect("retry loop ran at least once"), path)
.await
} }
/// Parse retry-after header from a 429 response, defaulting to 60s. /// Parse retry-after header from a 429 response, defaulting to 60s.
@@ -543,6 +549,7 @@ impl GitLabClient {
params: &[(&str, String)], params: &[(&str, String)],
) -> Result<(T, HeaderMap)> { ) -> Result<(T, HeaderMap)> {
let url = format!("{}{}", self.base_url, path); let url = format!("{}{}", self.base_url, path);
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES { for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay(); let delay = self.rate_limiter.lock().await.check_delay();
@@ -577,12 +584,14 @@ impl GitLabClient {
continue; continue;
} }
let headers = response.headers().clone(); last_response = Some(response);
let body = self.handle_response(response, path).await?; break;
return Ok((body, headers));
} }
unreachable!("loop always returns") let response = last_response.expect("retry loop ran at least once");
let headers = response.headers().clone();
let body = self.handle_response(response, path).await?;
Ok((body, headers))
} }
} }