fix: Retry loop safety, doctor model matching, regenerator robustness

Three defensive improvements from peer code review:

- Replace unreachable!() in GitLab client retry loops: Both request() and
  request_with_headers() had unreachable!() after their for loops. While the
  logic was sound (the final iteration always reaches the return/break), any
  refactor to the loop condition would turn this into a runtime panic.
  Restructured both to store last_response with an explicit break, making the
  control flow self-documenting and the .expect() message useful if ever
  violated.

- Doctor model name comparison asymmetry: Ollama model names were stripped of
  their tag (:latest, :v1.5) for comparison, but the configured model name was
  compared as-is. A config value like "nomic-embed-text:v1.5" would never
  match. Now strips the tag from both sides before comparing.

- Regenerator savepoint cleanup and progress accuracy:
  - upsert_document's error path did ROLLBACK TO but never RELEASE, leaving a
    dangling savepoint that could nest on the next call. Added RELEASE after
    rollback so the connection is clean.
  - estimated_total for progress reporting was computed once at start, but the
    dirty queue can grow during processing. Now recounts each loop iteration
    with max() so the progress fraction never goes backwards.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -418,7 +418,11 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
|
||||
.map(|m| m.name.split(':').next().unwrap_or(&m.name))
|
||||
.collect();
|
||||
|
||||
if !model_names.iter().any(|m| *m == model) {
|
||||
// Strip tag from configured model name too (e.g.
|
||||
// "nomic-embed-text:v1.5" → "nomic-embed-text") so both
|
||||
// sides are compared at the same granularity.
|
||||
let model_base = model.split(':').next().unwrap_or(model);
|
||||
if !model_names.contains(&model_base) {
|
||||
return OllamaCheck {
|
||||
result: CheckResult {
|
||||
status: CheckStatus::Warning,
|
||||
|
||||
@@ -21,16 +21,37 @@ pub struct RegenerateResult {
|
||||
///
|
||||
/// Uses per-item error handling (fail-soft) and drains the queue completely
|
||||
/// via a bounded batch loop. Each dirty item is processed independently.
|
||||
#[instrument(skip(conn), fields(items_processed, items_skipped, errors))]
|
||||
pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult> {
|
||||
///
|
||||
/// `progress_callback` reports `(processed, estimated_total)` after each item.
|
||||
#[instrument(
|
||||
skip(conn, progress_callback),
|
||||
fields(items_processed, items_skipped, errors)
|
||||
)]
|
||||
pub fn regenerate_dirty_documents(
|
||||
conn: &Connection,
|
||||
progress_callback: Option<&dyn Fn(usize, usize)>,
|
||||
) -> Result<RegenerateResult> {
|
||||
let mut result = RegenerateResult::default();
|
||||
|
||||
// Estimated total for progress reporting. Recount each loop iteration
|
||||
// so the denominator grows if new items are enqueued during processing
|
||||
// (the queue can grow while we drain it). We use max() so the value
|
||||
// never shrinks — preventing the progress fraction from going backwards.
|
||||
let mut estimated_total: usize = 0;
|
||||
|
||||
loop {
|
||||
let dirty = get_dirty_sources(conn)?;
|
||||
if dirty.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Recount remaining + already-processed to get the true total.
|
||||
let remaining: usize = conn
|
||||
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0))
|
||||
.unwrap_or(0_i64) as usize;
|
||||
let processed_so_far = result.regenerated + result.unchanged + result.errored;
|
||||
estimated_total = estimated_total.max(processed_so_far + remaining);
|
||||
|
||||
for (source_type, source_id) in &dirty {
|
||||
match regenerate_one(conn, *source_type, *source_id) {
|
||||
Ok(changed) => {
|
||||
@@ -52,6 +73,11 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
|
||||
result.errored += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let processed = result.regenerated + result.unchanged + result.errored;
|
||||
if let Some(cb) = progress_callback {
|
||||
cb(processed, estimated_total);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +149,9 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = conn.execute_batch("ROLLBACK TO upsert_doc");
|
||||
// ROLLBACK TO restores the savepoint but leaves it active.
|
||||
// RELEASE removes it so the connection is clean for the next call.
|
||||
let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc");
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
@@ -358,7 +386,7 @@ mod tests {
|
||||
).unwrap();
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
|
||||
let result = regenerate_dirty_documents(&conn).unwrap();
|
||||
let result = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(result.regenerated, 1);
|
||||
assert_eq!(result.unchanged, 0);
|
||||
assert_eq!(result.errored, 0);
|
||||
@@ -385,12 +413,12 @@ mod tests {
|
||||
|
||||
// First regeneration creates the document
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
let r1 = regenerate_dirty_documents(&conn).unwrap();
|
||||
let r1 = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(r1.regenerated, 1);
|
||||
|
||||
// Second regeneration — same data, should be unchanged
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
let r2 = regenerate_dirty_documents(&conn).unwrap();
|
||||
let r2 = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(r2.unchanged, 1);
|
||||
assert_eq!(r2.regenerated, 0);
|
||||
}
|
||||
@@ -403,7 +431,7 @@ mod tests {
|
||||
[],
|
||||
).unwrap();
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
regenerate_dirty_documents(&conn).unwrap();
|
||||
regenerate_dirty_documents(&conn, None).unwrap();
|
||||
|
||||
// Delete the issue and re-mark dirty
|
||||
conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
|
||||
@@ -411,7 +439,7 @@ mod tests {
|
||||
conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
|
||||
let result = regenerate_dirty_documents(&conn).unwrap();
|
||||
let result = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
|
||||
|
||||
let count: i64 = conn
|
||||
@@ -431,7 +459,7 @@ mod tests {
|
||||
mark_dirty(&conn, SourceType::Issue, i).unwrap();
|
||||
}
|
||||
|
||||
let result = regenerate_dirty_documents(&conn).unwrap();
|
||||
let result = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(result.regenerated, 10);
|
||||
|
||||
// Queue should be empty
|
||||
@@ -459,11 +487,11 @@ mod tests {
|
||||
|
||||
// First run creates document
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
regenerate_dirty_documents(&conn).unwrap();
|
||||
regenerate_dirty_documents(&conn, None).unwrap();
|
||||
|
||||
// Second run — triple hash match, should skip ALL writes
|
||||
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
|
||||
let result = regenerate_dirty_documents(&conn).unwrap();
|
||||
let result = regenerate_dirty_documents(&conn, None).unwrap();
|
||||
assert_eq!(result.unchanged, 1);
|
||||
|
||||
// Labels should still be present (not deleted and re-inserted)
|
||||
|
||||
@@ -122,6 +122,7 @@ impl GitLabClient {
|
||||
/// Make an authenticated API request with automatic 429 retry.
|
||||
async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
|
||||
let url = format!("{}{}", self.base_url, path);
|
||||
let mut last_response = None;
|
||||
|
||||
for attempt in 0..=Self::MAX_RETRIES {
|
||||
let delay = self.rate_limiter.lock().await.check_delay();
|
||||
@@ -155,10 +156,15 @@ impl GitLabClient {
|
||||
continue;
|
||||
}
|
||||
|
||||
return self.handle_response(response, path).await;
|
||||
last_response = Some(response);
|
||||
break;
|
||||
}
|
||||
|
||||
unreachable!("loop always returns")
|
||||
// Safety: the loop always executes at least once (0..=MAX_RETRIES)
|
||||
// and either sets last_response+break, or continues (only when
|
||||
// attempt < MAX_RETRIES). The final iteration always reaches break.
|
||||
self.handle_response(last_response.expect("retry loop ran at least once"), path)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Parse retry-after header from a 429 response, defaulting to 60s.
|
||||
@@ -543,6 +549,7 @@ impl GitLabClient {
|
||||
params: &[(&str, String)],
|
||||
) -> Result<(T, HeaderMap)> {
|
||||
let url = format!("{}{}", self.base_url, path);
|
||||
let mut last_response = None;
|
||||
|
||||
for attempt in 0..=Self::MAX_RETRIES {
|
||||
let delay = self.rate_limiter.lock().await.check_delay();
|
||||
@@ -577,12 +584,14 @@ impl GitLabClient {
|
||||
continue;
|
||||
}
|
||||
|
||||
let headers = response.headers().clone();
|
||||
let body = self.handle_response(response, path).await?;
|
||||
return Ok((body, headers));
|
||||
last_response = Some(response);
|
||||
break;
|
||||
}
|
||||
|
||||
unreachable!("loop always returns")
|
||||
let response = last_response.expect("retry loop ran at least once");
|
||||
let headers = response.headers().clone();
|
||||
let body = self.handle_response(response, path).await?;
|
||||
Ok((body, headers))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user