diff --git a/src/cli/commands/doctor.rs b/src/cli/commands/doctor.rs index 40f0697..6b7e96c 100644 --- a/src/cli/commands/doctor.rs +++ b/src/cli/commands/doctor.rs @@ -418,7 +418,11 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck { .map(|m| m.name.split(':').next().unwrap_or(&m.name)) .collect(); - if !model_names.iter().any(|m| *m == model) { + // Strip tag from configured model name too (e.g. + // "nomic-embed-text:v1.5" → "nomic-embed-text") so both + // sides are compared at the same granularity. + let model_base = model.split(':').next().unwrap_or(model); + if !model_names.contains(&model_base) { return OllamaCheck { result: CheckResult { status: CheckStatus::Warning, diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs index d39eb8f..ff2f965 100644 --- a/src/documents/regenerator.rs +++ b/src/documents/regenerator.rs @@ -21,16 +21,37 @@ pub struct RegenerateResult { /// /// Uses per-item error handling (fail-soft) and drains the queue completely /// via a bounded batch loop. Each dirty item is processed independently. -#[instrument(skip(conn), fields(items_processed, items_skipped, errors))] -pub fn regenerate_dirty_documents(conn: &Connection) -> Result { +/// +/// `progress_callback` reports `(processed, estimated_total)` after each item. +#[instrument( + skip(conn, progress_callback), + fields(items_processed, items_skipped, errors) +)] +pub fn regenerate_dirty_documents( + conn: &Connection, + progress_callback: Option<&dyn Fn(usize, usize)>, +) -> Result { let mut result = RegenerateResult::default(); + // Estimated total for progress reporting. Recount each loop iteration + // so the denominator grows if new items are enqueued during processing + // (the queue can grow while we drain it). We use max() so the value + // never shrinks — preventing the progress fraction from going backwards. + let mut estimated_total: usize = 0; + loop { let dirty = get_dirty_sources(conn)?; if dirty.is_empty() { break; } + // Recount remaining + already-processed to get the true total. + let remaining: usize = conn + .query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0)) + .unwrap_or(0_i64) as usize; + let processed_so_far = result.regenerated + result.unchanged + result.errored; + estimated_total = estimated_total.max(processed_so_far + remaining); + for (source_type, source_id) in &dirty { match regenerate_one(conn, *source_type, *source_id) { Ok(changed) => { @@ -52,6 +73,11 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result result.errored += 1; } } + + let processed = result.regenerated + result.unchanged + result.errored; + if let Some(cb) = progress_callback { + cb(processed, estimated_total); + } } } @@ -123,7 +149,9 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { Ok(()) } Err(e) => { - let _ = conn.execute_batch("ROLLBACK TO upsert_doc"); + // ROLLBACK TO restores the savepoint but leaves it active. + // RELEASE removes it so the connection is clean for the next call. + let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc"); Err(e) } } @@ -358,7 +386,7 @@ mod tests { ).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - let result = regenerate_dirty_documents(&conn).unwrap(); + let result = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(result.regenerated, 1); assert_eq!(result.unchanged, 0); assert_eq!(result.errored, 0); @@ -385,12 +413,12 @@ mod tests { // First regeneration creates the document mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - let r1 = regenerate_dirty_documents(&conn).unwrap(); + let r1 = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(r1.regenerated, 1); // Second regeneration — same data, should be unchanged mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - let r2 = regenerate_dirty_documents(&conn).unwrap(); + let r2 = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(r2.unchanged, 1); assert_eq!(r2.regenerated, 0); } @@ -403,7 +431,7 @@ mod tests { [], ).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - regenerate_dirty_documents(&conn).unwrap(); + regenerate_dirty_documents(&conn, None).unwrap(); // Delete the issue and re-mark dirty conn.execute("PRAGMA foreign_keys = OFF", []).unwrap(); @@ -411,7 +439,7 @@ mod tests { conn.execute("PRAGMA foreign_keys = ON", []).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - let result = regenerate_dirty_documents(&conn).unwrap(); + let result = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(result.regenerated, 1); // Deletion counts as "changed" let count: i64 = conn @@ -431,7 +459,7 @@ mod tests { mark_dirty(&conn, SourceType::Issue, i).unwrap(); } - let result = regenerate_dirty_documents(&conn).unwrap(); + let result = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(result.regenerated, 10); // Queue should be empty @@ -459,11 +487,11 @@ mod tests { // First run creates document mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - regenerate_dirty_documents(&conn).unwrap(); + regenerate_dirty_documents(&conn, None).unwrap(); // Second run — triple hash match, should skip ALL writes mark_dirty(&conn, SourceType::Issue, 1).unwrap(); - let result = regenerate_dirty_documents(&conn).unwrap(); + let result = regenerate_dirty_documents(&conn, None).unwrap(); assert_eq!(result.unchanged, 1); // Labels should still be present (not deleted and re-inserted) diff --git a/src/gitlab/client.rs b/src/gitlab/client.rs index dd9552b..e0aeffa 100644 --- a/src/gitlab/client.rs +++ b/src/gitlab/client.rs @@ -122,6 +122,7 @@ impl GitLabClient { /// Make an authenticated API request with automatic 429 retry. async fn request(&self, path: &str) -> Result { let url = format!("{}{}", self.base_url, path); + let mut last_response = None; for attempt in 0..=Self::MAX_RETRIES { let delay = self.rate_limiter.lock().await.check_delay(); @@ -155,10 +156,15 @@ impl GitLabClient { continue; } - return self.handle_response(response, path).await; + last_response = Some(response); + break; } - unreachable!("loop always returns") + // Safety: the loop always executes at least once (0..=MAX_RETRIES) + // and either sets last_response+break, or continues (only when + // attempt < MAX_RETRIES). The final iteration always reaches break. + self.handle_response(last_response.expect("retry loop ran at least once"), path) + .await } /// Parse retry-after header from a 429 response, defaulting to 60s. @@ -543,6 +549,7 @@ impl GitLabClient { params: &[(&str, String)], ) -> Result<(T, HeaderMap)> { let url = format!("{}{}", self.base_url, path); + let mut last_response = None; for attempt in 0..=Self::MAX_RETRIES { let delay = self.rate_limiter.lock().await.check_delay(); @@ -577,12 +584,14 @@ impl GitLabClient { continue; } - let headers = response.headers().clone(); - let body = self.handle_response(response, path).await?; - return Ok((body, headers)); + last_response = Some(response); + break; } - unreachable!("loop always returns") + let response = last_response.expect("retry loop ran at least once"); + let headers = response.headers().clone(); + let body = self.handle_response(response, path).await?; + Ok((body, headers)) } }