fix: Retry loop safety, doctor model matching, regenerator robustness

Three defensive improvements from peer code review:

Replace unreachable!() in GitLab client retry loops:
Both request() and request_with_headers() had unreachable!() after
their for loops. While the logic was sound (the final iteration always
reaches the return/break), any refactor to the loop condition could
turn this into a runtime panic. Restructured both to store
last_response with an explicit break, making the control flow
self-documenting and the .expect() message useful if the invariant is
ever violated.

Doctor model name comparison asymmetry:
Ollama model names were stripped of their tag (:latest, :v1.5) for
comparison, but the configured model name was compared as-is. A config
value like "nomic-embed-text:v1.5" would never match. Now strips the
tag from both sides before comparing.

Regenerator savepoint cleanup and progress accuracy:
- upsert_document's error path did ROLLBACK TO but never RELEASE,
  leaving a dangling savepoint that could nest on the next call. Added
  RELEASE after rollback so the connection is clean.
- estimated_total for progress reporting was computed once at start but
  the dirty queue can grow during processing. Now recounts each loop
  iteration with max() so the progress fraction never goes backwards.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-04 14:16:54 -05:00
parent 1fdc6d03cc
commit 925ec9f574
3 changed files with 59 additions and 18 deletions

View File

@@ -418,7 +418,11 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
.map(|m| m.name.split(':').next().unwrap_or(&m.name)) .map(|m| m.name.split(':').next().unwrap_or(&m.name))
.collect(); .collect();
if !model_names.iter().any(|m| *m == model) { // Strip tag from configured model name too (e.g.
// "nomic-embed-text:v1.5" → "nomic-embed-text") so both
// sides are compared at the same granularity.
let model_base = model.split(':').next().unwrap_or(model);
if !model_names.contains(&model_base) {
return OllamaCheck { return OllamaCheck {
result: CheckResult { result: CheckResult {
status: CheckStatus::Warning, status: CheckStatus::Warning,

View File

@@ -21,16 +21,37 @@ pub struct RegenerateResult {
/// ///
/// Uses per-item error handling (fail-soft) and drains the queue completely /// Uses per-item error handling (fail-soft) and drains the queue completely
/// via a bounded batch loop. Each dirty item is processed independently. /// via a bounded batch loop. Each dirty item is processed independently.
#[instrument(skip(conn), fields(items_processed, items_skipped, errors))] ///
pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult> { /// `progress_callback` reports `(processed, estimated_total)` after each item.
#[instrument(
skip(conn, progress_callback),
fields(items_processed, items_skipped, errors)
)]
pub fn regenerate_dirty_documents(
conn: &Connection,
progress_callback: Option<&dyn Fn(usize, usize)>,
) -> Result<RegenerateResult> {
let mut result = RegenerateResult::default(); let mut result = RegenerateResult::default();
// Estimated total for progress reporting. Recount each loop iteration
// so the denominator grows if new items are enqueued during processing
// (the queue can grow while we drain it). We use max() so the value
// never shrinks — preventing the progress fraction from going backwards.
let mut estimated_total: usize = 0;
loop { loop {
let dirty = get_dirty_sources(conn)?; let dirty = get_dirty_sources(conn)?;
if dirty.is_empty() { if dirty.is_empty() {
break; break;
} }
// Recount remaining + already-processed to get the true total.
let remaining: usize = conn
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0))
.unwrap_or(0_i64) as usize;
let processed_so_far = result.regenerated + result.unchanged + result.errored;
estimated_total = estimated_total.max(processed_so_far + remaining);
for (source_type, source_id) in &dirty { for (source_type, source_id) in &dirty {
match regenerate_one(conn, *source_type, *source_id) { match regenerate_one(conn, *source_type, *source_id) {
Ok(changed) => { Ok(changed) => {
@@ -52,6 +73,11 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
result.errored += 1; result.errored += 1;
} }
} }
let processed = result.regenerated + result.unchanged + result.errored;
if let Some(cb) = progress_callback {
cb(processed, estimated_total);
}
} }
} }
@@ -123,7 +149,9 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> {
Ok(()) Ok(())
} }
Err(e) => { Err(e) => {
let _ = conn.execute_batch("ROLLBACK TO upsert_doc"); // ROLLBACK TO restores the savepoint but leaves it active.
// RELEASE removes it so the connection is clean for the next call.
let _ = conn.execute_batch("ROLLBACK TO upsert_doc; RELEASE upsert_doc");
Err(e) Err(e)
} }
} }
@@ -358,7 +386,7 @@ mod tests {
).unwrap(); ).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 1); assert_eq!(result.regenerated, 1);
assert_eq!(result.unchanged, 0); assert_eq!(result.unchanged, 0);
assert_eq!(result.errored, 0); assert_eq!(result.errored, 0);
@@ -385,12 +413,12 @@ mod tests {
// First regeneration creates the document // First regeneration creates the document
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let r1 = regenerate_dirty_documents(&conn).unwrap(); let r1 = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(r1.regenerated, 1); assert_eq!(r1.regenerated, 1);
// Second regeneration — same data, should be unchanged // Second regeneration — same data, should be unchanged
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let r2 = regenerate_dirty_documents(&conn).unwrap(); let r2 = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(r2.unchanged, 1); assert_eq!(r2.unchanged, 1);
assert_eq!(r2.regenerated, 0); assert_eq!(r2.regenerated, 0);
} }
@@ -403,7 +431,7 @@ mod tests {
[], [],
).unwrap(); ).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
regenerate_dirty_documents(&conn).unwrap(); regenerate_dirty_documents(&conn, None).unwrap();
// Delete the issue and re-mark dirty // Delete the issue and re-mark dirty
conn.execute("PRAGMA foreign_keys = OFF", []).unwrap(); conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
@@ -411,7 +439,7 @@ mod tests {
conn.execute("PRAGMA foreign_keys = ON", []).unwrap(); conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 1); // Deletion counts as "changed" assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
let count: i64 = conn let count: i64 = conn
@@ -431,7 +459,7 @@ mod tests {
mark_dirty(&conn, SourceType::Issue, i).unwrap(); mark_dirty(&conn, SourceType::Issue, i).unwrap();
} }
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.regenerated, 10); assert_eq!(result.regenerated, 10);
// Queue should be empty // Queue should be empty
@@ -459,11 +487,11 @@ mod tests {
// First run creates document // First run creates document
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
regenerate_dirty_documents(&conn).unwrap(); regenerate_dirty_documents(&conn, None).unwrap();
// Second run — triple hash match, should skip ALL writes // Second run — triple hash match, should skip ALL writes
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn, None).unwrap();
assert_eq!(result.unchanged, 1); assert_eq!(result.unchanged, 1);
// Labels should still be present (not deleted and re-inserted) // Labels should still be present (not deleted and re-inserted)

View File

@@ -122,6 +122,7 @@ impl GitLabClient {
/// Make an authenticated API request with automatic 429 retry. /// Make an authenticated API request with automatic 429 retry.
async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> { async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
let url = format!("{}{}", self.base_url, path); let url = format!("{}{}", self.base_url, path);
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES { for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay(); let delay = self.rate_limiter.lock().await.check_delay();
@@ -155,10 +156,15 @@ impl GitLabClient {
continue; continue;
} }
return self.handle_response(response, path).await; last_response = Some(response);
break;
} }
unreachable!("loop always returns") // Safety: the loop always executes at least once (0..=MAX_RETRIES)
// and either sets last_response+break, or continues (only when
// attempt < MAX_RETRIES). The final iteration always reaches break.
self.handle_response(last_response.expect("retry loop ran at least once"), path)
.await
} }
/// Parse retry-after header from a 429 response, defaulting to 60s. /// Parse retry-after header from a 429 response, defaulting to 60s.
@@ -543,6 +549,7 @@ impl GitLabClient {
params: &[(&str, String)], params: &[(&str, String)],
) -> Result<(T, HeaderMap)> { ) -> Result<(T, HeaderMap)> {
let url = format!("{}{}", self.base_url, path); let url = format!("{}{}", self.base_url, path);
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES { for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay(); let delay = self.rate_limiter.lock().await.check_delay();
@@ -577,12 +584,14 @@ impl GitLabClient {
continue; continue;
} }
let headers = response.headers().clone(); last_response = Some(response);
let body = self.handle_response(response, path).await?; break;
return Ok((body, headers));
} }
unreachable!("loop always returns") let response = last_response.expect("retry loop ran at least once");
let headers = response.headers().clone();
let body = self.handle_response(response, path).await?;
Ok((body, headers))
} }
} }