fix(core): reduce ollama startup blocking and handle cold starts gracefully

The ensure_ollama() function previously blocked for up to 10 seconds waiting for Ollama to become reachable after spawning. Cold starts can take 30-60s, so this often timed out and reported a misleading error. It now waits only 5 seconds (enough for hot restarts); if Ollama is still starting after that, it reports started=true, running=false with no error instead of treating it as a failure. The embed stage runs 60-90s later (after ingestion), by which time Ollama should be ready. The handler log message is updated to distinguish hot restarts from cold starts still in progress.
2026-03-13 08:58:18 -04:00
parent 347ea91bea
commit 796b6b7289
2 changed files with 18 additions and 12 deletions
--- a/src/app/handlers.rs
+++ b/src/app/handlers.rs
@@ -1672,8 +1672,10 @@ async fn handle_sync_cmd(
                "Ollama is not installed — embeddings will be skipped. {}",
                result.install_hint.as_deref().unwrap_or("")
            );
-        } else if result.started {
+        } else if result.started && result.running {
            tracing::info!("Started ollama serve (was not running)");
+        } else if result.started {
+            tracing::info!("Spawned ollama serve (cold start in progress, should be ready by embed stage)");
        } else if !result.running {
            tracing::warn!(
                "Failed to start Ollama: {}",
--- a/src/core/ollama_mgmt.rs
+++ b/src/core/ollama_mgmt.rs
@@ -104,12 +104,12 @@ pub fn install_instructions() -> &'static str {
    }
 }

-// ── Ensure (sync, may block up to ~10s while waiting for startup) ──
+// ── Ensure (sync, spawns ollama if needed; may block up to ~5s) ──

 /// Result of attempting to ensure Ollama is running.
 #[derive(Debug, Serialize)]
 pub struct OllamaEnsureResult {
-    /// Whether the `ollama` binary was found on PATH.
+    /// Whether the `ollama` binary was found.
    pub installed: bool,
    /// Whether Ollama was already running before we tried anything.
    pub was_running: bool,
@@ -131,9 +131,10 @@ pub struct OllamaEnsureResult {
 /// Only attempts to start `ollama serve` when the configured URL points at
 /// localhost. For remote URLs, only checks reachability.
 ///
-/// This blocks for up to ~10 seconds waiting for Ollama to become reachable
-/// after a start attempt. Intended for cron/lock mode where a brief delay
-/// is acceptable.
+/// After spawning, polls for up to ~5 seconds, which covers hot restarts.
+/// Cold starts can take 30-60 seconds; in that case this returns with
+/// `started: true`, `running: false` and no error, on the expectation that
+/// the embed stage (typically 60-90s later, after ingestion) finds it ready.
 pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
    let is_local = is_local_url(base_url);

@@ -202,8 +203,11 @@ pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
        };
    }

-    // Step 5: Wait for it to become reachable (up to ~10 seconds)
-    for _ in 0..20 {
+    // Step 5: Brief wait for hot restarts (5 seconds).
+    // Cold starts take 30-60s but we don't block for that — ingestion runs
+    // for 60-90s before the embed stage needs Ollama, giving it plenty of
+    // time to boot in the background.
+    for _ in 0..10 {
        std::thread::sleep(Duration::from_millis(500));
        if is_ollama_reachable(base_url) {
            return OllamaEnsureResult {
@@ -217,14 +221,14 @@ pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
        }
    }

+    // Spawn succeeded but Ollama is still starting up — report as started
+    // (not an error). It should be ready by the time the embed stage runs.
    OllamaEnsureResult {
        installed: true,
        was_running: false,
-        started: false,
+        started: true,
        running: false,
-        error: Some(
-            "Spawned 'ollama serve' but it did not become reachable within 10 seconds".to_string(),
-        ),
+        error: None,
        install_hint: None,
    }
 }