fix(core): reduce ollama startup blocking and handle cold starts gracefully
The ensure_ollama() function previously blocked for up to 10 seconds waiting for Ollama to become reachable after spawning it. Cold starts can take 30-60s, so this wait often timed out and reported a misleading error. It now waits at most 5 seconds (enough for hot restarts); if Ollama is still starting after that, it returns started=true, running=false with no error instead of treating the slow start as a failure. The embed stage runs 60-90s later (after ingestion), by which time Ollama is expected to be ready. The handler log message is updated to distinguish a completed hot restart from a cold start that is still in progress.
This commit is contained in:
@@ -1672,8 +1672,10 @@ async fn handle_sync_cmd(
|
||||
"Ollama is not installed — embeddings will be skipped. {}",
|
||||
result.install_hint.as_deref().unwrap_or("")
|
||||
);
|
||||
} else if result.started {
|
||||
} else if result.started && result.running {
|
||||
tracing::info!("Started ollama serve (was not running)");
|
||||
} else if result.started {
|
||||
tracing::info!("Spawned ollama serve (cold start in progress, should be ready by embed stage)");
|
||||
} else if !result.running {
|
||||
tracing::warn!(
|
||||
"Failed to start Ollama: {}",
|
||||
|
||||
@@ -104,12 +104,12 @@ pub fn install_instructions() -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Ensure (sync, may block up to ~10s while waiting for startup) ──
|
||||
// ── Ensure (sync, spawns ollama if needed; may block up to ~5s after spawning) ──
|
||||
|
||||
/// Result of attempting to ensure Ollama is running.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct OllamaEnsureResult {
|
||||
/// Whether the `ollama` binary was found on PATH.
|
||||
/// Whether the `ollama` binary was found.
|
||||
pub installed: bool,
|
||||
/// Whether Ollama was already running before we tried anything.
|
||||
pub was_running: bool,
|
||||
@@ -131,9 +131,10 @@ pub struct OllamaEnsureResult {
|
||||
/// Only attempts to start `ollama serve` when the configured URL points at
|
||||
/// localhost. For remote URLs, only checks reachability.
|
||||
///
|
||||
/// This blocks for up to ~10 seconds waiting for Ollama to become reachable
|
||||
/// after a start attempt. Intended for cron/lock mode where a brief delay
|
||||
/// is acceptable.
|
||||
/// After spawning, waits only briefly (5 seconds) for hot restarts. Cold
|
||||
/// starts can take 30-60 seconds, but the embed stage runs much later
|
||||
/// (after ingestion, typically 60-90s) and will find Ollama ready by then.
|
||||
/// This avoids blocking the sync pipeline unnecessarily.
|
||||
pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
|
||||
let is_local = is_local_url(base_url);
|
||||
|
||||
@@ -202,8 +203,11 @@ pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
|
||||
};
|
||||
}
|
||||
|
||||
// Step 5: Wait for it to become reachable (up to ~10 seconds)
|
||||
for _ in 0..20 {
|
||||
// Step 5: Brief wait for hot restarts (5 seconds).
|
||||
// Cold starts take 30-60s but we don't block for that — ingestion runs
|
||||
// for 60-90s before the embed stage needs Ollama, giving it plenty of
|
||||
// time to boot in the background.
|
||||
for _ in 0..10 {
|
||||
std::thread::sleep(Duration::from_millis(500));
|
||||
if is_ollama_reachable(base_url) {
|
||||
return OllamaEnsureResult {
|
||||
@@ -217,14 +221,14 @@ pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
|
||||
}
|
||||
}
|
||||
|
||||
// Spawn succeeded but Ollama is still starting up — report as started
|
||||
// (not an error). It should be ready by the time the embed stage runs.
|
||||
OllamaEnsureResult {
|
||||
installed: true,
|
||||
was_running: false,
|
||||
started: false,
|
||||
started: true,
|
||||
running: false,
|
||||
error: Some(
|
||||
"Spawned 'ollama serve' but it did not become reachable within 10 seconds".to_string(),
|
||||
),
|
||||
error: None,
|
||||
install_hint: None,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user