The sync command's stage spinners now show real-time aggregate progress
for each pipeline phase instead of static "syncing..." messages.
- Add `progress_callback` parameter to `run_embed` and
`run_generate_docs` so callers can receive `(processed, total)` updates
- Add `stage_bar` parameter to `run_ingest` for aggregate progress
across concurrently-ingested projects using shared AtomicUsize counters
- Update `stage_spinner` to use `{prefix}` for the `[N/M]` label,
allowing `{msg}` to be updated independently with progress details
- Thread `ProgressBar` clones into each concurrent project task so
per-entity progress (fetch, discussions, events) is reflected on the
aggregate spinner
- Pass `None` for progress callbacks at standalone CLI entry points
(handle_ingest, handle_generate_docs, handle_embed) to preserve
existing behavior when commands are run outside of sync
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
100 lines · 2.9 KiB · Rust
//! Embed command: generate vector embeddings for documents via Ollama.
|
|
|
|
use console::style;
|
|
use serde::Serialize;
|
|
|
|
use crate::Config;
|
|
use crate::core::db::create_connection;
|
|
use crate::core::error::Result;
|
|
use crate::core::paths::get_db_path;
|
|
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
|
use crate::embedding::pipeline::embed_documents;
|
|
|
|
/// Result of the embed command.
///
/// Aggregate counts produced by the embedding pipeline, consumed by the
/// human-readable and JSON printers below.
#[derive(Debug, Default, Serialize)]
pub struct EmbedCommandResult {
    /// Documents successfully embedded during this run.
    pub embedded: usize,
    /// Documents whose embedding attempt failed.
    pub failed: usize,
    /// Documents the pipeline chose not to process (reason determined
    /// inside `embed_documents`).
    pub skipped: usize,
}
|
|
|
|
/// Run the embed command.
|
|
///
|
|
/// `progress_callback` reports `(processed, total)` as documents are embedded.
|
|
pub async fn run_embed(
|
|
config: &Config,
|
|
full: bool,
|
|
retry_failed: bool,
|
|
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
|
|
) -> Result<EmbedCommandResult> {
|
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
|
let conn = create_connection(&db_path)?;
|
|
|
|
// Build Ollama config from user settings
|
|
let ollama_config = OllamaConfig {
|
|
base_url: config.embedding.base_url.clone(),
|
|
model: config.embedding.model.clone(),
|
|
..OllamaConfig::default()
|
|
};
|
|
let client = OllamaClient::new(ollama_config);
|
|
|
|
// Health check — fail fast if Ollama is down or model missing
|
|
client.health_check().await?;
|
|
|
|
if full {
|
|
// Clear ALL embeddings and metadata atomically for a complete re-embed.
|
|
// Wrapped in a transaction so a crash between the two DELETEs can't
|
|
// leave orphaned data.
|
|
conn.execute_batch(
|
|
"BEGIN;
|
|
DELETE FROM embedding_metadata;
|
|
DELETE FROM embeddings;
|
|
COMMIT;",
|
|
)?;
|
|
} else if retry_failed {
|
|
// Clear errors so they become pending again
|
|
conn.execute(
|
|
"UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0
|
|
WHERE last_error IS NOT NULL",
|
|
[],
|
|
)?;
|
|
}
|
|
|
|
let model_name = &config.embedding.model;
|
|
let result = embed_documents(&conn, &client, model_name, progress_callback).await?;
|
|
|
|
Ok(EmbedCommandResult {
|
|
embedded: result.embedded,
|
|
failed: result.failed,
|
|
skipped: result.skipped,
|
|
})
|
|
}
|
|
|
|
/// Print human-readable output.
|
|
pub fn print_embed(result: &EmbedCommandResult) {
|
|
println!("{} Embedding complete", style("done").green().bold(),);
|
|
println!(" Embedded: {}", result.embedded);
|
|
if result.failed > 0 {
|
|
println!(" Failed: {}", style(result.failed).red());
|
|
}
|
|
if result.skipped > 0 {
|
|
println!(" Skipped: {}", result.skipped);
|
|
}
|
|
}
|
|
|
|
/// JSON output.
///
/// Envelope for robot-mode output: an `ok` flag plus the result payload
/// under `data`.
#[derive(Serialize)]
struct EmbedJsonOutput<'a> {
    /// Whether the command completed successfully.
    ok: bool,
    /// Borrowed result counts; serialized inline as the `data` field.
    data: &'a EmbedCommandResult,
}
|
|
|
|
/// Print JSON robot-mode output.
|
|
pub fn print_embed_json(result: &EmbedCommandResult) {
|
|
let output = EmbedJsonOutput {
|
|
ok: true,
|
|
data: result,
|
|
};
|
|
println!("{}", serde_json::to_string(&output).unwrap());
|
|
}
|