use crate::cli::render::Theme; use serde::Serialize; use crate::Config; use crate::cli::robot::RobotMeta; use crate::core::db::{LATEST_SCHEMA_VERSION, create_connection, get_schema_version}; use crate::core::error::{LoreError, Result}; use crate::core::paths::get_db_path; use crate::core::shutdown::ShutdownSignal; use crate::embedding::ollama::{OllamaClient, OllamaConfig}; use crate::embedding::pipeline::{DEFAULT_EMBED_CONCURRENCY, embed_documents}; #[derive(Debug, Default, Serialize)] pub struct EmbedCommandResult { pub docs_embedded: usize, pub chunks_embedded: usize, pub failed: usize, pub skipped: usize, } pub async fn run_embed( config: &Config, full: bool, retry_failed: bool, progress_callback: Option>, signal: &ShutdownSignal, ) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; let schema_version = get_schema_version(&conn); if schema_version < LATEST_SCHEMA_VERSION { return Err(LoreError::MigrationFailed { version: schema_version, message: format!( "Database is at schema version {schema_version} but {LATEST_SCHEMA_VERSION} is required. \ Run 'lore migrate' first." ), source: None, }); } let ollama_config = OllamaConfig { base_url: config.embedding.base_url.clone(), model: config.embedding.model.clone(), ..OllamaConfig::default() }; let client = OllamaClient::new(ollama_config); client.health_check().await?; if full { conn.execute_batch( "BEGIN; DELETE FROM embedding_metadata; DELETE FROM embeddings; COMMIT;", )?; } else if retry_failed { // DELETE (not UPDATE) so the LEFT JOIN in find_pending_documents returns NULL, // making the doc appear pending again. UPDATE would leave a matching row that // still satisfies the config-param check, making the doc permanently invisible. conn.execute_batch( "BEGIN; DELETE FROM embeddings WHERE rowid / 1000 IN ( SELECT DISTINCT document_id FROM embedding_metadata WHERE last_error IS NOT NULL ); DELETE FROM embedding_metadata WHERE last_error IS NOT NULL; COMMIT;", )?; } let model_name = &config.embedding.model; let concurrency = if config.embedding.concurrency > 0 { config.embedding.concurrency as usize } else { DEFAULT_EMBED_CONCURRENCY }; let result = embed_documents( &conn, &client, model_name, concurrency, progress_callback, signal, ) .await?; Ok(EmbedCommandResult { docs_embedded: result.docs_embedded, chunks_embedded: result.chunks_embedded, failed: result.failed, skipped: result.skipped, }) } pub fn print_embed(result: &EmbedCommandResult) { if result.docs_embedded == 0 && result.failed == 0 && result.skipped == 0 { println!( "\n {} nothing to embed", Theme::success().bold().render("Embedding") ); return; } println!( "\n {} {} documents ({} chunks)", Theme::success().bold().render("Embedded"), Theme::bold().render(&result.docs_embedded.to_string()), result.chunks_embedded ); if result.failed > 0 { println!( " {}", Theme::error().render(&format!("{} failed", result.failed)) ); } if result.skipped > 0 { println!( " {}", Theme::dim().render(&format!("{} skipped", result.skipped)) ); } } #[derive(Serialize)] struct EmbedJsonOutput<'a> { ok: bool, data: &'a EmbedCommandResult, meta: RobotMeta, } pub fn print_embed_json(result: &EmbedCommandResult, elapsed_ms: u64) { let output = EmbedJsonOutput { ok: true, data: result, meta: RobotMeta { elapsed_ms }, }; match serde_json::to_string(&output) { Ok(json) => println!("{json}"), Err(e) => eprintln!("Error serializing to JSON: {e}"), } }