The sync command's stage spinners now show real-time aggregate progress
for each pipeline phase instead of static "syncing..." messages.
- Add `progress_callback` parameter to `run_embed` and
`run_generate_docs` so callers can receive `(processed, total)` updates
- Add `stage_bar` parameter to `run_ingest` for aggregate progress
across concurrently-ingested projects using shared AtomicUsize counters
- Update `stage_spinner` to use `{prefix}` for the `[N/M]` label,
allowing `{msg}` to be updated independently with progress details
- Thread `ProgressBar` clones into each concurrent project task so
per-entity progress (fetch, discussions, events) is reflected on the
aggregate spinner
- Pass `None` for progress callbacks at standalone CLI entry points
(handle_ingest, handle_generate_docs, handle_embed) to preserve
existing behavior when commands are run outside of sync
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
100 lines · 2.9 KiB · Rust
//! Embed command: generate vector embeddings for documents via Ollama.
|
|
|
|
use console::style;
|
|
use serde::Serialize;
|
|
|
|
use crate::Config;
|
|
use crate::core::db::create_connection;
|
|
use crate::core::error::Result;
|
|
use crate::core::paths::get_db_path;
|
|
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
|
use crate::embedding::pipeline::embed_documents;
|
|
|
|
/// Result of the embed command.
///
/// Aggregate counts produced by the embedding pipeline, consumed by the
/// human-readable and JSON printers below.
#[derive(Debug, Default, Serialize)]
pub struct EmbedCommandResult {
    /// Documents successfully embedded during this run.
    pub embedded: usize,
    /// Documents whose embedding attempt failed.
    pub failed: usize,
    /// Documents the pipeline chose not to process (reason determined
    /// inside `embed_documents`).
    pub skipped: usize,
}
|
|
|
|
/// Run the embed command.
|
|
///
|
|
/// `progress_callback` reports `(processed, total)` as documents are embedded.
|
|
pub async fn run_embed(
|
|
config: &Config,
|
|
full: bool,
|
|
retry_failed: bool,
|
|
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
|
|
) -> Result<EmbedCommandResult> {
|
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
|
let conn = create_connection(&db_path)?;
|
|
|
|
// Build Ollama config from user settings
|
|
let ollama_config = OllamaConfig {
|
|
base_url: config.embedding.base_url.clone(),
|
|
model: config.embedding.model.clone(),
|
|
..OllamaConfig::default()
|
|
};
|
|
let client = OllamaClient::new(ollama_config);
|
|
|
|
// Health check — fail fast if Ollama is down or model missing
|
|
client.health_check().await?;
|
|
|
|
if full {
|
|
// Clear ALL embeddings and metadata atomically for a complete re-embed.
|
|
// Wrapped in a transaction so a crash between the two DELETEs can't
|
|
// leave orphaned data.
|
|
conn.execute_batch(
|
|
"BEGIN;
|
|
DELETE FROM embedding_metadata;
|
|
DELETE FROM embeddings;
|
|
COMMIT;",
|
|
)?;
|
|
} else if retry_failed {
|
|
// Clear errors so they become pending again
|
|
conn.execute(
|
|
"UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0
|
|
WHERE last_error IS NOT NULL",
|
|
[],
|
|
)?;
|
|
}
|
|
|
|
let model_name = &config.embedding.model;
|
|
let result = embed_documents(&conn, &client, model_name, progress_callback).await?;
|
|
|
|
Ok(EmbedCommandResult {
|
|
embedded: result.embedded,
|
|
failed: result.failed,
|
|
skipped: result.skipped,
|
|
})
|
|
}
|
|
|
|
/// Print human-readable output.
|
|
pub fn print_embed(result: &EmbedCommandResult) {
|
|
println!("{} Embedding complete", style("done").green().bold(),);
|
|
println!(" Embedded: {}", result.embedded);
|
|
if result.failed > 0 {
|
|
println!(" Failed: {}", style(result.failed).red());
|
|
}
|
|
if result.skipped > 0 {
|
|
println!(" Skipped: {}", result.skipped);
|
|
}
|
|
}
|
|
|
|
/// JSON output.
///
/// Envelope for robot-mode output: an `ok` flag plus the result payload
/// under `data`.
#[derive(Serialize)]
struct EmbedJsonOutput<'a> {
    /// Whether the command completed successfully.
    ok: bool,
    /// Borrowed result counts; serialized inline as the `data` field.
    data: &'a EmbedCommandResult,
}
|
|
|
|
/// Print JSON robot-mode output.
|
|
pub fn print_embed_json(result: &EmbedCommandResult) {
|
|
let output = EmbedJsonOutput {
|
|
ok: true,
|
|
data: result,
|
|
};
|
|
println!("{}", serde_json::to_string(&output).unwrap());
|
|
}
|