Files
gitlore/src/cli/commands/embed.rs
teernisse 097249f4e6 fix(robot): replace JSON serialization unwrap with graceful error handling
Replace serde_json::to_string(&output).unwrap() with match-based error
handling across all robot-mode JSON printers. On serialization failure,
the error is now written to stderr instead of panicking. This hardens
the CLI against unexpected Serialize failures in production.

Affected commands: count (2), embed, generate-docs, ingest (2), search,
stats, sync (2), sync-status, timeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 13:28:53 -05:00

145 lines
4.2 KiB
Rust

use crate::cli::render::Theme;
use serde::Serialize;
use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::{LATEST_SCHEMA_VERSION, create_connection, get_schema_version};
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::shutdown::ShutdownSignal;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::{DEFAULT_EMBED_CONCURRENCY, embed_documents};
#[derive(Debug, Default, Serialize)]
pub struct EmbedCommandResult {
pub docs_embedded: usize,
pub chunks_embedded: usize,
pub failed: usize,
pub skipped: usize,
}
pub async fn run_embed(
config: &Config,
full: bool,
retry_failed: bool,
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
signal: &ShutdownSignal,
) -> Result<EmbedCommandResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let schema_version = get_schema_version(&conn);
if schema_version < LATEST_SCHEMA_VERSION {
return Err(LoreError::MigrationFailed {
version: schema_version,
message: format!(
"Database is at schema version {schema_version} but {LATEST_SCHEMA_VERSION} is required. \
Run 'lore migrate' first."
),
source: None,
});
}
let ollama_config = OllamaConfig {
base_url: config.embedding.base_url.clone(),
model: config.embedding.model.clone(),
..OllamaConfig::default()
};
let client = OllamaClient::new(ollama_config);
client.health_check().await?;
if full {
conn.execute_batch(
"BEGIN;
DELETE FROM embedding_metadata;
DELETE FROM embeddings;
COMMIT;",
)?;
} else if retry_failed {
// DELETE (not UPDATE) so the LEFT JOIN in find_pending_documents returns NULL,
// making the doc appear pending again. UPDATE would leave a matching row that
// still satisfies the config-param check, making the doc permanently invisible.
conn.execute_batch(
"BEGIN;
DELETE FROM embeddings WHERE rowid / 1000 IN (
SELECT DISTINCT document_id FROM embedding_metadata
WHERE last_error IS NOT NULL
);
DELETE FROM embedding_metadata WHERE last_error IS NOT NULL;
COMMIT;",
)?;
}
let model_name = &config.embedding.model;
let concurrency = if config.embedding.concurrency > 0 {
config.embedding.concurrency as usize
} else {
DEFAULT_EMBED_CONCURRENCY
};
let result = embed_documents(
&conn,
&client,
model_name,
concurrency,
progress_callback,
signal,
)
.await?;
Ok(EmbedCommandResult {
docs_embedded: result.docs_embedded,
chunks_embedded: result.chunks_embedded,
failed: result.failed,
skipped: result.skipped,
})
}
pub fn print_embed(result: &EmbedCommandResult) {
if result.docs_embedded == 0 && result.failed == 0 && result.skipped == 0 {
println!(
"\n {} nothing to embed",
Theme::success().bold().render("Embedding")
);
return;
}
println!(
"\n {} {} documents ({} chunks)",
Theme::success().bold().render("Embedded"),
Theme::bold().render(&result.docs_embedded.to_string()),
result.chunks_embedded
);
if result.failed > 0 {
println!(
" {}",
Theme::error().render(&format!("{} failed", result.failed))
);
}
if result.skipped > 0 {
println!(
" {}",
Theme::dim().render(&format!("{} skipped", result.skipped))
);
}
}
#[derive(Serialize)]
struct EmbedJsonOutput<'a> {
ok: bool,
data: &'a EmbedCommandResult,
meta: RobotMeta,
}
pub fn print_embed_json(result: &EmbedCommandResult, elapsed_ms: u64) {
let output = EmbedJsonOutput {
ok: true,
data: result,
meta: RobotMeta { elapsed_ms },
};
match serde_json::to_string(&output) {
Ok(json) => println!("{json}"),
Err(e) => eprintln!("Error serializing to JSON: {e}"),
}
}