Replace serde_json::to_string(&output).unwrap() with match-based error handling across all robot-mode JSON printers. On serialization failure, the error is now written to stderr instead of panicking. This hardens the CLI against unexpected Serialize failures in production. Affected commands: count (2), embed, generate-docs, ingest (2), search, stats, sync (2), sync-status, timeline. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
145 lines
4.2 KiB
Rust
145 lines
4.2 KiB
Rust
use crate::cli::render::Theme;
|
|
use serde::Serialize;
|
|
|
|
use crate::Config;
|
|
use crate::cli::robot::RobotMeta;
|
|
use crate::core::db::{LATEST_SCHEMA_VERSION, create_connection, get_schema_version};
|
|
use crate::core::error::{LoreError, Result};
|
|
use crate::core::paths::get_db_path;
|
|
use crate::core::shutdown::ShutdownSignal;
|
|
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
|
use crate::embedding::pipeline::{DEFAULT_EMBED_CONCURRENCY, embed_documents};
|
|
|
|
#[derive(Debug, Default, Serialize)]
|
|
pub struct EmbedCommandResult {
|
|
pub docs_embedded: usize,
|
|
pub chunks_embedded: usize,
|
|
pub failed: usize,
|
|
pub skipped: usize,
|
|
}
|
|
|
|
pub async fn run_embed(
|
|
config: &Config,
|
|
full: bool,
|
|
retry_failed: bool,
|
|
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
|
|
signal: &ShutdownSignal,
|
|
) -> Result<EmbedCommandResult> {
|
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
|
let conn = create_connection(&db_path)?;
|
|
|
|
let schema_version = get_schema_version(&conn);
|
|
if schema_version < LATEST_SCHEMA_VERSION {
|
|
return Err(LoreError::MigrationFailed {
|
|
version: schema_version,
|
|
message: format!(
|
|
"Database is at schema version {schema_version} but {LATEST_SCHEMA_VERSION} is required. \
|
|
Run 'lore migrate' first."
|
|
),
|
|
source: None,
|
|
});
|
|
}
|
|
|
|
let ollama_config = OllamaConfig {
|
|
base_url: config.embedding.base_url.clone(),
|
|
model: config.embedding.model.clone(),
|
|
..OllamaConfig::default()
|
|
};
|
|
let client = OllamaClient::new(ollama_config);
|
|
|
|
client.health_check().await?;
|
|
|
|
if full {
|
|
conn.execute_batch(
|
|
"BEGIN;
|
|
DELETE FROM embedding_metadata;
|
|
DELETE FROM embeddings;
|
|
COMMIT;",
|
|
)?;
|
|
} else if retry_failed {
|
|
// DELETE (not UPDATE) so the LEFT JOIN in find_pending_documents returns NULL,
|
|
// making the doc appear pending again. UPDATE would leave a matching row that
|
|
// still satisfies the config-param check, making the doc permanently invisible.
|
|
conn.execute_batch(
|
|
"BEGIN;
|
|
DELETE FROM embeddings WHERE rowid / 1000 IN (
|
|
SELECT DISTINCT document_id FROM embedding_metadata
|
|
WHERE last_error IS NOT NULL
|
|
);
|
|
DELETE FROM embedding_metadata WHERE last_error IS NOT NULL;
|
|
COMMIT;",
|
|
)?;
|
|
}
|
|
|
|
let model_name = &config.embedding.model;
|
|
let concurrency = if config.embedding.concurrency > 0 {
|
|
config.embedding.concurrency as usize
|
|
} else {
|
|
DEFAULT_EMBED_CONCURRENCY
|
|
};
|
|
let result = embed_documents(
|
|
&conn,
|
|
&client,
|
|
model_name,
|
|
concurrency,
|
|
progress_callback,
|
|
signal,
|
|
)
|
|
.await?;
|
|
|
|
Ok(EmbedCommandResult {
|
|
docs_embedded: result.docs_embedded,
|
|
chunks_embedded: result.chunks_embedded,
|
|
failed: result.failed,
|
|
skipped: result.skipped,
|
|
})
|
|
}
|
|
|
|
pub fn print_embed(result: &EmbedCommandResult) {
|
|
if result.docs_embedded == 0 && result.failed == 0 && result.skipped == 0 {
|
|
println!(
|
|
"\n {} nothing to embed",
|
|
Theme::success().bold().render("Embedding")
|
|
);
|
|
return;
|
|
}
|
|
|
|
println!(
|
|
"\n {} {} documents ({} chunks)",
|
|
Theme::success().bold().render("Embedded"),
|
|
Theme::bold().render(&result.docs_embedded.to_string()),
|
|
result.chunks_embedded
|
|
);
|
|
if result.failed > 0 {
|
|
println!(
|
|
" {}",
|
|
Theme::error().render(&format!("{} failed", result.failed))
|
|
);
|
|
}
|
|
if result.skipped > 0 {
|
|
println!(
|
|
" {}",
|
|
Theme::dim().render(&format!("{} skipped", result.skipped))
|
|
);
|
|
}
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct EmbedJsonOutput<'a> {
|
|
ok: bool,
|
|
data: &'a EmbedCommandResult,
|
|
meta: RobotMeta,
|
|
}
|
|
|
|
pub fn print_embed_json(result: &EmbedCommandResult, elapsed_ms: u64) {
|
|
let output = EmbedJsonOutput {
|
|
ok: true,
|
|
data: result,
|
|
meta: RobotMeta { elapsed_ms },
|
|
};
|
|
match serde_json::to_string(&output) {
|
|
Ok(json) => println!("{json}"),
|
|
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
|
}
|
|
}
|