feat(cli): Add search, stats, embed, sync, health, and robot-docs commands

Extends the CLI with six new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical, hybrid, semantic), rich filtering (--type, --author, --project, --label, --path, --after, --updated-after), result limits, and optional explain mode showing RRF score breakdowns. Safe FTS mode sanitizes user input; raw mode passes through for power users.
- lore stats: Document and index statistics with optional --check for integrity verification and --repair to fix inconsistencies (orphaned documents, missing FTS entries, stale dirty queue items).
- lore embed: Generate vector embeddings via Ollama. Supports --retry-failed to re-attempt previously failed embeddings.
- lore generate-docs: Drain the dirty queue to regenerate documents. --full seeds all entities for complete rebuild. --project scopes to a single project.
- lore sync: Full pipeline orchestration (ingest issues + MRs, generate-docs, embed) with --no-embed and --no-docs flags for partial runs. Reports per-stage results and total elapsed time.
- lore health: Quick pre-flight check (config exists, DB exists, schema current). Returns exit code 1 if unhealthy. Designed for agent pre-flight scripts.
- lore robot-docs: Machine-readable command manifest for agent self-discovery. Returns all commands, flags, examples, exit codes, and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and --projects flags for fully non-interactive robot-mode initialization. Fixes init's force/non-interactive precedence logic and adds JSON output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 15:47:10 -05:00
parent 559f0702ad
commit daf5a73019
13 changed files with 1930 additions and 95 deletions
--- a/src/cli/commands/sync.rs
+++ b/src/cli/commands/sync.rs
@@ -0,0 +1,124 @@
+//! Sync command: unified orchestrator for ingest -> generate-docs -> embed.
+
+use console::style;
+use serde::Serialize;
+use tracing::{info, warn};
+
+use crate::Config;
+use crate::core::error::Result;
+
+use super::embed::run_embed;
+use super::generate_docs::run_generate_docs;
+use super::ingest::run_ingest;
+
+/// Options for the sync command.
+///
+/// The derived `Default` leaves every flag `false`, so by default no stage
+/// is skipped.
+#[derive(Debug, Default)]
+pub struct SyncOptions {
+    /// Forwarded unchanged to both `run_ingest` calls; its exact effect is
+    /// defined by the ingest command.
+    pub full: bool,
+    /// Forwarded unchanged to both `run_ingest` calls; its exact effect is
+    /// defined by the ingest command.
+    pub force: bool,
+    /// When `true`, `run_sync` skips the embedding stage.
+    pub no_embed: bool,
+    /// When `true`, `run_sync` skips the document-generation stage.
+    pub no_docs: bool,
+}
+
+/// Result of the sync command.
+///
+/// All counters start at zero via `Default`, so a stage that is skipped (or
+/// an embedding stage that fails) leaves its counter at 0.
+#[derive(Debug, Default, Serialize)]
+pub struct SyncResult {
+    /// `issues_upserted` as reported by the issue ingest.
+    pub issues_updated: usize,
+    /// `mrs_upserted` as reported by the merge-request ingest.
+    pub mrs_updated: usize,
+    /// Sum of `discussions_fetched` from the issue and merge-request ingests.
+    pub discussions_fetched: usize,
+    /// `regenerated` count reported by document generation.
+    pub documents_regenerated: usize,
+    /// `embedded` count reported by the embedding stage.
+    pub documents_embedded: usize,
+}
+
+/// Run the full sync pipeline: ingest -> generate-docs -> embed.
+///
+/// Stages run strictly in order:
+/// 1. ingest issues,
+/// 2. ingest merge requests,
+/// 3. generate documents (skipped when `options.no_docs` is set),
+/// 4. embed documents (skipped when `options.no_embed` is set).
+///
+/// # Errors
+///
+/// Returns the first error from either ingest call or from document
+/// generation. An embedding failure is only logged as a warning and does not
+/// fail the sync; `documents_embedded` then stays at 0.
+pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResult> {
+    let SyncOptions { full, force, no_embed, no_docs } = options;
+    let mut summary = SyncResult::default();
+
+    // Stage 1: issues.
+    info!("Sync stage 1/4: ingesting issues");
+    let issue_ingest = run_ingest(config, "issues", None, force, full, true).await?;
+    summary.issues_updated = issue_ingest.issues_upserted;
+    summary.discussions_fetched += issue_ingest.discussions_fetched;
+
+    // Stage 2: merge requests.
+    info!("Sync stage 2/4: ingesting merge requests");
+    let mr_ingest = run_ingest(config, "mrs", None, force, full, true).await?;
+    summary.mrs_updated = mr_ingest.mrs_upserted;
+    summary.discussions_fetched += mr_ingest.discussions_fetched;
+
+    // Stage 3: document generation, opt-out via --no-docs.
+    if !no_docs {
+        info!("Sync stage 3/4: generating documents");
+        let generated = run_generate_docs(config, false, None)?;
+        summary.documents_regenerated = generated.regenerated;
+    } else {
+        info!("Sync stage 3/4: skipping document generation (--no-docs)");
+    }
+
+    // Stage 4: embedding, opt-out via --no-embed.
+    if !no_embed {
+        info!("Sync stage 4/4: embedding documents");
+        match run_embed(config, false).await {
+            // Best-effort stage: a failure here is downgraded to a warning so
+            // the ingest and document results are still returned.
+            Err(embed_err) => {
+                warn!(error = %embed_err, "Embedding stage failed (Ollama may be unavailable), continuing");
+            }
+            Ok(embed_stats) => {
+                summary.documents_embedded = embed_stats.embedded;
+            }
+        }
+    } else {
+        info!("Sync stage 4/4: skipping embedding (--no-embed)");
+    }
+
+    info!(
+        issues = summary.issues_updated,
+        mrs = summary.mrs_updated,
+        discussions = summary.discussions_fetched,
+        docs = summary.documents_regenerated,
+        embedded = summary.documents_embedded,
+        "Sync pipeline complete"
+    );
+
+    Ok(summary)
+}
+
+/// Print human-readable sync summary.
+///
+/// Writes a styled header, one line per counter in `result`, and the elapsed
+/// time in seconds (one decimal place) to stdout.
+pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
+    let SyncResult {
+        issues_updated,
+        mrs_updated,
+        discussions_fetched,
+        documents_regenerated,
+        documents_embedded,
+    } = result;
+    let header = style("done").green().bold();
+    let seconds = elapsed.as_secs_f64();
+
+    println!("{} Sync complete:", header);
+    println!("  Issues updated:           {}", issues_updated);
+    println!("  MRs updated:              {}", mrs_updated);
+    println!("  Discussions fetched:       {}", discussions_fetched);
+    println!("  Documents regenerated:     {}", documents_regenerated);
+    println!("  Documents embedded:        {}", documents_embedded);
+    println!("  Elapsed: {:.1}s", seconds);
+}
+
+/// JSON output for sync.
+///
+/// Envelope serialized by `print_sync_json`: a success flag, the borrowed
+/// sync counters, and timing metadata.
+#[derive(Serialize)]
+struct SyncJsonOutput<'a> {
+    /// Success flag; `print_sync_json` always sets this to `true`.
+    ok: bool,
+    /// Borrowed counters, so the `SyncResult` is serialized without being copied.
+    data: &'a SyncResult,
+    /// Timing metadata for the run.
+    meta: SyncMeta,
+}
+
+/// Metadata attached to the JSON sync output.
+#[derive(Serialize)]
+struct SyncMeta {
+    /// Elapsed time in milliseconds, as supplied by the caller of
+    /// `print_sync_json`.
+    elapsed_ms: u64,
+}
+
+/// Print JSON robot-mode sync output.
+///
+/// Wraps `result` and `elapsed_ms` in a `SyncJsonOutput` envelope with
+/// `ok: true` and writes it to stdout as one compact JSON line.
+///
+/// # Panics
+///
+/// Panics if `serde_json` fails to serialize the envelope.
+pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) {
+    let meta = SyncMeta { elapsed_ms };
+    let envelope = SyncJsonOutput { ok: true, data: result, meta };
+    let rendered = serde_json::to_string(&envelope).unwrap();
+    println!("{}", rendered);
+}