feat(cli): Add search, stats, embed, sync, health, and robot-docs commands

Extends the CLI with seven new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical, hybrid, semantic), rich filtering (--type, --author, --project, --label, --path, --after, --updated-after), result limits, and an optional explain mode showing RRF score breakdowns. Safe FTS mode sanitizes user input; raw mode passes the query through unmodified for power users.
- lore stats: Document and index statistics with optional --check for integrity verification and --repair to fix inconsistencies (orphaned documents, missing FTS entries, stale dirty queue items).
- lore embed: Generate vector embeddings via Ollama. Supports --retry-failed to re-attempt previously failed embeddings.
- lore generate-docs: Drain the dirty queue to regenerate documents. --full seeds all entities for complete rebuild. --project scopes to a single project.
- lore sync: Full pipeline orchestration (ingest issues + MRs, generate-docs, embed) with --no-embed and --no-docs flags for partial runs. Reports per-stage results and total elapsed time.
- lore health: Quick pre-flight check (config exists, DB exists, schema current). Returns exit code 1 if unhealthy. Designed for agent pre-flight scripts.
- lore robot-docs: Machine-readable command manifest for agent self-discovery. Returns all commands, flags, examples, exit codes, and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and --projects flags for fully non-interactive robot-mode initialization. Fixes init's force/non-interactive precedence logic and adds JSON output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 15:47:10 -05:00
parent 559f0702ad
commit daf5a73019
13 changed files with 1930 additions and 95 deletions
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -69,6 +69,18 @@ pub enum Commands {
        /// Fail if prompts would be shown
        #[arg(long)]
        non_interactive: bool,
+
+        /// GitLab base URL (required in robot mode)
+        #[arg(long)]
+        gitlab_url: Option<String>,
+
+        /// Environment variable name holding GitLab token (required in robot mode)
+        #[arg(long)]
+        token_env_var: Option<String>,
+
+        /// Comma-separated project paths (required in robot mode)
+        #[arg(long)]
+        projects: Option<String>,
    },

    /// Create timestamped database backup
@@ -81,9 +93,32 @@ pub enum Commands {
        yes: bool,
    },

+    /// Search indexed documents
+    Search(SearchArgs),
+
+    /// Show document and index statistics
+    Stats(StatsArgs),
+
+    /// Generate searchable documents from ingested data
+    #[command(name = "generate-docs")]
+    GenerateDocs(GenerateDocsArgs),
+
+    /// Generate vector embeddings for documents via Ollama
+    Embed(EmbedArgs),
+
+    /// Run full sync pipeline: ingest -> generate-docs -> embed
+    Sync(SyncArgs),
+
    /// Run pending database migrations
    Migrate,

+    /// Quick health check: config, database, schema version
+    Health,
+
+    /// Machine-readable command manifest for agent self-discovery
+    #[command(name = "robot-docs")]
+    RobotDocs,
+
    // --- Hidden backward-compat aliases ---
    /// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
    #[command(hide = true)]
@@ -299,6 +334,109 @@ pub struct IngestArgs {
    pub full: bool,
 }

+/// Arguments for `lore stats`
+#[derive(Parser)]
+pub struct StatsArgs {
+    /// Run integrity checks
+    #[arg(long)]
+    pub check: bool,
+
+    /// Repair integrity issues (requires --check)
+    #[arg(long, requires = "check")]
+    pub repair: bool,
+}
+
+/// Arguments for `lore search <QUERY>`
+#[derive(Parser)]
+pub struct SearchArgs {
+    /// Search query string
+    pub query: String,
+
+    /// Search mode (lexical, hybrid, semantic)
+    #[arg(long, default_value = "hybrid")]
+    pub mode: String,
+
+    /// Filter by source type (issue, mr, discussion)
+    #[arg(long = "type", value_name = "TYPE")]
+    pub source_type: Option<String>,
+
+    /// Filter by author username
+    #[arg(long)]
+    pub author: Option<String>,
+
+    /// Filter by project path
+    #[arg(short = 'p', long)]
+    pub project: Option<String>,
+
+    /// Filter by label (repeatable, AND logic)
+    #[arg(long, action = clap::ArgAction::Append)]
+    pub label: Vec<String>,
+
+    /// Filter by file path (trailing / for prefix match)
+    #[arg(long)]
+    pub path: Option<String>,
+
+    /// Filter by created after (7d, 2w, or YYYY-MM-DD)
+    #[arg(long)]
+    pub after: Option<String>,
+
+    /// Filter by updated after (7d, 2w, or YYYY-MM-DD)
+    #[arg(long = "updated-after")]
+    pub updated_after: Option<String>,
+
+    /// Maximum results (default 20, max 100)
+    #[arg(short = 'n', long = "limit", default_value = "20")]
+    pub limit: usize,
+
+    /// Show ranking explanation per result
+    #[arg(long)]
+    pub explain: bool,
+
+    /// FTS query mode: safe (default) or raw
+    #[arg(long = "fts-mode", default_value = "safe")]
+    pub fts_mode: String,
+}
+
+/// Arguments for `lore generate-docs`
+#[derive(Parser)]
+pub struct GenerateDocsArgs {
+    /// Full rebuild: seed all entities into dirty queue, then drain
+    #[arg(long)]
+    pub full: bool,
+
+    /// Filter to single project
+    #[arg(short = 'p', long)]
+    pub project: Option<String>,
+}
+
+/// Arguments for `lore sync`
+#[derive(Parser)]
+pub struct SyncArgs {
+    /// Reset cursors, fetch everything
+    #[arg(long)]
+    pub full: bool,
+
+    /// Override stale lock
+    #[arg(long)]
+    pub force: bool,
+
+    /// Skip embedding step
+    #[arg(long)]
+    pub no_embed: bool,
+
+    /// Skip document regeneration
+    #[arg(long)]
+    pub no_docs: bool,
+}
+
+/// Arguments for `lore embed`
+#[derive(Parser)]
+pub struct EmbedArgs {
+    /// Retry previously failed embeddings
+    #[arg(long)]
+    pub retry_failed: bool,
+}
+
 /// Arguments for `lore count <ENTITY>`
 #[derive(Parser)]
 pub struct CountArgs {