feat(cli): Add search, stats, embed, generate-docs, sync, health, and robot-docs commands

Extends the CLI with seven new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical,
  hybrid, semantic), rich filtering (--type, --author, --project,
  --label, --path, --after, --updated-after), result limits, and
  optional explain mode showing RRF score breakdowns. Safe FTS mode
  sanitizes user input; raw mode passes through for power users.

- lore stats: Document and index statistics with optional --check
  for integrity verification and --repair to fix inconsistencies
  (orphaned documents, missing FTS entries, stale dirty queue items).

- lore embed: Generate vector embeddings via Ollama. Supports
  --retry-failed to re-attempt previously failed embeddings.

- lore generate-docs: Drain the dirty queue to regenerate documents.
  --full seeds all entities for complete rebuild. --project scopes
  to a single project.

- lore sync: Full pipeline orchestration (ingest issues + MRs,
  generate-docs, embed) with --no-embed and --no-docs flags for
  partial runs. Reports per-stage results and total elapsed time.

- lore health: Quick pre-flight check (config exists, DB exists,
  schema current). Returns exit code 1 if unhealthy. Designed for
  agent pre-flight scripts.

- lore robot-docs: Machine-readable command manifest for agent
  self-discovery. Returns all commands, flags, examples, exit codes,
  and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and
--projects flags for fully non-interactive robot-mode initialization.
Fixes init's force/non-interactive precedence logic and adds JSON
output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-30 15:47:10 -05:00
parent 559f0702ad
commit daf5a73019
13 changed files with 1930 additions and 95 deletions

View File

@@ -69,6 +69,18 @@ pub enum Commands {
/// Fail if prompts would be shown
#[arg(long)]
non_interactive: bool,
/// GitLab base URL (required in robot mode)
#[arg(long)]
gitlab_url: Option<String>,
/// Environment variable name holding GitLab token (required in robot mode)
#[arg(long)]
token_env_var: Option<String>,
/// Comma-separated project paths (required in robot mode)
#[arg(long)]
projects: Option<String>,
},
/// Create timestamped database backup
@@ -81,9 +93,32 @@ pub enum Commands {
yes: bool,
},
/// Search indexed documents
Search(SearchArgs),
/// Show document and index statistics
Stats(StatsArgs),
/// Generate searchable documents from ingested data
#[command(name = "generate-docs")]
GenerateDocs(GenerateDocsArgs),
/// Generate vector embeddings for documents via Ollama
Embed(EmbedArgs),
/// Run full sync pipeline: ingest -> generate-docs -> embed
Sync(SyncArgs),
/// Run pending database migrations
Migrate,
/// Quick health check: config, database, schema version
Health,
/// Machine-readable command manifest for agent self-discovery
#[command(name = "robot-docs")]
RobotDocs,
// --- Hidden backward-compat aliases ---
/// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
#[command(hide = true)]
@@ -299,6 +334,109 @@ pub struct IngestArgs {
pub full: bool,
}
// Flag set for the `stats` subcommand, parsed by clap's derive macro.
// The `///` comments below are picked up by clap as `--help` text, so they are
// user-facing strings: reword them only when the CLI output should change.
/// Arguments for `lore stats`
#[derive(Parser)]
pub struct StatsArgs {
// Plain boolean switch: `--check` present => true, absent => false.
/// Run integrity checks
#[arg(long)]
pub check: bool,
// `requires = "check"` makes clap reject `--repair` unless `--check` is also
// given, so a value parsed from the command line never has `repair == true`
// with `check == false`.
/// Repair integrity issues (requires --check)
#[arg(long, requires = "check")]
pub repair: bool,
}
// Query plus filter/ranking options for the `search` subcommand, parsed by
// clap's derive macro. The `///` comments are emitted as `--help` text, and the
// field order determines the order options appear in help output.
/// Arguments for `lore search <QUERY>`
#[derive(Parser)]
pub struct SearchArgs {
// No `#[arg(...)]` attribute: clap treats this as a required positional argument.
/// Search query string
pub query: String,
// Stored as a free-form String with no `value_parser`, so clap accepts any
// value here. NOTE(review): presumably validated against
// lexical/hybrid/semantic by the search handler — confirm.
/// Search mode (lexical, hybrid, semantic)
#[arg(long, default_value = "hybrid")]
pub mode: String,
// Exposed as `--type` (`type` is a reserved word in Rust, hence the different
// field name); `value_name` controls the placeholder shown in help.
/// Filter by source type (issue, mr, discussion)
#[arg(long = "type", value_name = "TYPE")]
pub source_type: Option<String>,
/// Filter by author username
#[arg(long)]
pub author: Option<String>,
// Available as both `-p` and `--project`.
/// Filter by project path
#[arg(short = 'p', long)]
pub project: Option<String>,
// `Append` collects one element per `--label` occurrence; the Vec is empty
// when the flag is not given.
/// Filter by label (repeatable, AND logic)
#[arg(long, action = clap::ArgAction::Append)]
pub label: Vec<String>,
/// Filter by file path (trailing / for prefix match)
#[arg(long)]
pub path: Option<String>,
// Both date filters are kept as raw strings; interpreting the relative
// (7d, 2w) and absolute (YYYY-MM-DD) forms is not done in this declaration.
/// Filter by created after (7d, 2w, or YYYY-MM-DD)
#[arg(long)]
pub after: Option<String>,
/// Filter by updated after (7d, 2w, or YYYY-MM-DD)
#[arg(long = "updated-after")]
pub updated_after: Option<String>,
// Available as `-n` and `--limit`. Only the default of 20 is set here; the
// documented maximum of 100 is not enforced by clap.
// NOTE(review): confirm the handler clamps or rejects larger values.
/// Maximum results (default 20, max 100)
#[arg(short = 'n', long = "limit", default_value = "20")]
pub limit: usize,
/// Show ranking explanation per result
#[arg(long)]
pub explain: bool,
// Like `mode`, an unvalidated String at parse time.
// NOTE(review): presumably checked against safe/raw downstream — confirm.
/// FTS query mode: safe (default) or raw
#[arg(long = "fts-mode", default_value = "safe")]
pub fts_mode: String,
}
// Options for the `generate-docs` subcommand, parsed by clap's derive macro.
// The `///` comments are emitted as `--help` text.
/// Arguments for `lore generate-docs`
#[derive(Parser)]
pub struct GenerateDocsArgs {
// Boolean switch: `--full` present => true, absent => false.
/// Full rebuild: seed all entities into dirty queue, then drain
#[arg(long)]
pub full: bool,
// Optional scope, available as `-p` and `--project`; `None` when omitted.
/// Filter to single project
#[arg(short = 'p', long)]
pub project: Option<String>,
}
// Switches for the `sync` subcommand, parsed by clap's derive macro.
// All four fields are independent boolean flags that default to false;
// no `requires`/`conflicts_with` relationships are declared between them.
// The `///` comments are emitted as `--help` text.
/// Arguments for `lore sync`
#[derive(Parser)]
pub struct SyncArgs {
/// Reset cursors, fetch everything
#[arg(long)]
pub full: bool,
/// Override stale lock
#[arg(long)]
pub force: bool,
// clap derives the long flag from the field name in kebab-case: `--no-embed`.
/// Skip embedding step
#[arg(long)]
pub no_embed: bool,
// Likewise exposed as `--no-docs`.
/// Skip document regeneration
#[arg(long)]
pub no_docs: bool,
}
// Options for the `embed` subcommand, parsed by clap's derive macro.
// The `///` comments are emitted as `--help` text.
/// Arguments for `lore embed`
#[derive(Parser)]
pub struct EmbedArgs {
// Boolean switch exposed as `--retry-failed` (kebab-case of the field name);
// false when the flag is absent.
/// Retry previously failed embeddings
#[arg(long)]
pub retry_failed: bool,
}
/// Arguments for `lore count <ENTITY>`
#[derive(Parser)]
pub struct CountArgs {