feat(cli): Add search, stats, embed, sync, health, and robot-docs commands

Extends the CLI with seven new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical,
  hybrid, semantic), rich filtering (--type, --author, --project,
  --label, --path, --after, --updated-after), result limits, and
  optional explain mode showing RRF score breakdowns. Safe FTS mode
  sanitizes user input; raw mode passes through for power users.

- lore stats: Document and index statistics with optional --check
  for integrity verification and --repair to fix inconsistencies
  (orphaned documents, missing FTS entries, stale dirty queue items).

- lore embed: Generate vector embeddings via Ollama. Supports
  --retry-failed to re-attempt previously failed embeddings.

- lore generate-docs: Drain the dirty queue to regenerate documents.
  --full seeds all entities for complete rebuild. --project scopes
  to a single project.

- lore sync: Full pipeline orchestration (ingest issues + MRs,
  generate-docs, embed) with --no-embed and --no-docs flags for
  partial runs. Reports per-stage results and total elapsed time.

- lore health: Quick pre-flight check (config exists, DB exists,
  schema current). Returns exit code 1 if unhealthy. Designed for
  agent pre-flight scripts.

- lore robot-docs: Machine-readable command manifest for agent
  self-discovery. Returns all commands, flags, examples, exit codes,
  and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and
--projects flags for fully non-interactive robot-mode initialization.
Fixes init's force/non-interactive precedence logic and adds JSON
output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-30 15:47:10 -05:00
parent 559f0702ad
commit daf5a73019
13 changed files with 1930 additions and 95 deletions

View File

@@ -10,17 +10,23 @@ use tracing_subscriber::util::SubscriberInitExt;
use lore::Config;
use lore::cli::commands::{
InitInputs, InitOptions, ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser,
print_count, print_count_json, print_doctor_results, print_ingest_summary,
print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
print_list_mrs_json, print_show_issue, print_show_issue_json, print_show_mr,
InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser,
open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_generate_docs,
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results,
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats,
print_stats_json,
print_embed, print_embed_json, print_sync, print_sync_json,
print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count,
run_doctor, run_ingest, run_init, run_list_issues, run_list_mrs, run_show_issue, run_show_mr,
run_sync_status,
run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs,
run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions,
};
use lore::cli::{
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
SearchArgs, StatsArgs, SyncArgs,
};
use lore::cli::{Cli, Commands, CountArgs, IngestArgs, IssuesArgs, MrsArgs};
use lore::core::db::{create_connection, get_schema_version, run_migrations};
use lore::core::error::{GiError, RobotErrorOutput};
use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::paths::get_config_path;
use lore::core::paths::get_db_path;
@@ -49,6 +55,10 @@ async fn main() {
let result = match cli.command {
Commands::Issues(args) => handle_issues(cli.config.as_deref(), args, robot_mode).await,
Commands::Mrs(args) => handle_mrs(cli.config.as_deref(), args, robot_mode).await,
Commands::Search(args) => handle_search(cli.config.as_deref(), args, robot_mode).await,
Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode).await,
Commands::Count(args) => {
handle_count(cli.config.as_deref(), args, robot_mode).await
@@ -60,10 +70,29 @@ async fn main() {
Commands::Init {
force,
non_interactive,
} => handle_init(cli.config.as_deref(), force, non_interactive, robot_mode).await,
gitlab_url,
token_env_var,
projects,
} => {
handle_init(
cli.config.as_deref(),
force,
non_interactive,
robot_mode,
gitlab_url,
token_env_var,
projects,
)
.await
}
Commands::GenerateDocs(args) => {
handle_generate_docs(cli.config.as_deref(), args, robot_mode).await
}
Commands::Backup => handle_backup(robot_mode),
Commands::Reset { yes: _ } => handle_reset(robot_mode),
Commands::Migrate => handle_migrate(cli.config.as_deref(), robot_mode).await,
Commands::Health => handle_health(cli.config.as_deref(), robot_mode).await,
Commands::RobotDocs => handle_robot_docs(robot_mode),
// --- Backward-compat: deprecated aliases ---
Commands::List {
@@ -159,7 +188,7 @@ async fn main() {
}
}
/// Fallback error output for non-GiError errors in robot mode.
/// Fallback error output for non-LoreError errors in robot mode.
#[derive(Serialize)]
struct FallbackErrorOutput {
error: FallbackError,
@@ -172,8 +201,8 @@ struct FallbackError {
}
fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
// Try to downcast to GiError for structured output
if let Some(gi_error) = e.downcast_ref::<GiError>() {
// Try to downcast to LoreError for structured output
if let Some(gi_error) = e.downcast_ref::<LoreError>() {
if robot_mode {
let output = RobotErrorOutput::from(gi_error);
// Use serde_json for safe serialization; fallback constructs JSON safely
@@ -201,7 +230,7 @@ fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
}
}
// Fallback for non-GiError errors - use serde for proper JSON escaping
// Fallback for non-LoreError errors - use serde for proper JSON escaping
if robot_mode {
let output = FallbackErrorOutput {
error: FallbackError {
@@ -473,22 +502,123 @@ async fn handle_sync_status_cmd(
Ok(())
}
/// JSON output for init command.
///
/// Top-level envelope that `print_init_json` serializes and writes to stdout.
#[derive(Serialize)]
struct InitOutput {
/// Success marker; `print_init_json` always sets this to `true`.
ok: bool,
/// Payload describing what the init run produced.
data: InitOutputData,
}
/// Payload of [`InitOutput`]; every field is copied from an `InitResult`
/// by `print_init_json`.
#[derive(Serialize)]
struct InitOutputData {
/// Copied from `InitResult::config_path`.
config_path: String,
/// Copied from `InitResult::data_dir`.
data_dir: String,
/// Username and display name taken from `InitResult::user`.
user: InitOutputUser,
/// One entry per element of `InitResult::projects`, in the same order.
projects: Vec<InitOutputProject>,
}
/// User section of the init JSON output.
#[derive(Serialize)]
struct InitOutputUser {
/// Copied from `result.user.username` in `print_init_json`.
username: String,
/// Copied from `result.user.name` in `print_init_json`.
name: String,
}
/// One project entry in the init JSON output.
#[derive(Serialize)]
struct InitOutputProject {
/// Copied from the project's `path` field in `print_init_json`.
path: String,
/// Copied from the project's `name` field in `print_init_json`.
name: String,
}
fn print_init_json(result: &InitResult) {
let output = InitOutput {
ok: true,
data: InitOutputData {
config_path: result.config_path.clone(),
data_dir: result.data_dir.clone(),
user: InitOutputUser {
username: result.user.username.clone(),
name: result.user.name.clone(),
},
projects: result
.projects
.iter()
.map(|p| InitOutputProject {
path: p.path.clone(),
name: p.name.clone(),
})
.collect(),
},
};
println!("{}", serde_json::to_string(&output).unwrap());
}
async fn handle_init(
config_override: Option<&str>,
force: bool,
non_interactive: bool,
_robot_mode: bool, // TODO: Add robot mode support for init (requires non-interactive implementation)
robot_mode: bool,
gitlab_url_flag: Option<String>,
token_env_var_flag: Option<String>,
projects_flag: Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
// Robot mode: require all inputs via flags, skip interactive prompts
if robot_mode {
let missing: Vec<&str> = [
gitlab_url_flag.is_none().then_some("--gitlab-url"),
token_env_var_flag.is_none().then_some("--token-env-var"),
projects_flag.is_none().then_some("--projects"),
]
.into_iter()
.flatten()
.collect();
if !missing.is_empty() {
let output = RobotErrorWithSuggestion {
error: RobotErrorSuggestionData {
code: "MISSING_FLAGS".to_string(),
message: format!("Robot mode requires flags: {}", missing.join(", ")),
suggestion: "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project".to_string(),
},
};
eprintln!("{}", serde_json::to_string(&output)?);
std::process::exit(2);
}
let project_paths: Vec<String> = projects_flag
.unwrap()
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
let result = run_init(
InitInputs {
gitlab_url: gitlab_url_flag.unwrap(),
token_env_var: token_env_var_flag.unwrap(),
project_paths,
},
InitOptions {
config_path: config_override.map(String::from),
force: true,
non_interactive: true,
},
)
.await?;
print_init_json(&result);
return Ok(());
}
// Human mode: interactive prompts
let config_path = get_config_path(config_override);
let mut confirmed_overwrite = force;
// Check if config exists and handle overwrite
if config_path.exists() {
if config_path.exists() && !force {
if non_interactive {
eprintln!(
"{}",
style(format!(
"Config file exists at {}. Cannot proceed in non-interactive mode.",
"Config file exists at {}. Use --force to overwrite.",
config_path.display()
))
.red()
@@ -496,59 +626,70 @@ async fn handle_init(
std::process::exit(2);
}
if !force {
let confirm = Confirm::new()
.with_prompt(format!(
"Config file exists at {}. Overwrite?",
config_path.display()
))
.default(false)
.interact()?;
let confirm = Confirm::new()
.with_prompt(format!(
"Config file exists at {}. Overwrite?",
config_path.display()
))
.default(false)
.interact()?;
if !confirm {
println!("{}", style("Cancelled.").yellow());
std::process::exit(2);
}
confirmed_overwrite = true;
if !confirm {
println!("{}", style("Cancelled.").yellow());
std::process::exit(2);
}
confirmed_overwrite = true;
}
// Prompt for GitLab URL
let gitlab_url: String = Input::new()
.with_prompt("GitLab URL")
.default("https://gitlab.com".to_string())
.validate_with(|input: &String| -> Result<(), &str> {
if url::Url::parse(input).is_ok() {
Ok(())
} else {
Err("Please enter a valid URL")
}
})
.interact_text()?;
let gitlab_url: String = if let Some(url) = gitlab_url_flag {
url
} else {
Input::new()
.with_prompt("GitLab URL")
.default("https://gitlab.com".to_string())
.validate_with(|input: &String| -> Result<(), &str> {
if url::Url::parse(input).is_ok() {
Ok(())
} else {
Err("Please enter a valid URL")
}
})
.interact_text()?
};
// Prompt for token env var
let token_env_var: String = Input::new()
.with_prompt("Token environment variable name")
.default("GITLAB_TOKEN".to_string())
.interact_text()?;
let token_env_var: String = if let Some(var) = token_env_var_flag {
var
} else {
Input::new()
.with_prompt("Token environment variable name")
.default("GITLAB_TOKEN".to_string())
.interact_text()?
};
// Prompt for project paths
let project_paths_input: String = Input::new()
.with_prompt("Project paths (comma-separated, e.g., group/project)")
.validate_with(|input: &String| -> Result<(), &str> {
if input.trim().is_empty() {
Err("Please enter at least one project path")
} else {
Ok(())
}
})
.interact_text()?;
let project_paths: Vec<String> = if let Some(projects) = projects_flag {
projects
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect()
} else {
let project_paths_input: String = Input::new()
.with_prompt("Project paths (comma-separated, e.g., group/project)")
.validate_with(|input: &String| -> Result<(), &str> {
if input.trim().is_empty() {
Err("Please enter at least one project path")
} else {
Ok(())
}
})
.interact_text()?;
let project_paths: Vec<String> = project_paths_input
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
project_paths_input
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect()
};
println!("{}", style("\nValidating configuration...").blue());
@@ -840,6 +981,385 @@ async fn handle_migrate(
Ok(())
}
/// Handler for the `stats` command.
///
/// Loads the configuration, calls `run_stats` with the `check` and `repair`
/// flags from `args`, and prints the result as JSON in robot mode or in
/// human-readable form otherwise.
///
/// # Errors
///
/// Propagates any error from `Config::load` or `run_stats`.
async fn handle_stats(
    config_override: Option<&str>,
    args: StatsArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config = Config::load(config_override)?;
    let stats = run_stats(&config, args.check, args.repair)?;

    if robot_mode {
        print_stats_json(&stats);
        return Ok(());
    }

    print_stats(&stats);
    Ok(())
}
/// Handler for the `search` command.
///
/// Loads the configuration, maps the CLI arguments onto `SearchCliFilters`,
/// runs `run_search`, and prints the response. Robot mode prints JSON and
/// includes the elapsed time of the `run_search` call in milliseconds; human
/// mode prints the response without timing.
///
/// # Errors
///
/// Propagates any error from `Config::load` or `run_search`.
async fn handle_search(
    config_override: Option<&str>,
    args: SearchArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    use lore::search::FtsQueryMode;

    let config = Config::load(config_override)?;

    // Only the exact string "raw" selects raw FTS; every other value is safe mode.
    let fts_mode = if args.fts_mode.as_str() == "raw" {
        FtsQueryMode::Raw
    } else {
        FtsQueryMode::Safe
    };

    let filters = SearchCliFilters {
        source_type: args.source_type,
        author: args.author,
        project: args.project,
        labels: args.label,
        path: args.path,
        after: args.after,
        updated_after: args.updated_after,
        limit: args.limit,
    };

    // Time only the search call itself.
    let timer = std::time::Instant::now();
    let response = run_search(&config, &args.query, filters, fts_mode, args.explain)?;
    let elapsed_ms = timer.elapsed().as_millis() as u64;

    if robot_mode {
        print_search_results_json(&response, elapsed_ms);
        return Ok(());
    }

    print_search_results(&response);
    Ok(())
}
/// Handler for the `generate-docs` command.
///
/// Loads the configuration, calls `run_generate_docs` with the `full` flag
/// and the optional project from `args`, and prints the result as JSON in
/// robot mode or in human-readable form otherwise.
///
/// # Errors
///
/// Propagates any error from `Config::load` or `run_generate_docs`.
async fn handle_generate_docs(
    config_override: Option<&str>,
    args: GenerateDocsArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config = Config::load(config_override)?;
    let project_scope = args.project.as_deref();
    let outcome = run_generate_docs(&config, args.full, project_scope)?;

    if robot_mode {
        print_generate_docs_json(&outcome);
        return Ok(());
    }

    print_generate_docs(&outcome);
    Ok(())
}
/// Handler for the `embed` command.
///
/// Loads the configuration, awaits `run_embed` with the `retry_failed` flag
/// from `args`, and prints the result as JSON in robot mode or in
/// human-readable form otherwise.
///
/// # Errors
///
/// Propagates any error from `Config::load` or `run_embed`.
async fn handle_embed(
    config_override: Option<&str>,
    args: EmbedArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config = Config::load(config_override)?;
    let outcome = run_embed(&config, args.retry_failed).await?;

    if robot_mode {
        print_embed_json(&outcome);
        return Ok(());
    }

    print_embed(&outcome);
    Ok(())
}
async fn handle_sync_cmd(
config_override: Option<&str>,
args: SyncArgs,
robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let config = Config::load(config_override)?;
let options = SyncOptions {
full: args.full,
force: args.force,
no_embed: args.no_embed,
no_docs: args.no_docs,
};
let start = std::time::Instant::now();
let result = run_sync(&config, options).await?;
let elapsed = start.elapsed();
if robot_mode {
print_sync_json(&result, elapsed.as_millis() as u64);
} else {
print_sync(&result, elapsed);
}
Ok(())
}
// ============================================================================
// Health + Robot-docs handlers
// ============================================================================
/// JSON output for health command.
///
/// Top-level envelope printed to stdout by `handle_health` in robot mode.
#[derive(Serialize)]
struct HealthOutput {
/// `handle_health` always sets this to `true`; the actual verdict is in
/// `data.healthy`.
ok: bool,
/// Individual check results.
data: HealthData,
}
/// Individual results of the health check, as filled in by `handle_health`.
#[derive(Serialize)]
struct HealthData {
/// `true` only when `config_found`, `db_found` and `schema_current` are all `true`.
healthy: bool,
/// Whether the resolved config path exists on disk.
config_found: bool,
/// Whether the config loaded and the resolved database path exists on disk.
db_found: bool,
/// Whether the database's schema version is at least the version
/// `handle_health` treats as latest.
schema_current: bool,
/// Schema version read from the database; `0` when it could not be read.
schema_version: i32,
}
/// Handler for the `health` command: a quick pre-flight check.
///
/// Checks, in order, that the config file exists, that it loads and the
/// database file it points at exists, and that the database's schema version
/// is at least the expected latest version. A failure at any step leaves the
/// later results at their defaults (`false` / version `0`).
///
/// Robot mode prints one line of JSON; human mode prints a pass/FAIL line per
/// check followed by a summary. When any check fails the process exits with
/// code 1 after printing.
///
/// # Errors
///
/// Returns an error only if JSON serialization of the robot output fails.
async fn handle_health(
    config_override: Option<&str>,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config_path = get_config_path(config_override);
    let config_found = config_path.exists();

    // Defaults describe "nothing could be verified"; each successful probe
    // below upgrades them.
    let mut db_found = false;
    let mut schema_version = 0;
    let mut schema_current = false;

    if config_found {
        // A config that exists but fails to load counts as "no DB found".
        if let Ok(config) = Config::load(config_override) {
            let db_path = get_db_path(config.storage.db_path.as_deref());
            if db_path.exists() {
                db_found = true;
                // If the file exists but cannot be opened, the DB is still
                // reported as found, with version 0 and a non-current schema.
                if let Ok(conn) = create_connection(&db_path) {
                    let latest = 9; // Number of embedded migrations
                    schema_version = get_schema_version(&conn);
                    schema_current = schema_version >= latest;
                }
            }
        }
    }

    let healthy = config_found && db_found && schema_current;

    if robot_mode {
        let report = HealthOutput {
            ok: true,
            data: HealthData {
                healthy,
                config_found,
                db_found,
                schema_current,
                schema_version,
            },
        };
        println!("{}", serde_json::to_string(&report)?);
    } else {
        let badge = |passed: bool| {
            if passed {
                style("pass").green()
            } else {
                style("FAIL").red()
            }
        };

        println!("Config: {} ({})", badge(config_found), config_path.display());
        println!("DB: {}", badge(db_found));
        println!("Schema: {} (v{})", badge(schema_current), schema_version);
        println!();

        let summary = if healthy {
            style("Healthy").green().bold()
        } else {
            style("Unhealthy - run 'lore doctor' for details").red().bold()
        };
        println!("{}", summary);
    }

    // Signal an unhealthy state through the exit code, after all output.
    if !healthy {
        std::process::exit(1);
    }

    Ok(())
}
/// JSON output for robot-docs command.
///
/// Top-level envelope printed to stdout by `handle_robot_docs`.
#[derive(Serialize)]
struct RobotDocsOutput {
/// `handle_robot_docs` always sets this to `true`.
ok: bool,
/// The manifest itself.
data: RobotDocsData,
}
/// Manifest body emitted by `handle_robot_docs`.
#[derive(Serialize)]
struct RobotDocsData {
/// Tool name; `handle_robot_docs` sets this to `"lore"`.
name: String,
/// Crate version taken from `CARGO_PKG_VERSION` at compile time.
version: String,
/// One-line description of the tool.
description: String,
/// Ways to turn on robot mode (flags, environment variable, auto-detection).
activation: RobotDocsActivation,
/// Object keyed by command name; each entry has a description, flags and an example.
commands: serde_json::Value,
/// Object mapping exit-code numbers (as strings) to their meaning.
exit_codes: serde_json::Value,
/// Text describing the JSON error shape written to stderr.
error_format: String,
/// Object mapping a workflow name to an ordered list of command lines.
workflows: serde_json::Value,
}
/// Describes how robot mode is activated, as reported in the robot-docs manifest.
#[derive(Serialize)]
struct RobotDocsActivation {
/// Command-line flags; `handle_robot_docs` lists `--robot`, `-J` and `--json`.
flags: Vec<String>,
/// Environment-variable setting; `handle_robot_docs` reports `LORE_ROBOT=1`.
env: String,
/// Automatic trigger; `handle_robot_docs` reports `Non-TTY stdout`.
auto: String,
}
/// Handler for the `robot-docs` command: prints a machine-readable manifest
/// of the CLI to stdout.
///
/// The manifest contains the crate version, how robot mode is activated, a
/// per-command table (description, flags, example), the exit-code table, the
/// stderr error format, and a few recommended workflows. All of it is static
/// data built in this function; nothing is read from config or the database.
///
/// In robot mode the JSON is printed compactly on one line; otherwise it is
/// pretty-printed.
///
/// # Errors
///
/// Returns an error only if JSON serialization fails.
fn handle_robot_docs(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
// Crate version, resolved at compile time.
let version = env!("CARGO_PKG_VERSION").to_string();
// Per-command manifest: keep in sync by hand with the actual clap definitions.
let commands = serde_json::json!({
"init": {
"description": "Initialize configuration and database",
"flags": ["--force", "--non-interactive", "--gitlab-url <URL>", "--token-env-var <VAR>", "--projects <paths>"],
"robot_flags": ["--gitlab-url", "--token-env-var", "--projects"],
"example": "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project"
},
"health": {
"description": "Quick pre-flight check: config, database, schema version",
"flags": [],
"example": "lore --robot health"
},
"auth": {
"description": "Verify GitLab authentication",
"flags": [],
"example": "lore --robot auth"
},
"doctor": {
"description": "Full environment health check (config, auth, DB, Ollama)",
"flags": [],
"example": "lore --robot doctor"
},
"ingest": {
"description": "Sync data from GitLab",
"flags": ["--project <path>", "--force", "--full", "<entity: issues|mrs>"],
"example": "lore --robot ingest issues --project group/repo"
},
"sync": {
"description": "Full sync pipeline: ingest -> generate-docs -> embed",
"flags": ["--full", "--force", "--no-embed", "--no-docs"],
"example": "lore --robot sync"
},
"issues": {
"description": "List or show issues",
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--label", "--milestone", "--since", "--due-before", "--has-due", "--sort", "--asc"],
"example": "lore --robot issues --state opened --limit 10"
},
"mrs": {
"description": "List or show merge requests",
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--reviewer", "--label", "--since", "--draft", "--no-draft", "--target", "--source", "--sort", "--asc"],
"example": "lore --robot mrs --state opened"
},
"search": {
"description": "Search indexed documents (lexical, hybrid, semantic)",
"flags": ["<QUERY>", "--mode", "--type", "--author", "--project", "--label", "--path", "--after", "--updated-after", "--limit", "--explain", "--fts-mode"],
"example": "lore --robot search 'authentication bug' --mode hybrid --limit 10"
},
"count": {
"description": "Count entities in local database",
"flags": ["<entity: issues|mrs|discussions|notes>", "--for <issue|mr>"],
"example": "lore --robot count issues"
},
"stats": {
"description": "Show document and index statistics",
"flags": ["--check", "--repair"],
"example": "lore --robot stats"
},
"status": {
"description": "Show sync state (cursors, last sync times)",
"flags": [],
"example": "lore --robot status"
},
"generate-docs": {
"description": "Generate searchable documents from ingested data",
"flags": ["--full", "--project <path>"],
"example": "lore --robot generate-docs --full"
},
"embed": {
"description": "Generate vector embeddings for documents via Ollama",
"flags": ["--retry-failed"],
"example": "lore --robot embed"
},
"migrate": {
"description": "Run pending database migrations",
"flags": [],
"example": "lore --robot migrate"
},
"version": {
"description": "Show version information",
"flags": [],
"example": "lore --robot version"
},
"robot-docs": {
"description": "This command (agent self-discovery manifest)",
"flags": [],
"example": "lore robot-docs"
}
});
// Exit-code table, keyed by the numeric code rendered as a string.
let exit_codes = serde_json::json!({
"0": "Success",
"1": "Internal error / health check failed",
"2": "Config not found / missing flags",
"3": "Config invalid",
"4": "Token not set",
"5": "GitLab auth failed",
"6": "Resource not found",
"7": "Rate limited",
"8": "Network error",
"9": "Database locked",
"10": "Database error",
"11": "Migration failed",
"12": "I/O error",
"13": "Transform error"
});
// Recommended command sequences, each listed in the order to run them.
let workflows = serde_json::json!({
"first_setup": [
"lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project",
"lore --robot doctor",
"lore --robot sync"
],
"daily_sync": [
"lore --robot sync"
],
"search": [
"lore --robot search 'query' --mode hybrid"
],
"pre_flight": [
"lore --robot health"
]
});
// Assemble the full manifest envelope.
let output = RobotDocsOutput {
ok: true,
data: RobotDocsData {
name: "lore".to_string(),
version,
description: "Local GitLab data management with semantic search".to_string(),
activation: RobotDocsActivation {
flags: vec!["--robot".to_string(), "-J".to_string(), "--json".to_string()],
env: "LORE_ROBOT=1".to_string(),
auto: "Non-TTY stdout".to_string(),
},
commands,
exit_codes,
error_format: "stderr JSON: {\"error\":{\"code\":\"...\",\"message\":\"...\",\"suggestion\":\"...\"}}".to_string(),
workflows,
},
};
// Same content either way: compact for machines, pretty-printed for humans.
if robot_mode {
println!("{}", serde_json::to_string(&output)?);
} else {
println!("{}", serde_json::to_string_pretty(&output)?);
}
Ok(())
}
// ============================================================================
// Backward-compat handlers (deprecated, delegate to new handlers)
// ============================================================================