feat(cli): Add search, stats, embed, sync, health, and robot-docs commands

Extends the CLI with seven new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical,
  hybrid, semantic), rich filtering (--type, --author, --project,
  --label, --path, --after, --updated-after), result limits, and
  optional explain mode showing RRF score breakdowns. Safe FTS mode
  sanitizes user input; raw mode passes through for power users.

- lore stats: Document and index statistics with optional --check
  for integrity verification and --repair to fix inconsistencies
  (orphaned documents, missing FTS entries, stale dirty queue items).

- lore embed: Generate vector embeddings via Ollama. Supports
  --retry-failed to re-attempt previously failed embeddings.

- lore generate-docs: Drain the dirty queue to regenerate documents.
  --full seeds all entities for complete rebuild. --project scopes
  to a single project.

- lore sync: Full pipeline orchestration (ingest issues + MRs,
  generate-docs, embed) with --no-embed and --no-docs flags for
  partial runs. Reports per-stage results and total elapsed time.

- lore health: Quick pre-flight check (config exists, DB exists,
  schema current). Returns exit code 1 if unhealthy. Designed for
  agent pre-flight scripts.

- lore robot-docs: Machine-readable command manifest for agent
  self-discovery. Returns all commands, flags, examples, exit codes,
  and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and
--projects flags for fully non-interactive robot-mode initialization.
Fixes init's force/non-interactive precedence logic and adds JSON
output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-30 15:47:10 -05:00
parent 559f0702ad
commit daf5a73019
13 changed files with 1930 additions and 95 deletions

View File

@@ -1,7 +1,7 @@
//! Auth test command - verify GitLab authentication.
use crate::core::config::Config;
use crate::core::error::{GiError, Result};
use crate::core::error::{LoreError, Result};
use crate::gitlab::GitLabClient;
/// Result of successful auth test.
@@ -19,12 +19,12 @@ pub async fn run_auth_test(config_path: Option<&str>) -> Result<AuthTestResult>
// 2. Get token from environment
let token = std::env::var(&config.gitlab.token_env_var)
.map(|t| t.trim().to_string())
.map_err(|_| GiError::TokenNotSet {
.map_err(|_| LoreError::TokenNotSet {
env_var: config.gitlab.token_env_var.clone(),
})?;
if token.is_empty() {
return Err(GiError::TokenNotSet {
return Err(LoreError::TokenNotSet {
env_var: config.gitlab.token_env_var.clone(),
});
}

View File

@@ -5,7 +5,7 @@ use serde::Serialize;
use crate::core::config::Config;
use crate::core::db::{create_connection, get_schema_version, verify_pragmas};
use crate::core::error::GiError;
use crate::core::error::LoreError;
use crate::core::paths::{get_config_path, get_db_path};
use crate::gitlab::GitLabClient;
@@ -137,7 +137,7 @@ fn check_config(config_path: &str) -> (ConfigCheck, Option<Config>) {
},
Some(config),
),
Err(GiError::ConfigNotFound { path }) => (
Err(LoreError::ConfigNotFound { path }) => (
ConfigCheck {
result: CheckResult {
status: CheckStatus::Error,
@@ -264,7 +264,7 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck {
url: Some(config.gitlab.base_url.clone()),
username: Some(user.username),
},
Err(GiError::GitLabAuthFailed) => GitLabCheck {
Err(LoreError::GitLabAuthFailed) => GitLabCheck {
result: CheckResult {
status: CheckStatus::Error,
message: Some("Authentication failed. Check your token.".to_string()),

88
src/cli/commands/embed.rs Normal file
View File

@@ -0,0 +1,88 @@
//! Embed command: generate vector embeddings for documents via Ollama.
use console::style;
use serde::Serialize;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::embed_documents;
use crate::Config;
/// Result of the embed command.
///
/// Each counter is copied from the same-named field of the embedding
/// pipeline's result in `run_embed`. The struct is serialized as the `data`
/// payload of the robot-mode JSON output.
#[derive(Debug, Default, Serialize)]
pub struct EmbedCommandResult {
/// Value of the pipeline's `embedded` counter.
pub embedded: usize,
/// Value of the pipeline's `failed` counter.
pub failed: usize,
/// Value of the pipeline's `skipped` counter.
pub skipped: usize,
}
/// Run the embed command.
///
/// Steps, in order:
/// 1. Open the database configured in `config.storage.db_path`.
/// 2. Build an Ollama client from `config.embedding` and run its health
///    check, returning early with the error if the check fails.
/// 3. When `retry_failed` is set, reset `last_error` and `attempt_count` on
///    every `embedding_metadata` row that currently carries an error.
/// 4. Run the embedding pipeline and copy its counters into the result.
///
/// # Errors
/// Propagates failures from opening the database, the health check, the
/// reset statement, and the pipeline itself.
pub async fn run_embed(
    config: &Config,
    retry_failed: bool,
) -> Result<EmbedCommandResult> {
    let conn = create_connection(&get_db_path(config.storage.db_path.as_deref()))?;

    // Only base URL and model come from user settings; everything else keeps
    // the client defaults.
    let embedding = &config.embedding;
    let client = OllamaClient::new(OllamaConfig {
        base_url: embedding.base_url.clone(),
        model: embedding.model.clone(),
        ..OllamaConfig::default()
    });

    // Fail fast before touching any rows.
    client.health_check().await?;

    if retry_failed {
        // Clearing the error columns makes previously failed rows eligible again.
        conn.execute(
            "UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0\nWHERE last_error IS NOT NULL",
            [],
        )?;
    }

    let outcome = embed_documents(&conn, &client, &embedding.model, None).await?;

    Ok(EmbedCommandResult {
        embedded: outcome.embedded,
        failed: outcome.failed,
        skipped: outcome.skipped,
    })
}
/// Print a human-readable summary of an embed run to stdout.
///
/// The embedded count is always shown; the failed and skipped counts are
/// shown only when they are non-zero.
pub fn print_embed(result: &EmbedCommandResult) {
    let banner = style("done").green().bold();
    println!("{} Embedding complete", banner);
    println!(" Embedded: {}", result.embedded);

    let (failed, skipped) = (result.failed, result.skipped);
    if failed != 0 {
        println!(" Failed: {}", style(failed).red());
    }
    if skipped != 0 {
        println!(" Skipped: {}", skipped);
    }
}
/// JSON envelope for robot-mode output of the embed command.
///
/// Borrows the command result so it can be serialized without cloning.
#[derive(Serialize)]
struct EmbedJsonOutput<'a> {
// Success flag of the envelope; `print_embed_json` always sets it to `true`.
ok: bool,
// The embed counters, serialized under the `data` key.
data: &'a EmbedCommandResult,
}
/// Print the embed result as a single-line JSON envelope on stdout.
///
/// The envelope has the shape `{"ok": true, "data": {...}}`.
pub fn print_embed_json(result: &EmbedCommandResult) {
    let envelope = EmbedJsonOutput {
        data: result,
        ok: true,
    };
    let rendered = serde_json::to_string(&envelope).unwrap();
    println!("{}", rendered);
}

View File

@@ -0,0 +1,205 @@
//! Generate searchable documents from ingested GitLab data.
use console::style;
use rusqlite::Connection;
use serde::Serialize;
use tracing::info;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::documents::{regenerate_dirty_documents, SourceType};
use crate::Config;
/// Number of source rows handled per chunk when seeding `dirty_sources` in full mode.
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
/// Result of a generate-docs run.
#[derive(Debug, Default)]
pub struct GenerateDocsResult {
/// `regenerated` counter reported by `regenerate_dirty_documents`.
pub regenerated: usize,
/// `unchanged` counter reported by `regenerate_dirty_documents`.
pub unchanged: usize,
/// `errored` counter reported by `regenerate_dirty_documents`.
pub errored: usize,
/// Rows added to `dirty_sources` by seeding; stays 0 outside full mode.
pub seeded: usize,
/// Whether the run was started in full mode (seed everything, then drain).
pub full_mode: bool,
}
/// Run the generate-docs pipeline.
///
/// Default mode: process only existing dirty_sources entries.
/// Full mode: seed dirty_sources with ALL entities, then drain.
pub fn run_generate_docs(
config: &Config,
full: bool,
project_filter: Option<&str>,
) -> Result<GenerateDocsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mut result = GenerateDocsResult {
full_mode: full,
..Default::default()
};
if full {
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
}
let regen = regenerate_dirty_documents(&conn)?;
result.regenerated = regen.regenerated;
result.unchanged = regen.unchanged;
result.errored = regen.errored;
if full {
// Optimize FTS index after bulk rebuild
let _ = conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
[],
);
info!("FTS index optimized after full rebuild");
}
Ok(result)
}
/// Seed dirty_sources with all entities of the given type using keyset pagination.
fn seed_dirty(
conn: &Connection,
source_type: SourceType,
project_filter: Option<&str>,
) -> Result<usize> {
let table = match source_type {
SourceType::Issue => "issues",
SourceType::MergeRequest => "merge_requests",
SourceType::Discussion => "discussions",
};
let type_str = source_type.as_str();
let now = chrono::Utc::now().timestamp_millis();
let mut total_seeded: usize = 0;
let mut last_id: i64 = 0;
loop {
let inserted = if let Some(project) = project_filter {
// Resolve project to ID for filtering
let project_id: Option<i64> = conn
.query_row(
"SELECT id FROM projects WHERE path_with_namespace = ?1 COLLATE NOCASE",
[project],
|row| row.get(0),
)
.ok();
let Some(pid) = project_id else {
break;
};
conn.execute(
&format!(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5
ON CONFLICT(source_type, source_id) DO NOTHING"
),
rusqlite::params![type_str, now, last_id, pid, FULL_MODE_CHUNK_SIZE],
)?
} else {
conn.execute(
&format!(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4
ON CONFLICT(source_type, source_id) DO NOTHING"
),
rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE],
)?
};
if inserted == 0 {
break;
}
// Advance keyset cursor to the max id within the chunk window
let max_id: i64 = conn.query_row(
&format!(
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
table = table
),
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|row| row.get(0),
)?;
total_seeded += inserted;
last_id = max_id;
}
info!(
source_type = type_str,
seeded = total_seeded,
"Seeded dirty_sources"
);
Ok(total_seeded)
}
/// Print a human-readable summary of a generate-docs run to stdout.
///
/// The seeded count appears only for full-mode runs and the errored count
/// only when it is non-zero.
pub fn print_generate_docs(result: &GenerateDocsResult) {
    let mode = match result.full_mode {
        true => "full",
        false => "incremental",
    };
    let banner = style("done").green().bold();
    println!("{} Document generation complete ({})", banner, mode);

    if result.full_mode {
        println!(" Seeded: {}", result.seeded);
    }
    println!(" Regenerated: {}", result.regenerated);
    println!(" Unchanged: {}", result.unchanged);

    let errored = result.errored;
    if errored != 0 {
        println!(" Errored: {}", style(errored).red());
    }
}
/// JSON envelope for robot-mode output of the generate-docs command.
#[derive(Serialize)]
struct GenerateDocsJsonOutput {
// Success flag of the envelope; `print_generate_docs_json` always sets it to `true`.
ok: bool,
// Run summary, serialized under the `data` key.
data: GenerateDocsJsonData,
}
/// `data` payload of the generate-docs JSON envelope.
#[derive(Serialize)]
struct GenerateDocsJsonData {
// Either "full" or "incremental", as set by `print_generate_docs_json`.
mode: String,
// Omitted from the JSON entirely when `None` (set only for full-mode runs).
#[serde(skip_serializing_if = "Option::is_none")]
seeded: Option<usize>,
// Counters copied from `GenerateDocsResult`.
regenerated: usize,
unchanged: usize,
errored: usize,
}
/// Print the generate-docs result as a single-line JSON envelope on stdout.
///
/// `mode` is "full" or "incremental"; `seeded` is included only for
/// full-mode runs.
pub fn print_generate_docs_json(result: &GenerateDocsResult) {
    let mode = if result.full_mode { "full" } else { "incremental" };
    let data = GenerateDocsJsonData {
        mode: mode.to_string(),
        seeded: result.full_mode.then_some(result.seeded),
        regenerated: result.regenerated,
        unchanged: result.unchanged,
        errored: result.errored,
    };
    let envelope = GenerateDocsJsonOutput { ok: true, data };
    println!("{}", serde_json::to_string(&envelope).unwrap());
}

View File

@@ -7,7 +7,7 @@ use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::error::{GiError, Result};
use crate::core::error::{LoreError, Result};
use crate::core::lock::{AppLock, LockOptions};
use crate::core::paths::get_db_path;
use crate::gitlab::GitLabClient;
@@ -51,7 +51,7 @@ pub async fn run_ingest(
) -> Result<IngestResult> {
// Validate resource type early
if resource_type != "issues" && resource_type != "mrs" {
return Err(GiError::Other(format!(
return Err(LoreError::Other(format!(
"Invalid resource type '{}'. Valid types: issues, mrs",
resource_type
)));
@@ -74,7 +74,7 @@ pub async fn run_ingest(
lock.acquire(force)?;
// Get token from environment
let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| GiError::TokenNotSet {
let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
env_var: config.gitlab.token_env_var.clone(),
})?;
@@ -119,12 +119,12 @@ pub async fn run_ingest(
if projects.is_empty() {
if let Some(filter) = project_filter {
return Err(GiError::Other(format!(
return Err(LoreError::Other(format!(
"Project '{}' not found in configuration",
filter
)));
}
return Err(GiError::Other(
return Err(LoreError::Other(
"No projects configured. Run 'lore init' first.".to_string(),
));
}

View File

@@ -4,7 +4,7 @@ use std::fs;
use crate::core::config::{MinimalConfig, MinimalGitLabConfig, ProjectConfig};
use crate::core::db::{create_connection, run_migrations};
use crate::core::error::{GiError, Result};
use crate::core::error::{LoreError, Result};
use crate::core::paths::{get_config_path, get_data_dir};
use crate::gitlab::{GitLabClient, GitLabProject};
@@ -45,32 +45,30 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
let config_path = get_config_path(options.config_path.as_deref());
let data_dir = get_data_dir();
// 1. Check if config exists
if config_path.exists() {
// 1. Check if config exists (force takes precedence over non_interactive)
if config_path.exists() && !options.force {
if options.non_interactive {
return Err(GiError::Other(format!(
"Config file exists at {}. Cannot proceed in non-interactive mode.",
return Err(LoreError::Other(format!(
"Config file exists at {}. Use --force to overwrite.",
config_path.display()
)));
}
if !options.force {
return Err(GiError::Other(
"User cancelled config overwrite.".to_string(),
));
}
return Err(LoreError::Other(
"User cancelled config overwrite.".to_string(),
));
}
// 2. Validate GitLab URL format
if url::Url::parse(&inputs.gitlab_url).is_err() {
return Err(GiError::Other(format!(
return Err(LoreError::Other(format!(
"Invalid GitLab URL: {}",
inputs.gitlab_url
)));
}
// 3. Check token is set in environment
let token = std::env::var(&inputs.token_env_var).map_err(|_| GiError::TokenNotSet {
let token = std::env::var(&inputs.token_env_var).map_err(|_| LoreError::TokenNotSet {
env_var: inputs.token_env_var.clone(),
})?;
@@ -78,8 +76,8 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
let client = GitLabClient::new(&inputs.gitlab_url, &token, None);
let gitlab_user = client.get_current_user().await.map_err(|e| {
if matches!(e, GiError::GitLabAuthFailed) {
GiError::Other(format!("Authentication failed for {}", inputs.gitlab_url))
if matches!(e, LoreError::GitLabAuthFailed) {
LoreError::Other(format!("Authentication failed for {}", inputs.gitlab_url))
} else {
e
}
@@ -95,8 +93,8 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
for project_path in &inputs.project_paths {
let project = client.get_project(project_path).await.map_err(|e| {
if matches!(e, GiError::GitLabNotFound { .. }) {
GiError::Other(format!("Project not found: {project_path}"))
if matches!(e, LoreError::GitLabNotFound { .. }) {
LoreError::Other(format!("Project not found: {project_path}"))
} else {
e
}

View File

@@ -3,21 +3,33 @@
pub mod auth_test;
pub mod count;
pub mod doctor;
pub mod embed;
pub mod generate_docs;
pub mod ingest;
pub mod init;
pub mod list;
pub mod search;
pub mod show;
pub mod stats;
pub mod sync;
pub mod sync_status;
pub use auth_test::run_auth_test;
pub use count::{print_count, print_count_json, run_count};
pub use doctor::{print_doctor_results, run_doctor};
pub use embed::{print_embed, print_embed_json, run_embed};
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
pub use stats::{print_stats, print_stats_json, run_stats};
pub use search::{
print_search_results, print_search_results_json, run_search, SearchCliFilters, SearchResponse,
};
pub use ingest::{print_ingest_summary, print_ingest_summary_json, run_ingest};
pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use list::{
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
};
pub use sync::{print_sync, print_sync_json, run_sync, SyncOptions, SyncResult};
pub use show::{
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
run_show_mr,

402
src/cli/commands/search.rs Normal file
View File

@@ -0,0 +1,402 @@
//! Search command: lexical (FTS5) search with filter support and single-query hydration.
use console::style;
use serde::Serialize;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use crate::documents::SourceType;
use crate::search::{
apply_filters, get_result_snippet, rank_rrf, search_fts, FtsQueryMode, PathFilter,
SearchFilters,
};
use crate::Config;
/// Display-ready search result with all fields hydrated.
///
/// Built by `run_search` from one hydrated database row plus its ranking entry.
#[derive(Debug, Serialize)]
pub struct SearchResultDisplay {
/// `documents.id` of the matched document.
pub document_id: i64,
/// Raw `documents.source_type` value; the printer recognizes "issue",
/// "merge_request" and "discussion" and shows anything else verbatim.
pub source_type: String,
pub title: String,
pub url: Option<String>,
/// Taken from `documents.author_username`.
pub author: Option<String>,
/// `documents.created_at` converted with `ms_to_iso`.
pub created_at: Option<String>,
/// `documents.updated_at` converted with `ms_to_iso`.
pub updated_at: Option<String>,
/// `projects.path_with_namespace` of the owning project.
pub project_path: String,
/// Non-empty label names from `document_labels`.
pub labels: Vec<String>,
/// Non-empty paths from `document_paths`.
pub paths: Vec<String>,
/// Output of `get_result_snippet`, given the FTS snippet (if any) and the
/// document's content text.
pub snippet: String,
/// `normalized_score` of the ranking entry, or 0.0 when none exists.
pub score: f64,
/// Present only when explain mode was requested; omitted from JSON otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
pub explain: Option<ExplainData>,
}
/// Ranking explanation for --explain output.
///
/// All three fields are copied from the document's RRF ranking entry.
#[derive(Debug, Serialize)]
pub struct ExplainData {
/// Rank in the vector list, if the document appeared there.
pub vector_rank: Option<usize>,
/// Rank in the FTS list, if the document appeared there.
pub fts_rank: Option<usize>,
/// RRF score as reported by the ranking entry.
pub rrf_score: f64,
}
/// Search response wrapper.
///
/// Returned by `run_search` and consumed by both the human-readable and the
/// JSON printers.
#[derive(Debug, Serialize)]
pub struct SearchResponse {
/// The query string exactly as given by the caller.
pub query: String,
/// Search mode label; `run_search` currently always reports "lexical".
pub mode: String,
/// Number of entries in `results`.
pub total_results: usize,
pub results: Vec<SearchResultDisplay>,
/// Non-fatal messages for the user; the human-readable printer writes them to stderr.
pub warnings: Vec<String>,
}
/// Raw filter values from CLI args; `run_search` converts them into `SearchFilters`.
pub struct SearchCliFilters {
/// Parsed with `SourceType::parse`.
pub source_type: Option<String>,
/// Passed through unchanged as the author filter.
pub author: Option<String>,
/// Resolved to a project id with `resolve_project`; resolution errors abort the search.
pub project: Option<String>,
/// Passed through unchanged as the label filter.
pub labels: Vec<String>,
/// A value ending in '/' becomes a prefix filter; anything else is an exact match.
pub path: Option<String>,
/// Parsed with `parse_since`.
pub after: Option<String>,
/// Parsed with `parse_since`.
pub updated_after: Option<String>,
/// Requested result limit, passed through to `SearchFilters`.
pub limit: usize,
}
/// Run a lexical search query.
///
/// Pipeline: FTS lookup -> RRF ranking (FTS list only) -> post-retrieval
/// filtering -> hydration of the surviving document ids -> display rows.
///
/// # Arguments
/// * `config` - supplies the database path.
/// * `query` - the user's search text, handed to `search_fts` together with `fts_mode`.
/// * `cli_filters` - raw filter values from the command line.
/// * `fts_mode` - how `search_fts` should interpret the query.
/// * `explain` - when true, each result carries its ranking breakdown.
///
/// # Returns
/// A `SearchResponse`. Filter values that cannot be parsed (`--type`,
/// `--after`, `--updated-after`) do not abort the search; the affected filter
/// is left out and a message is added to `warnings` so the caller can see
/// that the results are broader than requested.
///
/// # Errors
/// Propagates failures from opening the database, project resolution, the FTS
/// query, filtering, and hydration.
pub fn run_search(
    config: &Config,
    query: &str,
    cli_filters: SearchCliFilters,
    fts_mode: FtsQueryMode,
    explain: bool,
) -> Result<SearchResponse> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    // Check if any documents exist. A failing count is treated like an empty index.
    let doc_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap_or(0);

    if doc_count == 0 {
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: "lexical".to_string(),
            total_results: 0,
            results: vec![],
            warnings: vec![
                "No documents indexed. Run 'lore generate-docs' first.".to_string()
            ],
        });
    }

    // Collects notices about filter values that had to be dropped.
    let mut warnings: Vec<String> = Vec::new();

    // Build filters
    let source_type = cli_filters
        .source_type
        .as_deref()
        .and_then(SourceType::parse);
    if source_type.is_none() {
        if let Some(raw) = cli_filters.source_type.as_deref() {
            warnings.push(format!(
                "Unrecognized --type value '{raw}'; type filter ignored."
            ));
        }
    }

    let project_id = cli_filters
        .project
        .as_deref()
        .map(|p| resolve_project(&conn, p))
        .transpose()?;

    let after = cli_filters.after.as_deref().and_then(parse_since);
    if after.is_none() {
        if let Some(raw) = cli_filters.after.as_deref() {
            warnings.push(format!(
                "Could not parse --after value '{raw}'; filter ignored."
            ));
        }
    }

    let updated_after = cli_filters.updated_after.as_deref().and_then(parse_since);
    if updated_after.is_none() {
        if let Some(raw) = cli_filters.updated_after.as_deref() {
            warnings.push(format!(
                "Could not parse --updated-after value '{raw}'; filter ignored."
            ));
        }
    }

    // A trailing slash selects prefix matching; otherwise the path must match exactly.
    let path = cli_filters.path.as_deref().map(|p| {
        if p.ends_with('/') {
            PathFilter::Prefix(p.to_string())
        } else {
            PathFilter::Exact(p.to_string())
        }
    });

    let filters = SearchFilters {
        source_type,
        author: cli_filters.author,
        project_id,
        after,
        updated_after,
        labels: cli_filters.labels,
        path,
        limit: cli_filters.limit,
    };

    // Adaptive recall: wider initial fetch when filters applied, because
    // filtering happens after retrieval and discards candidates.
    let requested = filters.clamp_limit();
    let top_k = if filters.has_any_filter() {
        (requested * 50).clamp(200, 1500)
    } else {
        (requested * 10).clamp(50, 1500)
    };

    // FTS search
    let fts_results = search_fts(&conn, query, top_k, fts_mode)?;
    let fts_tuples: Vec<(i64, f64)> = fts_results
        .iter()
        .map(|r| (r.document_id, r.bm25_score))
        .collect();

    // Build snippet map before ranking
    let snippet_map: std::collections::HashMap<i64, String> = fts_results
        .iter()
        .map(|r| (r.document_id, r.snippet.clone()))
        .collect();

    // RRF ranking (single-list for lexical mode: the vector list is empty)
    let ranked = rank_rrf(&[], &fts_tuples);
    let ranked_ids: Vec<i64> = ranked.iter().map(|r| r.document_id).collect();

    // Apply post-retrieval filters
    let filtered_ids = apply_filters(&conn, &ranked_ids, &filters)?;

    if filtered_ids.is_empty() {
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: "lexical".to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    // Hydrate results in single round-trip
    let hydrated = hydrate_results(&conn, &filtered_ids)?;

    // Build display results preserving filter order
    let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> = ranked
        .iter()
        .map(|r| (r.document_id, r))
        .collect();

    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    for row in &hydrated {
        let rrf = rrf_map.get(&row.document_id);
        let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
        let snippet = get_result_snippet(fts_snippet, &row.content_text);

        let explain_data = if explain {
            rrf.map(|r| ExplainData {
                vector_rank: r.vector_rank,
                fts_rank: r.fts_rank,
                rrf_score: r.rrf_score,
            })
        } else {
            None
        };

        results.push(SearchResultDisplay {
            document_id: row.document_id,
            source_type: row.source_type.clone(),
            title: row.title.clone(),
            url: row.url.clone(),
            author: row.author.clone(),
            created_at: row.created_at.map(ms_to_iso),
            updated_at: row.updated_at.map(ms_to_iso),
            project_path: row.project_path.clone(),
            labels: row.labels.clone(),
            paths: row.paths.clone(),
            snippet,
            score: rrf.map(|r| r.normalized_score).unwrap_or(0.0),
            explain: explain_data,
        });
    }

    Ok(SearchResponse {
        query: query.to_string(),
        mode: "lexical".to_string(),
        total_results: results.len(),
        results,
        warnings,
    })
}
/// Raw row from hydration query.
///
/// Timestamps are kept as the integers read from the database; `run_search`
/// converts them with `ms_to_iso` when building display rows.
struct HydratedRow {
document_id: i64,
source_type: String,
title: String,
url: Option<String>,
// Read from `documents.author_username`.
author: Option<String>,
created_at: Option<i64>,
updated_at: Option<i64>,
// Full document text; used as input to `get_result_snippet`.
content_text: String,
// Read from `projects.path_with_namespace`.
project_path: String,
// Decoded from the JSON arrays produced by the hydration query.
labels: Vec<String>,
paths: Vec<String>,
}
/// Hydrate document IDs into full display rows in a single query.
///
/// The ids are serialized to a JSON array and expanded with `json_each()`;
/// `ORDER BY j.key` returns rows in the order the ids were passed. Labels and
/// paths come from correlated `json_group_array` subqueries and are decoded
/// with `parse_json_array`.
///
/// Returns an empty vector without touching the database when
/// `document_ids` is empty.
fn hydrate_results(
    conn: &rusqlite::Connection,
    document_ids: &[i64],
) -> Result<Vec<HydratedRow>> {
    const HYDRATE_SQL: &str = r#"
SELECT d.id, d.source_type, d.title, d.url, d.author_username,
d.created_at, d.updated_at, d.content_text,
p.path_with_namespace AS project_path,
(SELECT json_group_array(dl.label_name)
FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
(SELECT json_group_array(dp.path)
FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json
FROM json_each(?1) AS j
JOIN documents d ON d.id = j.value
JOIN projects p ON p.id = d.project_id
ORDER BY j.key
"#;

    if document_ids.is_empty() {
        return Ok(Vec::new());
    }

    let ids_json = match serde_json::to_string(document_ids) {
        Ok(encoded) => encoded,
        Err(e) => return Err(LoreError::Other(e.to_string())),
    };

    let mut stmt = conn.prepare(HYDRATE_SQL)?;
    let mut cursor = stmt.query([ids_json])?;

    let mut hydrated = Vec::new();
    while let Some(row) = cursor.next()? {
        // The two JSON columns are read first, then the scalar columns in order.
        let labels_json: String = row.get(9)?;
        let paths_json: String = row.get(10)?;
        hydrated.push(HydratedRow {
            document_id: row.get(0)?,
            source_type: row.get(1)?,
            title: row.get(2)?,
            url: row.get(3)?,
            author: row.get(4)?,
            created_at: row.get(5)?,
            updated_at: row.get(6)?,
            content_text: row.get(7)?,
            project_path: row.get(8)?,
            labels: parse_json_array(&labels_json),
            paths: parse_json_array(&paths_json),
        });
    }

    Ok(hydrated)
}
/// Parse a JSON array string into a Vec<String>, filtering out null/empty.
///
/// Only JSON string elements are kept; any other element type is dropped, as
/// are empty strings. Input that is not a JSON array yields an empty vector.
fn parse_json_array(json: &str) -> Vec<String> {
    let elements: Vec<serde_json::Value> = serde_json::from_str(json).unwrap_or_default();

    let mut strings = Vec::new();
    for element in elements {
        if let serde_json::Value::String(text) = element {
            if !text.is_empty() {
                strings.push(text);
            }
        }
    }
    strings
}
/// Print human-readable search results.
///
/// Warnings go to stderr; everything else goes to stdout. With no results a
/// single "No results found" line is printed. Otherwise a header is followed
/// by one entry per result: title line, optional URL, project and author,
/// optional labels, the snippet with `<mark>` tags removed, and (when
/// present) the explain line.
pub fn print_search_results(response: &SearchResponse) {
    for warning in &response.warnings {
        eprintln!("{} {}", style("Warning:").yellow(), warning);
    }

    let query = style(&response.query).bold();
    if response.results.is_empty() {
        println!("No results found for '{}'", query);
        return;
    }

    println!(
        "{} results for '{}' ({})",
        response.total_results, query, response.mode
    );
    println!();

    for (index, result) in response.results.iter().enumerate() {
        // Friendly label for known source types; unknown ones are shown verbatim.
        let type_prefix = match result.source_type.as_str() {
            "issue" => "Issue",
            "merge_request" => "MR",
            "discussion" => "Discussion",
            other => other,
        };
        println!(
            "[{}] {} - {} (score: {:.2})",
            index + 1,
            style(type_prefix).cyan(),
            result.title,
            result.score
        );

        if let Some(url) = &result.url {
            println!(" {}", style(url).dim());
        }

        let author = match result.author.as_deref() {
            Some(name) => format!("@{}", name),
            None => String::new(),
        };
        println!(" {} | {}", style(&result.project_path).dim(), author);

        if !result.labels.is_empty() {
            println!(" Labels: {}", result.labels.join(", "));
        }

        // Highlight markers are HTML tags and would only be noise in a terminal.
        let clean_snippet = result.snippet.replace("<mark>", "").replace("</mark>", "");
        println!(" {}", style(clean_snippet).dim());

        if let Some(explain) = &result.explain {
            let fts_rank = match explain.fts_rank {
                Some(rank) => rank.to_string(),
                None => String::from("-"),
            };
            println!(
                " {} fts_rank={} rrf_score={:.6}",
                style("[explain]").magenta(),
                fts_rank,
                explain.rrf_score
            );
        }

        println!();
    }
}
/// JSON envelope for robot-mode output of the search command.
///
/// Borrows the response so it can be serialized without cloning.
#[derive(Serialize)]
struct SearchJsonOutput<'a> {
// Success flag of the envelope; `print_search_results_json` always sets it to `true`.
ok: bool,
// The search response, serialized under the `data` key.
data: &'a SearchResponse,
// Run metadata, serialized under the `meta` key.
meta: SearchMeta,
}
/// `meta` section of the search JSON envelope.
#[derive(Serialize)]
struct SearchMeta {
// Elapsed time in milliseconds, as supplied by the caller of `print_search_results_json`.
elapsed_ms: u64,
}
/// Print the search response as a single-line JSON envelope on stdout.
///
/// The envelope has the shape
/// `{"ok": true, "data": {...}, "meta": {"elapsed_ms": N}}`, where
/// `elapsed_ms` is the value supplied by the caller.
pub fn print_search_results_json(response: &SearchResponse, elapsed_ms: u64) {
    let meta = SearchMeta { elapsed_ms };
    let envelope = SearchJsonOutput {
        ok: true,
        data: response,
        meta,
    };
    let rendered = serde_json::to_string(&envelope).unwrap();
    println!("{}", rendered);
}

View File

@@ -6,7 +6,7 @@ use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::error::{GiError, Result};
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::time::ms_to_iso;
@@ -188,11 +188,11 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
.collect::<std::result::Result<Vec<_>, _>>()?;
match issues.len() {
0 => Err(GiError::NotFound(format!("Issue #{} not found", iid))),
0 => Err(LoreError::NotFound(format!("Issue #{} not found", iid))),
1 => Ok(issues.into_iter().next().unwrap()),
_ => {
let projects: Vec<String> = issues.iter().map(|i| i.project_path.clone()).collect();
Err(GiError::Ambiguous(format!(
Err(LoreError::Ambiguous(format!(
"Issue #{} exists in multiple projects: {}. Use --project to specify.",
iid,
projects.join(", ")
@@ -386,11 +386,11 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<
.collect::<std::result::Result<Vec<_>, _>>()?;
match mrs.len() {
0 => Err(GiError::NotFound(format!("MR !{} not found", iid))),
0 => Err(LoreError::NotFound(format!("MR !{} not found", iid))),
1 => Ok(mrs.into_iter().next().unwrap()),
_ => {
let projects: Vec<String> = mrs.iter().map(|m| m.project_path.clone()).collect();
Err(GiError::Ambiguous(format!(
Err(LoreError::Ambiguous(format!(
"MR !{} exists in multiple projects: {}. Use --project to specify.",
iid,
projects.join(", ")

348
src/cli/commands/stats.rs Normal file
View File

@@ -0,0 +1,348 @@
//! Stats command: document counts, embedding coverage, queue status, integrity checks.
use console::style;
use rusqlite::Connection;
use serde::Serialize;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::Config;
/// Result of the stats command.
///
/// Aggregates every section reported by `run_stats`.
#[derive(Debug, Default, Serialize)]
pub struct StatsResult {
/// Counts over the `documents` table.
pub documents: DocumentStats,
/// Counts over `embedding_metadata`; left at defaults when that table is absent.
pub embeddings: EmbeddingStats,
/// Row count of the `documents_fts` index.
pub fts: FtsStats,
/// Pending and failed counts for the work queues.
pub queues: QueueStats,
/// Integrity findings; omitted from JSON when no integrity pass was run.
#[serde(skip_serializing_if = "Option::is_none")]
pub integrity: Option<IntegrityResult>,
}
/// Row counts over the `documents` table.
#[derive(Debug, Default, Serialize)]
pub struct DocumentStats {
/// All rows.
pub total: i64,
/// Rows with `source_type = 'issue'`.
pub issues: i64,
/// Rows with `source_type = 'merge_request'`.
pub merge_requests: i64,
/// Rows with `source_type = 'discussion'`.
pub discussions: i64,
/// Rows with `is_truncated = 1`.
pub truncated: i64,
}
/// Embedding coverage derived from `embedding_metadata` rows without an error.
#[derive(Debug, Default, Serialize)]
pub struct EmbeddingStats {
/// Distinct `document_id` values among rows with `last_error IS NULL`.
pub embedded_documents: i64,
/// Number of rows with `last_error IS NULL`.
pub total_chunks: i64,
/// `embedded_documents / documents.total * 100`, or 0.0 when there are no documents.
pub coverage_pct: f64,
}
/// Full-text index statistics.
#[derive(Debug, Default, Serialize)]
pub struct FtsStats {
/// Row count of `documents_fts`.
pub indexed: i64,
}
/// Queue depths, split by whether the row carries a recorded error.
#[derive(Debug, Default, Serialize)]
pub struct QueueStats {
/// `dirty_sources` rows with `last_error IS NULL`.
pub dirty_sources: i64,
/// `dirty_sources` rows with `last_error IS NOT NULL`.
pub dirty_sources_failed: i64,
/// `pending_discussion_fetches` rows with `last_error IS NULL`; 0 when the table is absent.
pub pending_discussion_fetches: i64,
/// `pending_discussion_fetches` rows with `last_error IS NOT NULL`; 0 when the table is absent.
pub pending_discussion_fetches_failed: i64,
}
/// Findings of the integrity pass in `run_stats`.
///
/// The findings describe the state observed before any repair was applied.
#[derive(Debug, Default, Serialize)]
pub struct IntegrityResult {
/// True when there is no FTS mismatch, no orphan and no stale metadata.
pub ok: bool,
/// True when the `documents_fts` row count differs from the `documents` row count.
pub fts_doc_mismatch: bool,
/// `embedding_metadata` rows whose `document_id` has no matching document.
pub orphan_embeddings: i64,
/// `embedding_metadata` rows with `chunk_index = 0` whose `document_hash`
/// differs from the document's current `content_hash`.
pub stale_metadata: i64,
/// What the repair step did; omitted from JSON when no repair was requested.
#[serde(skip_serializing_if = "Option::is_none")]
pub repair: Option<RepairResult>,
}
/// Actions taken by the repair step of `run_stats`.
#[derive(Debug, Default, Serialize)]
pub struct RepairResult {
/// True when the FTS index was rebuilt.
pub fts_rebuilt: bool,
/// Number of orphaned rows deleted from `embedding_metadata`.
pub orphans_deleted: i64,
/// Number of `embedding_metadata` rows deleted because their document's hash had changed.
pub stale_cleared: i64,
}
/// Run the stats command.
pub fn run_stats(
config: &Config,
check: bool,
repair: bool,
) -> Result<StatsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mut result = StatsResult::default();
// Document counts
result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
result.documents.issues =
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?;
result.documents.merge_requests =
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?;
result.documents.discussions =
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?;
result.documents.truncated =
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?;
// Embedding stats — skip gracefully if table doesn't exist (Gate A only)
if table_exists(&conn, "embedding_metadata") {
let embedded = count_query(
&conn,
"SELECT COUNT(DISTINCT document_id) FROM embedding_metadata WHERE last_error IS NULL",
)?;
let chunks = count_query(
&conn,
"SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL",
)?;
result.embeddings.embedded_documents = embedded;
result.embeddings.total_chunks = chunks;
result.embeddings.coverage_pct = if result.documents.total > 0 {
(embedded as f64 / result.documents.total as f64) * 100.0
} else {
0.0
};
}
// FTS stats
result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
// Queue stats
result.queues.dirty_sources =
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?;
result.queues.dirty_sources_failed =
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?;
if table_exists(&conn, "pending_discussion_fetches") {
result.queues.pending_discussion_fetches = count_query(
&conn,
"SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NULL",
)?;
result.queues.pending_discussion_fetches_failed = count_query(
&conn,
"SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NOT NULL",
)?;
}
// Integrity check
if check {
let mut integrity = IntegrityResult::default();
// FTS/doc count mismatch
integrity.fts_doc_mismatch = result.fts.indexed != result.documents.total;
// Orphan embeddings (rowid/1000 should match a document ID)
if table_exists(&conn, "embeddings") {
integrity.orphan_embeddings = count_query(
&conn,
"SELECT COUNT(*) FROM embedding_metadata em
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = em.document_id)",
)?;
}
// Stale metadata (document_hash != current content_hash)
if table_exists(&conn, "embedding_metadata") {
integrity.stale_metadata = count_query(
&conn,
"SELECT COUNT(*) FROM embedding_metadata em
JOIN documents d ON d.id = em.document_id
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash",
)?;
}
integrity.ok = !integrity.fts_doc_mismatch
&& integrity.orphan_embeddings == 0
&& integrity.stale_metadata == 0;
// Repair
if repair {
let mut repair_result = RepairResult::default();
if integrity.fts_doc_mismatch {
conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
[],
)?;
repair_result.fts_rebuilt = true;
}
if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") {
let deleted = conn.execute(
"DELETE FROM embedding_metadata
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
[],
)?;
repair_result.orphans_deleted = deleted as i64;
// Also clean orphaned vectors if vec0 table exists
if table_exists(&conn, "embeddings") {
let _ = conn.execute(
"DELETE FROM embeddings
WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
[],
);
}
}
if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") {
let cleared = conn.execute(
"DELETE FROM embedding_metadata
WHERE document_id IN (
SELECT em.document_id FROM embedding_metadata em
JOIN documents d ON d.id = em.document_id
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
)",
[],
)?;
repair_result.stale_cleared = cleared as i64;
}
integrity.repair = Some(repair_result);
}
result.integrity = Some(integrity);
}
Ok(result)
}
/// Run a query that yields a single integer (typically `SELECT COUNT(*) ...`)
/// and return that value.
///
/// `sql` is executed without bound parameters and must produce one row whose
/// first column can be read as an `i64`.
///
/// # Errors
/// Propagates the underlying database error (missing table, malformed SQL,
/// no row returned, ...). Failures are deliberately *not* mapped to `0`:
/// a silently zeroed count would make the statistics and the integrity
/// check report a healthy state for a query that never actually ran.
fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
    let count: i64 = conn.query_row(sql, [], |row| row.get(0))?;
    Ok(count)
}
/// Report whether a table called `table` is listed in `sqlite_master`.
///
/// A failure while querying the catalog is treated the same as "no such
/// table", so this never returns an error.
fn table_exists(conn: &Connection, table: &str) -> bool {
    let lookup = conn.query_row(
        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
        [table],
        |row| row.get::<_, i64>(0),
    );
    match lookup {
        Ok(matches) => matches > 0,
        Err(_) => false,
    }
}
/// Write a human-readable statistics report to stdout.
///
/// Sections appear in a fixed order: documents, search index, queues, and —
/// only when the corresponding data is present — the integrity check followed
/// by the repair summary.
pub fn print_stats(result: &StatsResult) {
    let docs = &result.documents;
    let embeddings = &result.embeddings;
    let queues = &result.queues;

    println!("{}", style("Documents").cyan().bold());
    println!(" Total: {}", docs.total);
    println!(" Issues: {}", docs.issues);
    println!(" Merge Requests: {}", docs.merge_requests);
    println!(" Discussions: {}", docs.discussions);
    if docs.truncated > 0 {
        println!(" Truncated: {}", style(docs.truncated).yellow());
    }
    println!();

    println!("{}", style("Search Index").cyan().bold());
    println!(" FTS indexed: {}", result.fts.indexed);
    println!(
        " Embedding coverage: {:.1}% ({}/{})",
        embeddings.coverage_pct, embeddings.embedded_documents, docs.total
    );
    if embeddings.total_chunks > 0 {
        println!(" Total chunks: {}", embeddings.total_chunks);
    }
    println!();

    println!("{}", style("Queues").cyan().bold());
    println!(
        " Dirty sources: {} pending, {} failed",
        queues.dirty_sources, queues.dirty_sources_failed
    );
    println!(
        " Discussion fetch: {} pending, {} failed",
        queues.pending_discussion_fetches, queues.pending_discussion_fetches_failed
    );

    // Everything below is only shown when an integrity check was requested.
    let integrity = match result.integrity.as_ref() {
        Some(report) => report,
        None => return,
    };

    println!();
    let verdict = if integrity.ok {
        style("OK").green()
    } else {
        style("ISSUES FOUND").red()
    }
    .bold();
    println!("{} Integrity: {}", style("Check").cyan().bold(), verdict);

    if integrity.fts_doc_mismatch {
        println!(" {} FTS/document count mismatch", style("!").red());
    }
    if integrity.orphan_embeddings > 0 {
        println!(
            " {} {} orphan embeddings",
            style("!").red(),
            integrity.orphan_embeddings
        );
    }
    if integrity.stale_metadata > 0 {
        println!(
            " {} {} stale embedding metadata",
            style("!").red(),
            integrity.stale_metadata
        );
    }

    // The repair summary exists only when a repair pass was executed.
    let repair = match integrity.repair.as_ref() {
        Some(summary) => summary,
        None => return,
    };

    println!();
    println!("{}", style("Repair").cyan().bold());
    if repair.fts_rebuilt {
        println!(" {} FTS index rebuilt", style("fixed").green());
    }
    if repair.orphans_deleted > 0 {
        println!(
            " {} {} orphan embeddings deleted",
            style("fixed").green(),
            repair.orphans_deleted
        );
    }
    if repair.stale_cleared > 0 {
        println!(
            " {} {} stale metadata entries cleared",
            style("fixed").green(),
            repair.stale_cleared
        );
    }

    let anything_fixed =
        repair.fts_rebuilt || repair.orphans_deleted != 0 || repair.stale_cleared != 0;
    if !anything_fixed {
        println!(" No issues to repair.");
    }
}
/// JSON envelope for robot-mode stats output.
///
/// Holds a borrow of the statistics rather than an owned copy, so printing
/// never has to duplicate the result field by field (and keeps compiling if a
/// non-`Copy` field is ever added to one of the stats structs).
#[derive(Serialize)]
struct StatsJsonOutput<'a> {
    /// Always `true` when built by [`print_stats_json`].
    ok: bool,
    /// The statistics being reported.
    data: &'a StatsResult,
}

/// Print JSON robot-mode output.
///
/// Writes a single line to stdout containing an object with the keys `ok`
/// and `data`, where `data` is the serialized `result`.
///
/// # Panics
/// Panics if `serde_json` fails to serialize the envelope.
pub fn print_stats_json(result: &StatsResult) {
    let output = StatsJsonOutput {
        ok: true,
        data: result,
    };
    println!("{}", serde_json::to_string(&output).unwrap());
}

124
src/cli/commands/sync.rs Normal file
View File

@@ -0,0 +1,124 @@
//! Sync command: unified orchestrator for ingest -> generate-docs -> embed.
use console::style;
use serde::Serialize;
use tracing::{info, warn};
use crate::Config;
use crate::core::error::Result;
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
use super::ingest::run_ingest;
/// Options for the sync command.
///
/// Every flag defaults to `false` (via `Default`).
#[derive(Debug, Default)]
pub struct SyncOptions {
/// Passed through to both ingest stages (issues and merge requests).
pub full: bool,
/// Passed through to both ingest stages (issues and merge requests).
pub force: bool,
/// When set, the embedding stage is skipped.
pub no_embed: bool,
/// When set, the document generation stage is skipped.
pub no_docs: bool,
}
/// Result of the sync command.
///
/// All counters start at zero; a stage that is skipped leaves its counter
/// untouched.
#[derive(Debug, Default, Serialize)]
pub struct SyncResult {
/// `issues_upserted` as reported by the issue ingest stage.
pub issues_updated: usize,
/// `mrs_upserted` as reported by the merge request ingest stage.
pub mrs_updated: usize,
/// Sum of `discussions_fetched` from the issue and merge request ingests.
pub discussions_fetched: usize,
/// `regenerated` as reported by document generation (0 when skipped).
pub documents_regenerated: usize,
/// `embedded` as reported by the embed stage (0 when skipped or when the
/// stage failed).
pub documents_embedded: usize,
}
/// Execute the sync pipeline end to end: ingest issues, ingest merge
/// requests, generate documents, then embed.
///
/// `options.force` and `options.full` are forwarded to both ingest stages.
/// Document generation and embedding can each be switched off through
/// `options.no_docs` / `options.no_embed`.
///
/// # Errors
/// A failure in either ingest stage or in document generation aborts the
/// pipeline and is returned. A failure in the embedding stage is only logged
/// as a warning; the pipeline still returns `Ok`.
pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResult> {
    // Stage 1: issues.
    info!("Sync stage 1/4: ingesting issues");
    let issue_stage =
        run_ingest(config, "issues", None, options.force, options.full, true).await?;

    // Stage 2: merge requests.
    info!("Sync stage 2/4: ingesting merge requests");
    let mr_stage = run_ingest(config, "mrs", None, options.force, options.full, true).await?;

    let mut summary = SyncResult::default();
    summary.issues_updated = issue_stage.issues_upserted;
    summary.mrs_updated = mr_stage.mrs_upserted;
    summary.discussions_fetched += issue_stage.discussions_fetched;
    summary.discussions_fetched += mr_stage.discussions_fetched;

    // Stage 3: document generation, unless opted out.
    if !options.no_docs {
        info!("Sync stage 3/4: generating documents");
        summary.documents_regenerated = run_generate_docs(config, false, None)?.regenerated;
    } else {
        info!("Sync stage 3/4: skipping document generation (--no-docs)");
    }

    // Stage 4: embedding, unless opted out.
    if !options.no_embed {
        info!("Sync stage 4/4: embedding documents");
        match run_embed(config, false).await {
            Err(e) => {
                // Best effort: an embedding failure must not fail the whole sync.
                warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
            }
            Ok(embed_stage) => {
                summary.documents_embedded = embed_stage.embedded;
            }
        }
    } else {
        info!("Sync stage 4/4: skipping embedding (--no-embed)");
    }

    info!(
        issues = summary.issues_updated,
        mrs = summary.mrs_updated,
        discussions = summary.discussions_fetched,
        docs = summary.documents_regenerated,
        embedded = summary.documents_embedded,
        "Sync pipeline complete"
    );
    Ok(summary)
}
/// Print human-readable sync summary.
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
println!(
"{} Sync complete:",
style("done").green().bold(),
);
println!(" Issues updated: {}", result.issues_updated);
println!(" MRs updated: {}", result.mrs_updated);
println!(" Discussions fetched: {}", result.discussions_fetched);
println!(" Documents regenerated: {}", result.documents_regenerated);
println!(" Documents embedded: {}", result.documents_embedded);
println!(
" Elapsed: {:.1}s",
elapsed.as_secs_f64()
);
}
/// JSON output for sync.
///
/// Top-level envelope serialized by `print_sync_json`; borrows the result
/// for the lifetime `'a` instead of copying it.
#[derive(Serialize)]
struct SyncJsonOutput<'a> {
/// Set to `true` by `print_sync_json`.
ok: bool,
/// The sync counters being reported.
data: &'a SyncResult,
/// Timing information for the run.
meta: SyncMeta,
}
/// Metadata attached to the sync JSON envelope.
#[derive(Serialize)]
struct SyncMeta {
/// Elapsed time of the sync in milliseconds, as supplied by the caller.
elapsed_ms: u64,
}
/// Emit the robot-mode JSON envelope for a finished sync on stdout.
///
/// The envelope carries `ok: true`, the borrowed `result` under `data`, and
/// `elapsed_ms` under `meta`.
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) {
    let meta = SyncMeta { elapsed_ms };
    let rendered = serde_json::to_string(&SyncJsonOutput {
        ok: true,
        data: result,
        meta,
    })
    .unwrap();
    println!("{}", rendered);
}

View File

@@ -69,6 +69,18 @@ pub enum Commands {
/// Fail if prompts would be shown
#[arg(long)]
non_interactive: bool,
/// GitLab base URL (required in robot mode)
#[arg(long)]
gitlab_url: Option<String>,
/// Environment variable name holding GitLab token (required in robot mode)
#[arg(long)]
token_env_var: Option<String>,
/// Comma-separated project paths (required in robot mode)
#[arg(long)]
projects: Option<String>,
},
/// Create timestamped database backup
@@ -81,9 +93,32 @@ pub enum Commands {
yes: bool,
},
/// Search indexed documents
Search(SearchArgs),
/// Show document and index statistics
Stats(StatsArgs),
/// Generate searchable documents from ingested data
#[command(name = "generate-docs")]
GenerateDocs(GenerateDocsArgs),
/// Generate vector embeddings for documents via Ollama
Embed(EmbedArgs),
/// Run full sync pipeline: ingest -> generate-docs -> embed
Sync(SyncArgs),
/// Run pending database migrations
Migrate,
/// Quick health check: config, database, schema version
Health,
/// Machine-readable command manifest for agent self-discovery
#[command(name = "robot-docs")]
RobotDocs,
// --- Hidden backward-compat aliases ---
/// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
#[command(hide = true)]
@@ -299,6 +334,109 @@ pub struct IngestArgs {
pub full: bool,
}
/// Arguments for `lore stats`
// NOTE: with clap's derive API the `///` comments in this struct are used as
// help text, so they are intentionally left unchanged; notes use `//`.
#[derive(Parser)]
pub struct StatsArgs {
/// Run integrity checks
#[arg(long)]
pub check: bool,
/// Repair integrity issues (requires --check)
// `requires = "check"` makes clap reject `--repair` unless `--check` is given too.
#[arg(long, requires = "check")]
pub repair: bool,
}
/// Arguments for `lore search <QUERY>`
// NOTE: with clap's derive API the `///` comments in this struct are used as
// help text, so they are intentionally left unchanged; notes use `//`.
#[derive(Parser)]
pub struct SearchArgs {
/// Search query string
// Positional argument (no `#[arg(long)]`).
pub query: String,
/// Search mode (lexical, hybrid, semantic)
// Free-form string: this declaration does not restrict the accepted values.
#[arg(long, default_value = "hybrid")]
pub mode: String,
/// Filter by source type (issue, mr, discussion)
// Exposed as `--type` because `type` is a Rust keyword and cannot be the field name.
#[arg(long = "type", value_name = "TYPE")]
pub source_type: Option<String>,
/// Filter by author username
#[arg(long)]
pub author: Option<String>,
/// Filter by project path
#[arg(short = 'p', long)]
pub project: Option<String>,
/// Filter by label (repeatable, AND logic)
// `ArgAction::Append` collects every occurrence of `--label` into the vector.
#[arg(long, action = clap::ArgAction::Append)]
pub label: Vec<String>,
/// Filter by file path (trailing / for prefix match)
#[arg(long)]
pub path: Option<String>,
/// Filter by created after (7d, 2w, or YYYY-MM-DD)
// Kept as a raw string here; presumably parsed downstream — TODO confirm.
#[arg(long)]
pub after: Option<String>,
/// Filter by updated after (7d, 2w, or YYYY-MM-DD)
#[arg(long = "updated-after")]
pub updated_after: Option<String>,
/// Maximum results (default 20, max 100)
// NOTE(review): the stated maximum of 100 is not enforced by this declaration;
// presumably it is clamped by the search code — confirm.
#[arg(short = 'n', long = "limit", default_value = "20")]
pub limit: usize,
/// Show ranking explanation per result
#[arg(long)]
pub explain: bool,
/// FTS query mode: safe (default) or raw
// Free-form string: this declaration does not restrict the accepted values.
#[arg(long = "fts-mode", default_value = "safe")]
pub fts_mode: String,
}
/// Arguments for `lore generate-docs`
// NOTE: with clap's derive API the `///` comments in this struct are used as
// help text, so they are intentionally left unchanged; notes use `//`.
#[derive(Parser)]
pub struct GenerateDocsArgs {
/// Full rebuild: seed all entities into dirty queue, then drain
#[arg(long)]
pub full: bool,
/// Filter to single project
// Available as both `-p` and `--project`.
#[arg(short = 'p', long)]
pub project: Option<String>,
}
/// Arguments for `lore sync`
// NOTE: with clap's derive API the `///` comments in this struct are used as
// help text, so they are intentionally left unchanged; notes use `//`.
// All four fields are plain boolean switches that default to `false`.
#[derive(Parser)]
pub struct SyncArgs {
/// Reset cursors, fetch everything
#[arg(long)]
pub full: bool,
/// Override stale lock
#[arg(long)]
pub force: bool,
/// Skip embedding step
#[arg(long)]
pub no_embed: bool,
/// Skip document regeneration
#[arg(long)]
pub no_docs: bool,
}
/// Arguments for `lore embed`
// NOTE: with clap's derive API the `///` comments in this struct are used as
// help text, so they are intentionally left unchanged; notes use `//`.
#[derive(Parser)]
pub struct EmbedArgs {
/// Retry previously failed embeddings
// Boolean switch, `false` unless the flag is passed.
#[arg(long)]
pub retry_failed: bool,
}
/// Arguments for `lore count <ENTITY>`
#[derive(Parser)]
pub struct CountArgs {

View File

@@ -10,17 +10,23 @@ use tracing_subscriber::util::SubscriberInitExt;
use lore::Config;
use lore::cli::commands::{
InitInputs, InitOptions, ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser,
print_count, print_count_json, print_doctor_results, print_ingest_summary,
print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
print_list_mrs_json, print_show_issue, print_show_issue_json, print_show_mr,
InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser,
open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_generate_docs,
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results,
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats,
print_stats_json,
print_embed, print_embed_json, print_sync, print_sync_json,
print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count,
run_doctor, run_ingest, run_init, run_list_issues, run_list_mrs, run_show_issue, run_show_mr,
run_sync_status,
run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs,
run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions,
};
use lore::cli::{
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
SearchArgs, StatsArgs, SyncArgs,
};
use lore::cli::{Cli, Commands, CountArgs, IngestArgs, IssuesArgs, MrsArgs};
use lore::core::db::{create_connection, get_schema_version, run_migrations};
use lore::core::error::{GiError, RobotErrorOutput};
use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::paths::get_config_path;
use lore::core::paths::get_db_path;
@@ -49,6 +55,10 @@ async fn main() {
let result = match cli.command {
Commands::Issues(args) => handle_issues(cli.config.as_deref(), args, robot_mode).await,
Commands::Mrs(args) => handle_mrs(cli.config.as_deref(), args, robot_mode).await,
Commands::Search(args) => handle_search(cli.config.as_deref(), args, robot_mode).await,
Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode).await,
Commands::Count(args) => {
handle_count(cli.config.as_deref(), args, robot_mode).await
@@ -60,10 +70,29 @@ async fn main() {
Commands::Init {
force,
non_interactive,
} => handle_init(cli.config.as_deref(), force, non_interactive, robot_mode).await,
gitlab_url,
token_env_var,
projects,
} => {
handle_init(
cli.config.as_deref(),
force,
non_interactive,
robot_mode,
gitlab_url,
token_env_var,
projects,
)
.await
}
Commands::GenerateDocs(args) => {
handle_generate_docs(cli.config.as_deref(), args, robot_mode).await
}
Commands::Backup => handle_backup(robot_mode),
Commands::Reset { yes: _ } => handle_reset(robot_mode),
Commands::Migrate => handle_migrate(cli.config.as_deref(), robot_mode).await,
Commands::Health => handle_health(cli.config.as_deref(), robot_mode).await,
Commands::RobotDocs => handle_robot_docs(robot_mode),
// --- Backward-compat: deprecated aliases ---
Commands::List {
@@ -159,7 +188,7 @@ async fn main() {
}
}
/// Fallback error output for non-GiError errors in robot mode.
/// Fallback error output for non-LoreError errors in robot mode.
#[derive(Serialize)]
struct FallbackErrorOutput {
error: FallbackError,
@@ -172,8 +201,8 @@ struct FallbackError {
}
fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
// Try to downcast to GiError for structured output
if let Some(gi_error) = e.downcast_ref::<GiError>() {
// Try to downcast to LoreError for structured output
if let Some(gi_error) = e.downcast_ref::<LoreError>() {
if robot_mode {
let output = RobotErrorOutput::from(gi_error);
// Use serde_json for safe serialization; fallback constructs JSON safely
@@ -201,7 +230,7 @@ fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
}
}
// Fallback for non-GiError errors - use serde for proper JSON escaping
// Fallback for non-LoreError errors - use serde for proper JSON escaping
if robot_mode {
let output = FallbackErrorOutput {
error: FallbackError {
@@ -473,22 +502,123 @@ async fn handle_sync_status_cmd(
Ok(())
}
/// JSON output for init command.
///
/// Top-level envelope serialized by `print_init_json`.
#[derive(Serialize)]
struct InitOutput {
/// Set to `true` by `print_init_json`.
ok: bool,
/// Details of the initialization that was performed.
data: InitOutputData,
}
/// Payload of the init JSON envelope; every field is copied from the
/// `InitResult` passed to `print_init_json`.
#[derive(Serialize)]
struct InitOutputData {
/// Copied from `InitResult::config_path`.
config_path: String,
/// Copied from `InitResult::data_dir`.
data_dir: String,
/// The user reported by the init result.
user: InitOutputUser,
/// One entry per project in the init result, in the same order.
projects: Vec<InitOutputProject>,
}
/// User section of the init JSON output.
#[derive(Serialize)]
struct InitOutputUser {
/// Copied from the init result's `user.username`.
username: String,
/// Copied from the init result's `user.name`.
name: String,
}
/// A single project entry in the init JSON output.
#[derive(Serialize)]
struct InitOutputProject {
/// Copied from the project's `path` in the init result.
path: String,
/// Copied from the project's `name` in the init result.
name: String,
}
/// Print the robot-mode JSON envelope for a completed `init` to stdout.
///
/// Copies the config path, data directory, user and project list out of
/// `result` into the serializable output structs and prints them on one line.
fn print_init_json(result: &InitResult) {
    let mut projects = Vec::new();
    for project in result.projects.iter() {
        projects.push(InitOutputProject {
            path: project.path.clone(),
            name: project.name.clone(),
        });
    }

    let user = InitOutputUser {
        username: result.user.username.clone(),
        name: result.user.name.clone(),
    };

    let data = InitOutputData {
        config_path: result.config_path.clone(),
        data_dir: result.data_dir.clone(),
        user,
        projects,
    };

    let envelope = InitOutput { ok: true, data };
    println!("{}", serde_json::to_string(&envelope).unwrap());
}
async fn handle_init(
config_override: Option<&str>,
force: bool,
non_interactive: bool,
_robot_mode: bool, // TODO: Add robot mode support for init (requires non-interactive implementation)
robot_mode: bool,
gitlab_url_flag: Option<String>,
token_env_var_flag: Option<String>,
projects_flag: Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
// Robot mode: require all inputs via flags, skip interactive prompts
if robot_mode {
let missing: Vec<&str> = [
gitlab_url_flag.is_none().then_some("--gitlab-url"),
token_env_var_flag.is_none().then_some("--token-env-var"),
projects_flag.is_none().then_some("--projects"),
]
.into_iter()
.flatten()
.collect();
if !missing.is_empty() {
let output = RobotErrorWithSuggestion {
error: RobotErrorSuggestionData {
code: "MISSING_FLAGS".to_string(),
message: format!("Robot mode requires flags: {}", missing.join(", ")),
suggestion: "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project".to_string(),
},
};
eprintln!("{}", serde_json::to_string(&output)?);
std::process::exit(2);
}
let project_paths: Vec<String> = projects_flag
.unwrap()
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
let result = run_init(
InitInputs {
gitlab_url: gitlab_url_flag.unwrap(),
token_env_var: token_env_var_flag.unwrap(),
project_paths,
},
InitOptions {
config_path: config_override.map(String::from),
force: true,
non_interactive: true,
},
)
.await?;
print_init_json(&result);
return Ok(());
}
// Human mode: interactive prompts
let config_path = get_config_path(config_override);
let mut confirmed_overwrite = force;
// Check if config exists and handle overwrite
if config_path.exists() {
if config_path.exists() && !force {
if non_interactive {
eprintln!(
"{}",
style(format!(
"Config file exists at {}. Cannot proceed in non-interactive mode.",
"Config file exists at {}. Use --force to overwrite.",
config_path.display()
))
.red()
@@ -496,59 +626,70 @@ async fn handle_init(
std::process::exit(2);
}
if !force {
let confirm = Confirm::new()
.with_prompt(format!(
"Config file exists at {}. Overwrite?",
config_path.display()
))
.default(false)
.interact()?;
let confirm = Confirm::new()
.with_prompt(format!(
"Config file exists at {}. Overwrite?",
config_path.display()
))
.default(false)
.interact()?;
if !confirm {
println!("{}", style("Cancelled.").yellow());
std::process::exit(2);
}
confirmed_overwrite = true;
if !confirm {
println!("{}", style("Cancelled.").yellow());
std::process::exit(2);
}
confirmed_overwrite = true;
}
// Prompt for GitLab URL
let gitlab_url: String = Input::new()
.with_prompt("GitLab URL")
.default("https://gitlab.com".to_string())
.validate_with(|input: &String| -> Result<(), &str> {
if url::Url::parse(input).is_ok() {
Ok(())
} else {
Err("Please enter a valid URL")
}
})
.interact_text()?;
let gitlab_url: String = if let Some(url) = gitlab_url_flag {
url
} else {
Input::new()
.with_prompt("GitLab URL")
.default("https://gitlab.com".to_string())
.validate_with(|input: &String| -> Result<(), &str> {
if url::Url::parse(input).is_ok() {
Ok(())
} else {
Err("Please enter a valid URL")
}
})
.interact_text()?
};
// Prompt for token env var
let token_env_var: String = Input::new()
.with_prompt("Token environment variable name")
.default("GITLAB_TOKEN".to_string())
.interact_text()?;
let token_env_var: String = if let Some(var) = token_env_var_flag {
var
} else {
Input::new()
.with_prompt("Token environment variable name")
.default("GITLAB_TOKEN".to_string())
.interact_text()?
};
// Prompt for project paths
let project_paths_input: String = Input::new()
.with_prompt("Project paths (comma-separated, e.g., group/project)")
.validate_with(|input: &String| -> Result<(), &str> {
if input.trim().is_empty() {
Err("Please enter at least one project path")
} else {
Ok(())
}
})
.interact_text()?;
let project_paths: Vec<String> = if let Some(projects) = projects_flag {
projects
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect()
} else {
let project_paths_input: String = Input::new()
.with_prompt("Project paths (comma-separated, e.g., group/project)")
.validate_with(|input: &String| -> Result<(), &str> {
if input.trim().is_empty() {
Err("Please enter at least one project path")
} else {
Ok(())
}
})
.interact_text()?;
let project_paths: Vec<String> = project_paths_input
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
project_paths_input
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect()
};
println!("{}", style("\nValidating configuration...").blue());
@@ -840,6 +981,385 @@ async fn handle_migrate(
Ok(())
}
async fn handle_stats(
config_override: Option<&str>,
args: StatsArgs,
robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let config = Config::load(config_override)?;
let result = run_stats(&config, args.check, args.repair)?;
if robot_mode {
print_stats_json(&result);
} else {
print_stats(&result);
}
Ok(())
}
async fn handle_search(
config_override: Option<&str>,
args: SearchArgs,
robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let config = Config::load(config_override)?;
let fts_mode = match args.fts_mode.as_str() {
"raw" => lore::search::FtsQueryMode::Raw,
_ => lore::search::FtsQueryMode::Safe,
};
let cli_filters = SearchCliFilters {
source_type: args.source_type,
author: args.author,
project: args.project,
labels: args.label,
path: args.path,
after: args.after,
updated_after: args.updated_after,
limit: args.limit,
};
let start = std::time::Instant::now();
let response = run_search(&config, &args.query, cli_filters, fts_mode, args.explain)?;
let elapsed_ms = start.elapsed().as_millis() as u64;
if robot_mode {
print_search_results_json(&response, elapsed_ms);
} else {
print_search_results(&response);
}
Ok(())
}
/// Handler for `lore generate-docs`: load the config, run document
/// generation with the `--full` / `--project` arguments, and print the
/// outcome in the format selected by `robot_mode`.
async fn handle_generate_docs(
    config_override: Option<&str>,
    args: GenerateDocsArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config = Config::load(config_override)?;
    let project_filter = args.project.as_deref();
    let outcome = run_generate_docs(&config, args.full, project_filter)?;

    if robot_mode {
        print_generate_docs_json(&outcome);
        return Ok(());
    }
    print_generate_docs(&outcome);
    Ok(())
}
async fn handle_embed(
config_override: Option<&str>,
args: EmbedArgs,
robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let config = Config::load(config_override)?;
let result = run_embed(&config, args.retry_failed).await?;
if robot_mode {
print_embed_json(&result);
} else {
print_embed(&result);
}
Ok(())
}
async fn handle_sync_cmd(
config_override: Option<&str>,
args: SyncArgs,
robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let config = Config::load(config_override)?;
let options = SyncOptions {
full: args.full,
force: args.force,
no_embed: args.no_embed,
no_docs: args.no_docs,
};
let start = std::time::Instant::now();
let result = run_sync(&config, options).await?;
let elapsed = start.elapsed();
if robot_mode {
print_sync_json(&result, elapsed.as_millis() as u64);
} else {
print_sync(&result, elapsed);
}
Ok(())
}
// ============================================================================
// Health + Robot-docs handlers
// ============================================================================
/// JSON output for health command.
///
/// Top-level envelope printed by `handle_health` in robot mode.
#[derive(Serialize)]
struct HealthOutput {
/// `handle_health` always sets this to `true`, even when the check reports
/// an unhealthy state; the health verdict itself is `data.healthy`.
ok: bool,
/// The individual check results.
data: HealthData,
}
/// Individual results of the health check, as filled in by `handle_health`.
#[derive(Serialize)]
struct HealthData {
/// `config_found && db_found && schema_current`.
healthy: bool,
/// Whether the resolved config path exists on disk.
config_found: bool,
/// Whether the database file exists (only checked once the config loaded).
db_found: bool,
/// Whether the schema version is at least the latest version known to
/// `handle_health`.
schema_current: bool,
/// Schema version read from the database; 0 when it could not be read.
schema_version: i32,
}
/// Handler for `lore health`: a quick pre-flight check of config, database
/// file and schema version.
///
/// Prints a JSON envelope in robot mode or a pass/FAIL listing otherwise,
/// and terminates the process with exit code 1 when any check fails.
async fn handle_health(
    config_override: Option<&str>,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let config_path = get_config_path(config_override);
    let config_found = config_path.exists();

    // (db_found, schema_version, schema_current); stays at the "nothing found"
    // default unless each step below succeeds.
    let mut probe = (false, 0, false);
    if config_found {
        if let Ok(config) = Config::load(config_override) {
            let db_path = get_db_path(config.storage.db_path.as_deref());
            if db_path.exists() {
                probe = match create_connection(&db_path) {
                    // The file exists but cannot be opened: found, not current.
                    Err(_) => (true, 0, false),
                    Ok(conn) => {
                        let version = get_schema_version(&conn);
                        let latest = 9; // Number of embedded migrations
                        (true, version, version >= latest)
                    }
                };
            }
        }
    }
    let (db_found, schema_version, schema_current) = probe;

    let healthy = config_found && db_found && schema_current;

    if robot_mode {
        let data = HealthData {
            healthy,
            config_found,
            db_found,
            schema_current,
            schema_version,
        };
        let envelope = HealthOutput { ok: true, data };
        println!("{}", serde_json::to_string(&envelope)?);
    } else {
        let verdict = |passed: bool| {
            if passed {
                style("pass").green()
            } else {
                style("FAIL").red()
            }
        };
        println!(
            "Config: {} ({})",
            verdict(config_found),
            config_path.display()
        );
        println!("DB: {}", verdict(db_found));
        println!("Schema: {} (v{})", verdict(schema_current), schema_version);
        println!();

        let banner = if healthy {
            style("Healthy").green().bold()
        } else {
            style("Unhealthy - run 'lore doctor' for details")
                .red()
                .bold()
        };
        println!("{}", banner);
    }

    // A failed check is signalled through the process exit code.
    if healthy {
        Ok(())
    } else {
        std::process::exit(1)
    }
}
/// JSON output for robot-docs command.
///
/// Top-level envelope printed by `handle_robot_docs`.
#[derive(Serialize)]
struct RobotDocsOutput {
/// Set to `true` by `handle_robot_docs`.
ok: bool,
/// The command manifest.
data: RobotDocsData,
}
/// The manifest emitted by `lore robot-docs`, as assembled in
/// `handle_robot_docs`.
#[derive(Serialize)]
struct RobotDocsData {
/// Tool name.
name: String,
/// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
version: String,
/// One-line description of the tool.
description: String,
/// The ways robot mode can be switched on.
activation: RobotDocsActivation,
/// JSON object keyed by command name; each entry carries a description,
/// flags and an example invocation.
commands: serde_json::Value,
/// JSON object mapping exit code (as a string key) to its meaning.
exit_codes: serde_json::Value,
/// Textual description of the error JSON shape.
error_format: String,
/// JSON object mapping a workflow name to an ordered list of commands.
workflows: serde_json::Value,
}
/// Describes how robot mode is activated, as reported by `lore robot-docs`.
#[derive(Serialize)]
struct RobotDocsActivation {
/// Command-line flags that activate robot mode.
flags: Vec<String>,
/// Environment variable setting that activates robot mode.
env: String,
/// Description of the automatic activation condition.
auto: String,
}
/// Handler for `lore robot-docs`: print a machine-readable manifest of the
/// CLI (commands, flags, examples, exit codes, workflows) to stdout.
///
/// The manifest is always JSON; `robot_mode` only selects compact output
/// (robot mode) versus pretty-printed output (otherwise).
///
/// # Errors
/// Returns an error if serializing the manifest fails.
fn handle_robot_docs(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
// Version is baked in at compile time from the crate metadata.
let version = env!("CARGO_PKG_VERSION").to_string();
// Hand-maintained command catalogue: one entry per command with its
// description, flags and an example invocation.
let commands = serde_json::json!({
"init": {
"description": "Initialize configuration and database",
"flags": ["--force", "--non-interactive", "--gitlab-url <URL>", "--token-env-var <VAR>", "--projects <paths>"],
"robot_flags": ["--gitlab-url", "--token-env-var", "--projects"],
"example": "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project"
},
"health": {
"description": "Quick pre-flight check: config, database, schema version",
"flags": [],
"example": "lore --robot health"
},
"auth": {
"description": "Verify GitLab authentication",
"flags": [],
"example": "lore --robot auth"
},
"doctor": {
"description": "Full environment health check (config, auth, DB, Ollama)",
"flags": [],
"example": "lore --robot doctor"
},
"ingest": {
"description": "Sync data from GitLab",
"flags": ["--project <path>", "--force", "--full", "<entity: issues|mrs>"],
"example": "lore --robot ingest issues --project group/repo"
},
"sync": {
"description": "Full sync pipeline: ingest -> generate-docs -> embed",
"flags": ["--full", "--force", "--no-embed", "--no-docs"],
"example": "lore --robot sync"
},
"issues": {
"description": "List or show issues",
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--label", "--milestone", "--since", "--due-before", "--has-due", "--sort", "--asc"],
"example": "lore --robot issues --state opened --limit 10"
},
"mrs": {
"description": "List or show merge requests",
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--reviewer", "--label", "--since", "--draft", "--no-draft", "--target", "--source", "--sort", "--asc"],
"example": "lore --robot mrs --state opened"
},
"search": {
"description": "Search indexed documents (lexical, hybrid, semantic)",
"flags": ["<QUERY>", "--mode", "--type", "--author", "--project", "--label", "--path", "--after", "--updated-after", "--limit", "--explain", "--fts-mode"],
"example": "lore --robot search 'authentication bug' --mode hybrid --limit 10"
},
"count": {
"description": "Count entities in local database",
"flags": ["<entity: issues|mrs|discussions|notes>", "--for <issue|mr>"],
"example": "lore --robot count issues"
},
"stats": {
"description": "Show document and index statistics",
"flags": ["--check", "--repair"],
"example": "lore --robot stats"
},
"status": {
"description": "Show sync state (cursors, last sync times)",
"flags": [],
"example": "lore --robot status"
},
"generate-docs": {
"description": "Generate searchable documents from ingested data",
"flags": ["--full", "--project <path>"],
"example": "lore --robot generate-docs --full"
},
"embed": {
"description": "Generate vector embeddings for documents via Ollama",
"flags": ["--retry-failed"],
"example": "lore --robot embed"
},
"migrate": {
"description": "Run pending database migrations",
"flags": [],
"example": "lore --robot migrate"
},
"version": {
"description": "Show version information",
"flags": [],
"example": "lore --robot version"
},
"robot-docs": {
"description": "This command (agent self-discovery manifest)",
"flags": [],
"example": "lore robot-docs"
}
});
// Exit code table; keys are the numeric codes written as strings.
let exit_codes = serde_json::json!({
"0": "Success",
"1": "Internal error / health check failed",
"2": "Config not found / missing flags",
"3": "Config invalid",
"4": "Token not set",
"5": "GitLab auth failed",
"6": "Resource not found",
"7": "Rate limited",
"8": "Network error",
"9": "Database locked",
"10": "Database error",
"11": "Migration failed",
"12": "I/O error",
"13": "Transform error"
});
// Named command sequences; each value is an ordered list of invocations.
let workflows = serde_json::json!({
"first_setup": [
"lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project",
"lore --robot doctor",
"lore --robot sync"
],
"daily_sync": [
"lore --robot sync"
],
"search": [
"lore --robot search 'query' --mode hybrid"
],
"pre_flight": [
"lore --robot health"
]
});
let output = RobotDocsOutput {
ok: true,
data: RobotDocsData {
name: "lore".to_string(),
version,
description: "Local GitLab data management with semantic search".to_string(),
activation: RobotDocsActivation {
flags: vec!["--robot".to_string(), "-J".to_string(), "--json".to_string()],
env: "LORE_ROBOT=1".to_string(),
auto: "Non-TTY stdout".to_string(),
},
commands,
exit_codes,
error_format: "stderr JSON: {\"error\":{\"code\":\"...\",\"message\":\"...\",\"suggestion\":\"...\"}}".to_string(),
workflows,
},
};
// Same content either way: single-line JSON in robot mode, pretty-printed
// JSON otherwise.
if robot_mode {
println!("{}", serde_json::to_string(&output)?);
} else {
println!("{}", serde_json::to_string_pretty(&output)?);
}
Ok(())
}
// ============================================================================
// Backward-compat handlers (deprecated, delegate to new handlers)
// ============================================================================