feat(cli): Add search, stats, embed, sync, health, and robot-docs commands
Extends the CLI with six new commands that complete the search pipeline: - lore search <QUERY>: Hybrid search with mode selection (lexical, hybrid, semantic), rich filtering (--type, --author, --project, --label, --path, --after, --updated-after), result limits, and optional explain mode showing RRF score breakdowns. Safe FTS mode sanitizes user input; raw mode passes through for power users. - lore stats: Document and index statistics with optional --check for integrity verification and --repair to fix inconsistencies (orphaned documents, missing FTS entries, stale dirty queue items). - lore embed: Generate vector embeddings via Ollama. Supports --retry-failed to re-attempt previously failed embeddings. - lore generate-docs: Drain the dirty queue to regenerate documents. --full seeds all entities for complete rebuild. --project scopes to a single project. - lore sync: Full pipeline orchestration (ingest issues + MRs, generate-docs, embed) with --no-embed and --no-docs flags for partial runs. Reports per-stage results and total elapsed time. - lore health: Quick pre-flight check (config exists, DB exists, schema current). Returns exit code 1 if unhealthy. Designed for agent pre-flight scripts. - lore robot-docs: Machine-readable command manifest for agent self-discovery. Returns all commands, flags, examples, exit codes, and recommended workflows as structured JSON. Also enhances lore init with --gitlab-url, --token-env-var, and --projects flags for fully non-interactive robot-mode initialization. Fixes init's force/non-interactive precedence logic and adds JSON output for robot mode. Updates all command files for the GiError -> LoreError rename. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
//! Auth test command - verify GitLab authentication.
|
//! Auth test command - verify GitLab authentication.
|
||||||
|
|
||||||
use crate::core::config::Config;
|
use crate::core::config::Config;
|
||||||
use crate::core::error::{GiError, Result};
|
use crate::core::error::{LoreError, Result};
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
|
|
||||||
/// Result of successful auth test.
|
/// Result of successful auth test.
|
||||||
@@ -19,12 +19,12 @@ pub async fn run_auth_test(config_path: Option<&str>) -> Result<AuthTestResult>
|
|||||||
// 2. Get token from environment
|
// 2. Get token from environment
|
||||||
let token = std::env::var(&config.gitlab.token_env_var)
|
let token = std::env::var(&config.gitlab.token_env_var)
|
||||||
.map(|t| t.trim().to_string())
|
.map(|t| t.trim().to_string())
|
||||||
.map_err(|_| GiError::TokenNotSet {
|
.map_err(|_| LoreError::TokenNotSet {
|
||||||
env_var: config.gitlab.token_env_var.clone(),
|
env_var: config.gitlab.token_env_var.clone(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if token.is_empty() {
|
if token.is_empty() {
|
||||||
return Err(GiError::TokenNotSet {
|
return Err(LoreError::TokenNotSet {
|
||||||
env_var: config.gitlab.token_env_var.clone(),
|
env_var: config.gitlab.token_env_var.clone(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use serde::Serialize;
|
|||||||
|
|
||||||
use crate::core::config::Config;
|
use crate::core::config::Config;
|
||||||
use crate::core::db::{create_connection, get_schema_version, verify_pragmas};
|
use crate::core::db::{create_connection, get_schema_version, verify_pragmas};
|
||||||
use crate::core::error::GiError;
|
use crate::core::error::LoreError;
|
||||||
use crate::core::paths::{get_config_path, get_db_path};
|
use crate::core::paths::{get_config_path, get_db_path};
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
|
|
||||||
@@ -137,7 +137,7 @@ fn check_config(config_path: &str) -> (ConfigCheck, Option<Config>) {
|
|||||||
},
|
},
|
||||||
Some(config),
|
Some(config),
|
||||||
),
|
),
|
||||||
Err(GiError::ConfigNotFound { path }) => (
|
Err(LoreError::ConfigNotFound { path }) => (
|
||||||
ConfigCheck {
|
ConfigCheck {
|
||||||
result: CheckResult {
|
result: CheckResult {
|
||||||
status: CheckStatus::Error,
|
status: CheckStatus::Error,
|
||||||
@@ -264,7 +264,7 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck {
|
|||||||
url: Some(config.gitlab.base_url.clone()),
|
url: Some(config.gitlab.base_url.clone()),
|
||||||
username: Some(user.username),
|
username: Some(user.username),
|
||||||
},
|
},
|
||||||
Err(GiError::GitLabAuthFailed) => GitLabCheck {
|
Err(LoreError::GitLabAuthFailed) => GitLabCheck {
|
||||||
result: CheckResult {
|
result: CheckResult {
|
||||||
status: CheckStatus::Error,
|
status: CheckStatus::Error,
|
||||||
message: Some("Authentication failed. Check your token.".to_string()),
|
message: Some("Authentication failed. Check your token.".to_string()),
|
||||||
|
|||||||
88
src/cli/commands/embed.rs
Normal file
88
src/cli/commands/embed.rs
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
//! Embed command: generate vector embeddings for documents via Ollama.
|
||||||
|
|
||||||
|
use console::style;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::core::db::create_connection;
|
||||||
|
use crate::core::error::Result;
|
||||||
|
use crate::core::paths::get_db_path;
|
||||||
|
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
||||||
|
use crate::embedding::pipeline::embed_documents;
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
|
/// Result of the embed command.
|
||||||
|
#[derive(Debug, Default, Serialize)]
|
||||||
|
pub struct EmbedCommandResult {
|
||||||
|
pub embedded: usize,
|
||||||
|
pub failed: usize,
|
||||||
|
pub skipped: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the embed command.
|
||||||
|
pub async fn run_embed(
|
||||||
|
config: &Config,
|
||||||
|
retry_failed: bool,
|
||||||
|
) -> Result<EmbedCommandResult> {
|
||||||
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||||
|
let conn = create_connection(&db_path)?;
|
||||||
|
|
||||||
|
// Build Ollama config from user settings
|
||||||
|
let ollama_config = OllamaConfig {
|
||||||
|
base_url: config.embedding.base_url.clone(),
|
||||||
|
model: config.embedding.model.clone(),
|
||||||
|
..OllamaConfig::default()
|
||||||
|
};
|
||||||
|
let client = OllamaClient::new(ollama_config);
|
||||||
|
|
||||||
|
// Health check — fail fast if Ollama is down or model missing
|
||||||
|
client.health_check().await?;
|
||||||
|
|
||||||
|
// If retry_failed, clear errors so they become pending again
|
||||||
|
if retry_failed {
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0
|
||||||
|
WHERE last_error IS NOT NULL",
|
||||||
|
[],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let model_name = &config.embedding.model;
|
||||||
|
let result = embed_documents(&conn, &client, model_name, None).await?;
|
||||||
|
|
||||||
|
Ok(EmbedCommandResult {
|
||||||
|
embedded: result.embedded,
|
||||||
|
failed: result.failed,
|
||||||
|
skipped: result.skipped,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print human-readable output.
|
||||||
|
pub fn print_embed(result: &EmbedCommandResult) {
|
||||||
|
println!(
|
||||||
|
"{} Embedding complete",
|
||||||
|
style("done").green().bold(),
|
||||||
|
);
|
||||||
|
println!(" Embedded: {}", result.embedded);
|
||||||
|
if result.failed > 0 {
|
||||||
|
println!(" Failed: {}", style(result.failed).red());
|
||||||
|
}
|
||||||
|
if result.skipped > 0 {
|
||||||
|
println!(" Skipped: {}", result.skipped);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON output.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct EmbedJsonOutput<'a> {
|
||||||
|
ok: bool,
|
||||||
|
data: &'a EmbedCommandResult,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print JSON robot-mode output.
|
||||||
|
pub fn print_embed_json(result: &EmbedCommandResult) {
|
||||||
|
let output = EmbedJsonOutput {
|
||||||
|
ok: true,
|
||||||
|
data: result,
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
205
src/cli/commands/generate_docs.rs
Normal file
205
src/cli/commands/generate_docs.rs
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
//! Generate searchable documents from ingested GitLab data.
|
||||||
|
|
||||||
|
use console::style;
|
||||||
|
use rusqlite::Connection;
|
||||||
|
use serde::Serialize;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::core::db::create_connection;
|
||||||
|
use crate::core::error::Result;
|
||||||
|
use crate::core::paths::get_db_path;
|
||||||
|
use crate::documents::{regenerate_dirty_documents, SourceType};
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
|
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
|
||||||
|
|
||||||
|
/// Result of a generate-docs run.
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct GenerateDocsResult {
|
||||||
|
pub regenerated: usize,
|
||||||
|
pub unchanged: usize,
|
||||||
|
pub errored: usize,
|
||||||
|
pub seeded: usize,
|
||||||
|
pub full_mode: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the generate-docs pipeline.
|
||||||
|
///
|
||||||
|
/// Default mode: process only existing dirty_sources entries.
|
||||||
|
/// Full mode: seed dirty_sources with ALL entities, then drain.
|
||||||
|
pub fn run_generate_docs(
|
||||||
|
config: &Config,
|
||||||
|
full: bool,
|
||||||
|
project_filter: Option<&str>,
|
||||||
|
) -> Result<GenerateDocsResult> {
|
||||||
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||||
|
let conn = create_connection(&db_path)?;
|
||||||
|
let mut result = GenerateDocsResult {
|
||||||
|
full_mode: full,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
if full {
|
||||||
|
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
|
||||||
|
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
|
||||||
|
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let regen = regenerate_dirty_documents(&conn)?;
|
||||||
|
result.regenerated = regen.regenerated;
|
||||||
|
result.unchanged = regen.unchanged;
|
||||||
|
result.errored = regen.errored;
|
||||||
|
|
||||||
|
if full {
|
||||||
|
// Optimize FTS index after bulk rebuild
|
||||||
|
let _ = conn.execute(
|
||||||
|
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
|
||||||
|
[],
|
||||||
|
);
|
||||||
|
info!("FTS index optimized after full rebuild");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Seed dirty_sources with all entities of the given type using keyset pagination.
|
||||||
|
fn seed_dirty(
|
||||||
|
conn: &Connection,
|
||||||
|
source_type: SourceType,
|
||||||
|
project_filter: Option<&str>,
|
||||||
|
) -> Result<usize> {
|
||||||
|
let table = match source_type {
|
||||||
|
SourceType::Issue => "issues",
|
||||||
|
SourceType::MergeRequest => "merge_requests",
|
||||||
|
SourceType::Discussion => "discussions",
|
||||||
|
};
|
||||||
|
let type_str = source_type.as_str();
|
||||||
|
let now = chrono::Utc::now().timestamp_millis();
|
||||||
|
|
||||||
|
let mut total_seeded: usize = 0;
|
||||||
|
let mut last_id: i64 = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let inserted = if let Some(project) = project_filter {
|
||||||
|
// Resolve project to ID for filtering
|
||||||
|
let project_id: Option<i64> = conn
|
||||||
|
.query_row(
|
||||||
|
"SELECT id FROM projects WHERE path_with_namespace = ?1 COLLATE NOCASE",
|
||||||
|
[project],
|
||||||
|
|row| row.get(0),
|
||||||
|
)
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
let Some(pid) = project_id else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
&format!(
|
||||||
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||||
|
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
|
||||||
|
FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5
|
||||||
|
ON CONFLICT(source_type, source_id) DO NOTHING"
|
||||||
|
),
|
||||||
|
rusqlite::params![type_str, now, last_id, pid, FULL_MODE_CHUNK_SIZE],
|
||||||
|
)?
|
||||||
|
} else {
|
||||||
|
conn.execute(
|
||||||
|
&format!(
|
||||||
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||||
|
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
|
||||||
|
FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4
|
||||||
|
ON CONFLICT(source_type, source_id) DO NOTHING"
|
||||||
|
),
|
||||||
|
rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE],
|
||||||
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
if inserted == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance keyset cursor to the max id within the chunk window
|
||||||
|
let max_id: i64 = conn.query_row(
|
||||||
|
&format!(
|
||||||
|
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
|
||||||
|
table = table
|
||||||
|
),
|
||||||
|
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|
||||||
|
|row| row.get(0),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
total_seeded += inserted;
|
||||||
|
last_id = max_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
source_type = type_str,
|
||||||
|
seeded = total_seeded,
|
||||||
|
"Seeded dirty_sources"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(total_seeded)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print human-readable output.
|
||||||
|
pub fn print_generate_docs(result: &GenerateDocsResult) {
|
||||||
|
let mode = if result.full_mode { "full" } else { "incremental" };
|
||||||
|
println!(
|
||||||
|
"{} Document generation complete ({})",
|
||||||
|
style("done").green().bold(),
|
||||||
|
mode
|
||||||
|
);
|
||||||
|
|
||||||
|
if result.full_mode {
|
||||||
|
println!(" Seeded: {}", result.seeded);
|
||||||
|
}
|
||||||
|
println!(" Regenerated: {}", result.regenerated);
|
||||||
|
println!(" Unchanged: {}", result.unchanged);
|
||||||
|
if result.errored > 0 {
|
||||||
|
println!(
|
||||||
|
" Errored: {}",
|
||||||
|
style(result.errored).red()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON output structures.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct GenerateDocsJsonOutput {
|
||||||
|
ok: bool,
|
||||||
|
data: GenerateDocsJsonData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct GenerateDocsJsonData {
|
||||||
|
mode: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
seeded: Option<usize>,
|
||||||
|
regenerated: usize,
|
||||||
|
unchanged: usize,
|
||||||
|
errored: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print JSON robot-mode output.
|
||||||
|
pub fn print_generate_docs_json(result: &GenerateDocsResult) {
|
||||||
|
let output = GenerateDocsJsonOutput {
|
||||||
|
ok: true,
|
||||||
|
data: GenerateDocsJsonData {
|
||||||
|
mode: if result.full_mode {
|
||||||
|
"full".to_string()
|
||||||
|
} else {
|
||||||
|
"incremental".to_string()
|
||||||
|
},
|
||||||
|
seeded: if result.full_mode {
|
||||||
|
Some(result.seeded)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
regenerated: result.regenerated,
|
||||||
|
unchanged: result.unchanged,
|
||||||
|
errored: result.errored,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
@@ -7,7 +7,7 @@ use serde::Serialize;
|
|||||||
|
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::db::create_connection;
|
use crate::core::db::create_connection;
|
||||||
use crate::core::error::{GiError, Result};
|
use crate::core::error::{LoreError, Result};
|
||||||
use crate::core::lock::{AppLock, LockOptions};
|
use crate::core::lock::{AppLock, LockOptions};
|
||||||
use crate::core::paths::get_db_path;
|
use crate::core::paths::get_db_path;
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
@@ -51,7 +51,7 @@ pub async fn run_ingest(
|
|||||||
) -> Result<IngestResult> {
|
) -> Result<IngestResult> {
|
||||||
// Validate resource type early
|
// Validate resource type early
|
||||||
if resource_type != "issues" && resource_type != "mrs" {
|
if resource_type != "issues" && resource_type != "mrs" {
|
||||||
return Err(GiError::Other(format!(
|
return Err(LoreError::Other(format!(
|
||||||
"Invalid resource type '{}'. Valid types: issues, mrs",
|
"Invalid resource type '{}'. Valid types: issues, mrs",
|
||||||
resource_type
|
resource_type
|
||||||
)));
|
)));
|
||||||
@@ -74,7 +74,7 @@ pub async fn run_ingest(
|
|||||||
lock.acquire(force)?;
|
lock.acquire(force)?;
|
||||||
|
|
||||||
// Get token from environment
|
// Get token from environment
|
||||||
let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| GiError::TokenNotSet {
|
let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
|
||||||
env_var: config.gitlab.token_env_var.clone(),
|
env_var: config.gitlab.token_env_var.clone(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -119,12 +119,12 @@ pub async fn run_ingest(
|
|||||||
|
|
||||||
if projects.is_empty() {
|
if projects.is_empty() {
|
||||||
if let Some(filter) = project_filter {
|
if let Some(filter) = project_filter {
|
||||||
return Err(GiError::Other(format!(
|
return Err(LoreError::Other(format!(
|
||||||
"Project '{}' not found in configuration",
|
"Project '{}' not found in configuration",
|
||||||
filter
|
filter
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
return Err(GiError::Other(
|
return Err(LoreError::Other(
|
||||||
"No projects configured. Run 'lore init' first.".to_string(),
|
"No projects configured. Run 'lore init' first.".to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::fs;
|
|||||||
|
|
||||||
use crate::core::config::{MinimalConfig, MinimalGitLabConfig, ProjectConfig};
|
use crate::core::config::{MinimalConfig, MinimalGitLabConfig, ProjectConfig};
|
||||||
use crate::core::db::{create_connection, run_migrations};
|
use crate::core::db::{create_connection, run_migrations};
|
||||||
use crate::core::error::{GiError, Result};
|
use crate::core::error::{LoreError, Result};
|
||||||
use crate::core::paths::{get_config_path, get_data_dir};
|
use crate::core::paths::{get_config_path, get_data_dir};
|
||||||
use crate::gitlab::{GitLabClient, GitLabProject};
|
use crate::gitlab::{GitLabClient, GitLabProject};
|
||||||
|
|
||||||
@@ -45,32 +45,30 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
|
|||||||
let config_path = get_config_path(options.config_path.as_deref());
|
let config_path = get_config_path(options.config_path.as_deref());
|
||||||
let data_dir = get_data_dir();
|
let data_dir = get_data_dir();
|
||||||
|
|
||||||
// 1. Check if config exists
|
// 1. Check if config exists (force takes precedence over non_interactive)
|
||||||
if config_path.exists() {
|
if config_path.exists() && !options.force {
|
||||||
if options.non_interactive {
|
if options.non_interactive {
|
||||||
return Err(GiError::Other(format!(
|
return Err(LoreError::Other(format!(
|
||||||
"Config file exists at {}. Cannot proceed in non-interactive mode.",
|
"Config file exists at {}. Use --force to overwrite.",
|
||||||
config_path.display()
|
config_path.display()
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if !options.force {
|
return Err(LoreError::Other(
|
||||||
return Err(GiError::Other(
|
|
||||||
"User cancelled config overwrite.".to_string(),
|
"User cancelled config overwrite.".to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Validate GitLab URL format
|
// 2. Validate GitLab URL format
|
||||||
if url::Url::parse(&inputs.gitlab_url).is_err() {
|
if url::Url::parse(&inputs.gitlab_url).is_err() {
|
||||||
return Err(GiError::Other(format!(
|
return Err(LoreError::Other(format!(
|
||||||
"Invalid GitLab URL: {}",
|
"Invalid GitLab URL: {}",
|
||||||
inputs.gitlab_url
|
inputs.gitlab_url
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Check token is set in environment
|
// 3. Check token is set in environment
|
||||||
let token = std::env::var(&inputs.token_env_var).map_err(|_| GiError::TokenNotSet {
|
let token = std::env::var(&inputs.token_env_var).map_err(|_| LoreError::TokenNotSet {
|
||||||
env_var: inputs.token_env_var.clone(),
|
env_var: inputs.token_env_var.clone(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -78,8 +76,8 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
|
|||||||
let client = GitLabClient::new(&inputs.gitlab_url, &token, None);
|
let client = GitLabClient::new(&inputs.gitlab_url, &token, None);
|
||||||
|
|
||||||
let gitlab_user = client.get_current_user().await.map_err(|e| {
|
let gitlab_user = client.get_current_user().await.map_err(|e| {
|
||||||
if matches!(e, GiError::GitLabAuthFailed) {
|
if matches!(e, LoreError::GitLabAuthFailed) {
|
||||||
GiError::Other(format!("Authentication failed for {}", inputs.gitlab_url))
|
LoreError::Other(format!("Authentication failed for {}", inputs.gitlab_url))
|
||||||
} else {
|
} else {
|
||||||
e
|
e
|
||||||
}
|
}
|
||||||
@@ -95,8 +93,8 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe
|
|||||||
|
|
||||||
for project_path in &inputs.project_paths {
|
for project_path in &inputs.project_paths {
|
||||||
let project = client.get_project(project_path).await.map_err(|e| {
|
let project = client.get_project(project_path).await.map_err(|e| {
|
||||||
if matches!(e, GiError::GitLabNotFound { .. }) {
|
if matches!(e, LoreError::GitLabNotFound { .. }) {
|
||||||
GiError::Other(format!("Project not found: {project_path}"))
|
LoreError::Other(format!("Project not found: {project_path}"))
|
||||||
} else {
|
} else {
|
||||||
e
|
e
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,21 +3,33 @@
|
|||||||
pub mod auth_test;
|
pub mod auth_test;
|
||||||
pub mod count;
|
pub mod count;
|
||||||
pub mod doctor;
|
pub mod doctor;
|
||||||
|
pub mod embed;
|
||||||
|
pub mod generate_docs;
|
||||||
pub mod ingest;
|
pub mod ingest;
|
||||||
pub mod init;
|
pub mod init;
|
||||||
pub mod list;
|
pub mod list;
|
||||||
|
pub mod search;
|
||||||
pub mod show;
|
pub mod show;
|
||||||
|
pub mod stats;
|
||||||
|
pub mod sync;
|
||||||
pub mod sync_status;
|
pub mod sync_status;
|
||||||
|
|
||||||
pub use auth_test::run_auth_test;
|
pub use auth_test::run_auth_test;
|
||||||
pub use count::{print_count, print_count_json, run_count};
|
pub use count::{print_count, print_count_json, run_count};
|
||||||
pub use doctor::{print_doctor_results, run_doctor};
|
pub use doctor::{print_doctor_results, run_doctor};
|
||||||
|
pub use embed::{print_embed, print_embed_json, run_embed};
|
||||||
|
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
|
||||||
|
pub use stats::{print_stats, print_stats_json, run_stats};
|
||||||
|
pub use search::{
|
||||||
|
print_search_results, print_search_results_json, run_search, SearchCliFilters, SearchResponse,
|
||||||
|
};
|
||||||
pub use ingest::{print_ingest_summary, print_ingest_summary_json, run_ingest};
|
pub use ingest::{print_ingest_summary, print_ingest_summary_json, run_ingest};
|
||||||
pub use init::{InitInputs, InitOptions, InitResult, run_init};
|
pub use init::{InitInputs, InitOptions, InitResult, run_init};
|
||||||
pub use list::{
|
pub use list::{
|
||||||
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
|
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
|
||||||
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
|
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
|
||||||
};
|
};
|
||||||
|
pub use sync::{print_sync, print_sync_json, run_sync, SyncOptions, SyncResult};
|
||||||
pub use show::{
|
pub use show::{
|
||||||
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
|
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
|
||||||
run_show_mr,
|
run_show_mr,
|
||||||
|
|||||||
402
src/cli/commands/search.rs
Normal file
402
src/cli/commands/search.rs
Normal file
@@ -0,0 +1,402 @@
|
|||||||
|
//! Search command: lexical (FTS5) search with filter support and single-query hydration.
|
||||||
|
|
||||||
|
use console::style;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::core::db::create_connection;
|
||||||
|
use crate::core::error::{LoreError, Result};
|
||||||
|
use crate::core::paths::get_db_path;
|
||||||
|
use crate::core::project::resolve_project;
|
||||||
|
use crate::core::time::{ms_to_iso, parse_since};
|
||||||
|
use crate::documents::SourceType;
|
||||||
|
use crate::search::{
|
||||||
|
apply_filters, get_result_snippet, rank_rrf, search_fts, FtsQueryMode, PathFilter,
|
||||||
|
SearchFilters,
|
||||||
|
};
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
|
/// Display-ready search result with all fields hydrated.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct SearchResultDisplay {
|
||||||
|
pub document_id: i64,
|
||||||
|
pub source_type: String,
|
||||||
|
pub title: String,
|
||||||
|
pub url: Option<String>,
|
||||||
|
pub author: Option<String>,
|
||||||
|
pub created_at: Option<String>,
|
||||||
|
pub updated_at: Option<String>,
|
||||||
|
pub project_path: String,
|
||||||
|
pub labels: Vec<String>,
|
||||||
|
pub paths: Vec<String>,
|
||||||
|
pub snippet: String,
|
||||||
|
pub score: f64,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub explain: Option<ExplainData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ranking explanation for --explain output.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct ExplainData {
|
||||||
|
pub vector_rank: Option<usize>,
|
||||||
|
pub fts_rank: Option<usize>,
|
||||||
|
pub rrf_score: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search response wrapper.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct SearchResponse {
|
||||||
|
pub query: String,
|
||||||
|
pub mode: String,
|
||||||
|
pub total_results: usize,
|
||||||
|
pub results: Vec<SearchResultDisplay>,
|
||||||
|
pub warnings: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build SearchFilters from CLI args.
|
||||||
|
pub struct SearchCliFilters {
|
||||||
|
pub source_type: Option<String>,
|
||||||
|
pub author: Option<String>,
|
||||||
|
pub project: Option<String>,
|
||||||
|
pub labels: Vec<String>,
|
||||||
|
pub path: Option<String>,
|
||||||
|
pub after: Option<String>,
|
||||||
|
pub updated_after: Option<String>,
|
||||||
|
pub limit: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run a lexical search query.
|
||||||
|
pub fn run_search(
|
||||||
|
config: &Config,
|
||||||
|
query: &str,
|
||||||
|
cli_filters: SearchCliFilters,
|
||||||
|
fts_mode: FtsQueryMode,
|
||||||
|
explain: bool,
|
||||||
|
) -> Result<SearchResponse> {
|
||||||
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||||
|
let conn = create_connection(&db_path)?;
|
||||||
|
|
||||||
|
// Check if any documents exist
|
||||||
|
let doc_count: i64 = conn
|
||||||
|
.query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
if doc_count == 0 {
|
||||||
|
return Ok(SearchResponse {
|
||||||
|
query: query.to_string(),
|
||||||
|
mode: "lexical".to_string(),
|
||||||
|
total_results: 0,
|
||||||
|
results: vec![],
|
||||||
|
warnings: vec![
|
||||||
|
"No documents indexed. Run 'lore generate-docs' first.".to_string()
|
||||||
|
],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build filters
|
||||||
|
let source_type = cli_filters
|
||||||
|
.source_type
|
||||||
|
.as_deref()
|
||||||
|
.and_then(SourceType::parse);
|
||||||
|
|
||||||
|
let project_id = cli_filters
|
||||||
|
.project
|
||||||
|
.as_deref()
|
||||||
|
.map(|p| resolve_project(&conn, p))
|
||||||
|
.transpose()?;
|
||||||
|
|
||||||
|
let after = cli_filters.after.as_deref().and_then(parse_since);
|
||||||
|
let updated_after = cli_filters.updated_after.as_deref().and_then(parse_since);
|
||||||
|
|
||||||
|
let path = cli_filters.path.as_deref().map(|p| {
|
||||||
|
if p.ends_with('/') {
|
||||||
|
PathFilter::Prefix(p.to_string())
|
||||||
|
} else {
|
||||||
|
PathFilter::Exact(p.to_string())
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let filters = SearchFilters {
|
||||||
|
source_type,
|
||||||
|
author: cli_filters.author,
|
||||||
|
project_id,
|
||||||
|
after,
|
||||||
|
updated_after,
|
||||||
|
labels: cli_filters.labels,
|
||||||
|
path,
|
||||||
|
limit: cli_filters.limit,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Adaptive recall: wider initial fetch when filters applied
|
||||||
|
let requested = filters.clamp_limit();
|
||||||
|
let top_k = if filters.has_any_filter() {
|
||||||
|
(requested * 50).max(200).min(1500)
|
||||||
|
} else {
|
||||||
|
(requested * 10).max(50).min(1500)
|
||||||
|
};
|
||||||
|
|
||||||
|
// FTS search
|
||||||
|
let fts_results = search_fts(&conn, query, top_k, fts_mode)?;
|
||||||
|
let fts_tuples: Vec<(i64, f64)> = fts_results
|
||||||
|
.iter()
|
||||||
|
.map(|r| (r.document_id, r.bm25_score))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Build snippet map before ranking
|
||||||
|
let snippet_map: std::collections::HashMap<i64, String> = fts_results
|
||||||
|
.iter()
|
||||||
|
.map(|r| (r.document_id, r.snippet.clone()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// RRF ranking (single-list for lexical mode)
|
||||||
|
let ranked = rank_rrf(&[], &fts_tuples);
|
||||||
|
let ranked_ids: Vec<i64> = ranked.iter().map(|r| r.document_id).collect();
|
||||||
|
|
||||||
|
// Apply post-retrieval filters
|
||||||
|
let filtered_ids = apply_filters(&conn, &ranked_ids, &filters)?;
|
||||||
|
|
||||||
|
if filtered_ids.is_empty() {
|
||||||
|
return Ok(SearchResponse {
|
||||||
|
query: query.to_string(),
|
||||||
|
mode: "lexical".to_string(),
|
||||||
|
total_results: 0,
|
||||||
|
results: vec![],
|
||||||
|
warnings: vec![],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hydrate results in single round-trip
|
||||||
|
let hydrated = hydrate_results(&conn, &filtered_ids)?;
|
||||||
|
|
||||||
|
// Build display results preserving filter order
|
||||||
|
let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> = ranked
|
||||||
|
.iter()
|
||||||
|
.map(|r| (r.document_id, r))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
|
||||||
|
for row in &hydrated {
|
||||||
|
let rrf = rrf_map.get(&row.document_id);
|
||||||
|
let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
|
||||||
|
let snippet = get_result_snippet(fts_snippet, &row.content_text);
|
||||||
|
|
||||||
|
let explain_data = if explain {
|
||||||
|
rrf.map(|r| ExplainData {
|
||||||
|
vector_rank: r.vector_rank,
|
||||||
|
fts_rank: r.fts_rank,
|
||||||
|
rrf_score: r.rrf_score,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
results.push(SearchResultDisplay {
|
||||||
|
document_id: row.document_id,
|
||||||
|
source_type: row.source_type.clone(),
|
||||||
|
title: row.title.clone(),
|
||||||
|
url: row.url.clone(),
|
||||||
|
author: row.author.clone(),
|
||||||
|
created_at: row.created_at.map(ms_to_iso),
|
||||||
|
updated_at: row.updated_at.map(ms_to_iso),
|
||||||
|
project_path: row.project_path.clone(),
|
||||||
|
labels: row.labels.clone(),
|
||||||
|
paths: row.paths.clone(),
|
||||||
|
snippet,
|
||||||
|
score: rrf.map(|r| r.normalized_score).unwrap_or(0.0),
|
||||||
|
explain: explain_data,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(SearchResponse {
|
||||||
|
query: query.to_string(),
|
||||||
|
mode: "lexical".to_string(),
|
||||||
|
total_results: results.len(),
|
||||||
|
results,
|
||||||
|
warnings: vec![],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Raw row from hydration query.
///
/// One entry per document returned by [`hydrate_results`]; field order
/// mirrors the SELECT column order of the hydration SQL.
struct HydratedRow {
    // Primary key of the `documents` row.
    document_id: i64,
    // Source kind, e.g. "issue" / "merge_request" / "discussion".
    source_type: String,
    title: String,
    // Web URL of the source entity, when known.
    url: Option<String>,
    // Author username, when known.
    author: Option<String>,
    // Timestamps in epoch milliseconds (converted to ISO later for display).
    created_at: Option<i64>,
    updated_at: Option<i64>,
    // Full document body; used for snippet fallback when FTS gives none.
    content_text: String,
    // `path_with_namespace` of the owning project.
    project_path: String,
    // Label names attached to the document (may be empty).
    labels: Vec<String>,
    // File paths associated with the document (may be empty).
    paths: Vec<String>,
}
|
||||||
|
|
||||||
|
/// Hydrate document IDs into full display rows in a single query.
|
||||||
|
///
|
||||||
|
/// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key.
|
||||||
|
/// Labels and paths fetched via correlated json_group_array subqueries.
|
||||||
|
fn hydrate_results(
|
||||||
|
conn: &rusqlite::Connection,
|
||||||
|
document_ids: &[i64],
|
||||||
|
) -> Result<Vec<HydratedRow>> {
|
||||||
|
if document_ids.is_empty() {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let ids_json = serde_json::to_string(document_ids)
|
||||||
|
.map_err(|e| LoreError::Other(e.to_string()))?;
|
||||||
|
|
||||||
|
let sql = r#"
|
||||||
|
SELECT d.id, d.source_type, d.title, d.url, d.author_username,
|
||||||
|
d.created_at, d.updated_at, d.content_text,
|
||||||
|
p.path_with_namespace AS project_path,
|
||||||
|
(SELECT json_group_array(dl.label_name)
|
||||||
|
FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
|
||||||
|
(SELECT json_group_array(dp.path)
|
||||||
|
FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json
|
||||||
|
FROM json_each(?1) AS j
|
||||||
|
JOIN documents d ON d.id = j.value
|
||||||
|
JOIN projects p ON p.id = d.project_id
|
||||||
|
ORDER BY j.key
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let mut stmt = conn.prepare(sql)?;
|
||||||
|
let rows = stmt
|
||||||
|
.query_map([ids_json], |row| {
|
||||||
|
let labels_json: String = row.get(9)?;
|
||||||
|
let paths_json: String = row.get(10)?;
|
||||||
|
|
||||||
|
Ok(HydratedRow {
|
||||||
|
document_id: row.get(0)?,
|
||||||
|
source_type: row.get(1)?,
|
||||||
|
title: row.get(2)?,
|
||||||
|
url: row.get(3)?,
|
||||||
|
author: row.get(4)?,
|
||||||
|
created_at: row.get(5)?,
|
||||||
|
updated_at: row.get(6)?,
|
||||||
|
content_text: row.get(7)?,
|
||||||
|
project_path: row.get(8)?,
|
||||||
|
labels: parse_json_array(&labels_json),
|
||||||
|
paths: parse_json_array(&paths_json),
|
||||||
|
})
|
||||||
|
})?
|
||||||
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a JSON array string into a Vec<String>, filtering out null/empty.
|
||||||
|
fn parse_json_array(json: &str) -> Vec<String> {
|
||||||
|
serde_json::from_str::<Vec<serde_json::Value>>(json)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print human-readable search results.
///
/// Warnings go to stderr so stdout stays clean for piping; everything else
/// goes to stdout. Each result is rendered as a numbered entry with type,
/// title, score, URL, project/author line, labels, snippet, and (optionally)
/// an explain line with ranking details.
pub fn print_search_results(response: &SearchResponse) {
    // Warnings are advisory (e.g. degraded search mode) — emit on stderr.
    if !response.warnings.is_empty() {
        for w in &response.warnings {
            eprintln!("{} {}", style("Warning:").yellow(), w);
        }
    }

    if response.results.is_empty() {
        println!(
            "No results found for '{}'",
            style(&response.query).bold()
        );
        return;
    }

    println!(
        "{} results for '{}' ({})",
        response.total_results,
        style(&response.query).bold(),
        response.mode
    );
    println!();

    for (i, result) in response.results.iter().enumerate() {
        // Map internal source_type identifiers to display names;
        // unknown types are shown verbatim.
        let type_prefix = match result.source_type.as_str() {
            "issue" => "Issue",
            "merge_request" => "MR",
            "discussion" => "Discussion",
            _ => &result.source_type,
        };

        // 1-based numbering for humans.
        println!(
            "[{}] {} - {} (score: {:.2})",
            i + 1,
            style(type_prefix).cyan(),
            result.title,
            result.score
        );

        if let Some(ref url) = result.url {
            println!("    {}", style(url).dim());
        }

        // Author is optional; absent authors render as an empty suffix.
        println!(
            "    {} | {}",
            style(&result.project_path).dim(),
            result
                .author
                .as_deref()
                .map(|a| format!("@{}", a))
                .unwrap_or_default()
        );

        if !result.labels.is_empty() {
            println!(
                "    Labels: {}",
                result.labels.join(", ")
            );
        }

        // Strip HTML tags from snippet for terminal display
        // (FTS snippets wrap matches in <mark>…</mark>).
        let clean_snippet = result
            .snippet
            .replace("<mark>", "")
            .replace("</mark>", "");
        println!("    {}", style(clean_snippet).dim());

        if let Some(ref explain) = result.explain {
            // NOTE(review): vector_rank exists on ExplainData but is not
            // printed here — presumably intentional for lexical mode; confirm.
            println!(
                "    {} fts_rank={} rrf_score={:.6}",
                style("[explain]").magenta(),
                explain
                    .fts_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain.rrf_score
            );
        }

        // Blank line between entries.
        println!();
    }
}
|
||||||
|
|
||||||
|
/// JSON output structures.
///
/// Robot-mode envelope: `{ "ok": true, "data": {...}, "meta": {...} }`.
/// Borrows the response to avoid cloning it just for serialization.
#[derive(Serialize)]
struct SearchJsonOutput<'a> {
    // Always true on the success path; errors use a different envelope.
    ok: bool,
    data: &'a SearchResponse,
    meta: SearchMeta,
}
|
||||||
|
|
||||||
|
/// Timing metadata attached to robot-mode search output.
#[derive(Serialize)]
struct SearchMeta {
    // Wall-clock time of the search, in milliseconds.
    elapsed_ms: u64,
}
|
||||||
|
|
||||||
|
/// Print JSON robot-mode output.
|
||||||
|
pub fn print_search_results_json(response: &SearchResponse, elapsed_ms: u64) {
|
||||||
|
let output = SearchJsonOutput {
|
||||||
|
ok: true,
|
||||||
|
data: response,
|
||||||
|
meta: SearchMeta { elapsed_ms },
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
@@ -6,7 +6,7 @@ use serde::Serialize;
|
|||||||
|
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::db::create_connection;
|
use crate::core::db::create_connection;
|
||||||
use crate::core::error::{GiError, Result};
|
use crate::core::error::{LoreError, Result};
|
||||||
use crate::core::paths::get_db_path;
|
use crate::core::paths::get_db_path;
|
||||||
use crate::core::time::ms_to_iso;
|
use crate::core::time::ms_to_iso;
|
||||||
|
|
||||||
@@ -188,11 +188,11 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
|
|||||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
match issues.len() {
|
match issues.len() {
|
||||||
0 => Err(GiError::NotFound(format!("Issue #{} not found", iid))),
|
0 => Err(LoreError::NotFound(format!("Issue #{} not found", iid))),
|
||||||
1 => Ok(issues.into_iter().next().unwrap()),
|
1 => Ok(issues.into_iter().next().unwrap()),
|
||||||
_ => {
|
_ => {
|
||||||
let projects: Vec<String> = issues.iter().map(|i| i.project_path.clone()).collect();
|
let projects: Vec<String> = issues.iter().map(|i| i.project_path.clone()).collect();
|
||||||
Err(GiError::Ambiguous(format!(
|
Err(LoreError::Ambiguous(format!(
|
||||||
"Issue #{} exists in multiple projects: {}. Use --project to specify.",
|
"Issue #{} exists in multiple projects: {}. Use --project to specify.",
|
||||||
iid,
|
iid,
|
||||||
projects.join(", ")
|
projects.join(", ")
|
||||||
@@ -386,11 +386,11 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<
|
|||||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
match mrs.len() {
|
match mrs.len() {
|
||||||
0 => Err(GiError::NotFound(format!("MR !{} not found", iid))),
|
0 => Err(LoreError::NotFound(format!("MR !{} not found", iid))),
|
||||||
1 => Ok(mrs.into_iter().next().unwrap()),
|
1 => Ok(mrs.into_iter().next().unwrap()),
|
||||||
_ => {
|
_ => {
|
||||||
let projects: Vec<String> = mrs.iter().map(|m| m.project_path.clone()).collect();
|
let projects: Vec<String> = mrs.iter().map(|m| m.project_path.clone()).collect();
|
||||||
Err(GiError::Ambiguous(format!(
|
Err(LoreError::Ambiguous(format!(
|
||||||
"MR !{} exists in multiple projects: {}. Use --project to specify.",
|
"MR !{} exists in multiple projects: {}. Use --project to specify.",
|
||||||
iid,
|
iid,
|
||||||
projects.join(", ")
|
projects.join(", ")
|
||||||
|
|||||||
348
src/cli/commands/stats.rs
Normal file
348
src/cli/commands/stats.rs
Normal file
@@ -0,0 +1,348 @@
|
|||||||
|
//! Stats command: document counts, embedding coverage, queue status, integrity checks.
|
||||||
|
|
||||||
|
use console::style;
|
||||||
|
use rusqlite::Connection;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::core::db::create_connection;
|
||||||
|
use crate::core::error::Result;
|
||||||
|
use crate::core::paths::get_db_path;
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
|
/// Result of the stats command.
///
/// Aggregates all sections printed by `lore stats`; `integrity` is populated
/// only when the command is run with `--check`.
#[derive(Debug, Default, Serialize)]
pub struct StatsResult {
    pub documents: DocumentStats,
    pub embeddings: EmbeddingStats,
    pub fts: FtsStats,
    pub queues: QueueStats,
    // None unless --check was passed; omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub integrity: Option<IntegrityResult>,
}
|
||||||
|
|
||||||
|
/// Per-source-type document counts.
#[derive(Debug, Default, Serialize)]
pub struct DocumentStats {
    // Total rows in `documents`; per-type counts below may not sum to this
    // if other source types exist.
    pub total: i64,
    pub issues: i64,
    pub merge_requests: i64,
    pub discussions: i64,
    // Documents whose content was truncated at ingest (is_truncated = 1).
    pub truncated: i64,
}
|
||||||
|
|
||||||
|
/// Embedding coverage statistics (all zero when the embedding tables
/// don't exist yet).
#[derive(Debug, Default, Serialize)]
pub struct EmbeddingStats {
    // Distinct documents with at least one successfully embedded chunk.
    pub embedded_documents: i64,
    // Total successfully embedded chunks across all documents.
    pub total_chunks: i64,
    // embedded_documents / documents.total, as a percentage (0.0 when empty).
    pub coverage_pct: f64,
}
|
||||||
|
|
||||||
|
/// Full-text-search index statistics.
#[derive(Debug, Default, Serialize)]
pub struct FtsStats {
    // Row count of the documents_fts virtual table; should equal
    // documents.total when the index is healthy.
    pub indexed: i64,
}
|
||||||
|
|
||||||
|
/// Work-queue statistics: pending vs. failed items per queue.
///
/// "Failed" means last_error IS NOT NULL; "pending" means last_error IS NULL.
#[derive(Debug, Default, Serialize)]
pub struct QueueStats {
    pub dirty_sources: i64,
    pub dirty_sources_failed: i64,
    // Zero when the pending_discussion_fetches table doesn't exist.
    pub pending_discussion_fetches: i64,
    pub pending_discussion_fetches_failed: i64,
}
|
||||||
|
|
||||||
|
/// Outcome of the --check integrity pass.
#[derive(Debug, Default, Serialize)]
pub struct IntegrityResult {
    // True only when no issue below was detected.
    pub ok: bool,
    // FTS row count differs from documents row count.
    pub fts_doc_mismatch: bool,
    // embedding_metadata rows whose document no longer exists.
    pub orphan_embeddings: i64,
    // embedding_metadata rows whose document_hash no longer matches the
    // document's current content_hash (re-embedding needed).
    pub stale_metadata: i64,
    // Present only when --repair was also requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repair: Option<RepairResult>,
}
|
||||||
|
|
||||||
|
/// Outcome of the --repair pass (counts of what was actually fixed).
#[derive(Debug, Default, Serialize)]
pub struct RepairResult {
    // The FTS index was rebuilt from the documents table.
    pub fts_rebuilt: bool,
    // Orphaned embedding_metadata rows removed.
    pub orphans_deleted: i64,
    // Stale embedding_metadata entries cleared (will be re-embedded).
    pub stale_cleared: i64,
}
|
||||||
|
|
||||||
|
/// Run the stats command.
///
/// Collects document/FTS/queue counts, optionally runs integrity checks
/// (`check`), and optionally repairs the issues found (`repair`, which is
/// only honored when `check` is also true — the CLI enforces that via
/// `requires = "check"`).
///
/// # Errors
///
/// Returns an error if the database cannot be opened or a repair statement
/// fails. Plain count queries never error here: `count_query` coerces
/// failures (e.g. missing tables on older schemas) to 0.
pub fn run_stats(
    config: &Config,
    check: bool,
    repair: bool,
) -> Result<StatsResult> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    let mut result = StatsResult::default();

    // Document counts
    result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
    result.documents.issues =
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?;
    result.documents.merge_requests =
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?;
    result.documents.discussions =
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?;
    result.documents.truncated =
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?;

    // Embedding stats — skip gracefully if table doesn't exist (Gate A only)
    if table_exists(&conn, "embedding_metadata") {
        // "Embedded" means at least one chunk with no recorded error.
        let embedded = count_query(
            &conn,
            "SELECT COUNT(DISTINCT document_id) FROM embedding_metadata WHERE last_error IS NULL",
        )?;
        let chunks = count_query(
            &conn,
            "SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL",
        )?;
        result.embeddings.embedded_documents = embedded;
        result.embeddings.total_chunks = chunks;
        // Guard against divide-by-zero on an empty corpus.
        result.embeddings.coverage_pct = if result.documents.total > 0 {
            (embedded as f64 / result.documents.total as f64) * 100.0
        } else {
            0.0
        };
    }

    // FTS stats
    result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;

    // Queue stats
    result.queues.dirty_sources =
        count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?;
    result.queues.dirty_sources_failed =
        count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?;

    if table_exists(&conn, "pending_discussion_fetches") {
        result.queues.pending_discussion_fetches = count_query(
            &conn,
            "SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NULL",
        )?;
        result.queues.pending_discussion_fetches_failed = count_query(
            &conn,
            "SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NOT NULL",
        )?;
    }

    // Integrity check
    if check {
        let mut integrity = IntegrityResult::default();

        // FTS/doc count mismatch
        integrity.fts_doc_mismatch = result.fts.indexed != result.documents.total;

        // Orphan embeddings (rowid/1000 should match a document ID)
        // NOTE(review): this guard checks for the "embeddings" (vec0) table
        // but the query reads embedding_metadata — presumably both tables are
        // created together by the same migration; confirm.
        if table_exists(&conn, "embeddings") {
            integrity.orphan_embeddings = count_query(
                &conn,
                "SELECT COUNT(*) FROM embedding_metadata em
                 WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = em.document_id)",
            )?;
        }

        // Stale metadata (document_hash != current content_hash)
        // Only chunk 0 is compared: one hash check per document is enough.
        if table_exists(&conn, "embedding_metadata") {
            integrity.stale_metadata = count_query(
                &conn,
                "SELECT COUNT(*) FROM embedding_metadata em
                 JOIN documents d ON d.id = em.document_id
                 WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash",
            )?;
        }

        integrity.ok = !integrity.fts_doc_mismatch
            && integrity.orphan_embeddings == 0
            && integrity.stale_metadata == 0;

        // Repair
        if repair {
            let mut repair_result = RepairResult::default();

            if integrity.fts_doc_mismatch {
                // FTS5 special command: rebuilds the index from the content table.
                conn.execute(
                    "INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
                    [],
                )?;
                repair_result.fts_rebuilt = true;
            }

            if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") {
                let deleted = conn.execute(
                    "DELETE FROM embedding_metadata
                     WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
                    [],
                )?;
                repair_result.orphans_deleted = deleted as i64;

                // Also clean orphaned vectors if vec0 table exists
                // (best effort: vec0 may not support this DELETE — error ignored).
                if table_exists(&conn, "embeddings") {
                    let _ = conn.execute(
                        "DELETE FROM embeddings
                         WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
                        [],
                    );
                }
            }

            if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") {
                // Deleting all metadata for a stale document forces a full
                // re-embed of that document on the next `lore embed`.
                let cleared = conn.execute(
                    "DELETE FROM embedding_metadata
                     WHERE document_id IN (
                         SELECT em.document_id FROM embedding_metadata em
                         JOIN documents d ON d.id = em.document_id
                         WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
                     )",
                    [],
                )?;
                repair_result.stale_cleared = cleared as i64;
            }

            integrity.repair = Some(repair_result);
        }

        result.integrity = Some(integrity);
    }

    Ok(result)
}
|
||||||
|
|
||||||
|
/// Run a single-value COUNT query, coercing any failure to 0.
///
/// Deliberately swallows query errors (e.g. "no such table" on schemas that
/// predate a migration) so that stats can degrade gracefully instead of
/// aborting. NOTE(review): this also hides genuine SQL errors — acceptable
/// for a read-only stats command, but don't reuse for anything load-bearing.
fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
    let count: i64 = conn
        .query_row(sql, [], |row| row.get(0))
        .unwrap_or(0);
    Ok(count)
}
|
||||||
|
|
||||||
|
fn table_exists(conn: &Connection, table: &str) -> bool {
|
||||||
|
conn.query_row(
|
||||||
|
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
|
||||||
|
[table],
|
||||||
|
|row| row.get::<_, i64>(0),
|
||||||
|
)
|
||||||
|
.unwrap_or(0)
|
||||||
|
> 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print human-readable stats.
///
/// Sections: Documents, Search Index, Queues, then (when --check was run)
/// the integrity Check section and (when --repair was run) the Repair section.
pub fn print_stats(result: &StatsResult) {
    println!("{}", style("Documents").cyan().bold());
    println!("  Total: {}", result.documents.total);
    println!("  Issues: {}", result.documents.issues);
    println!("  Merge Requests: {}", result.documents.merge_requests);
    println!("  Discussions: {}", result.documents.discussions);
    // Truncation is worth flagging but only when it actually occurred.
    if result.documents.truncated > 0 {
        println!("  Truncated: {}", style(result.documents.truncated).yellow());
    }
    println!();

    println!("{}", style("Search Index").cyan().bold());
    println!("  FTS indexed: {}", result.fts.indexed);
    println!(
        "  Embedding coverage: {:.1}% ({}/{})",
        result.embeddings.coverage_pct,
        result.embeddings.embedded_documents,
        result.documents.total
    );
    if result.embeddings.total_chunks > 0 {
        println!("  Total chunks: {}", result.embeddings.total_chunks);
    }
    println!();

    println!("{}", style("Queues").cyan().bold());
    println!("  Dirty sources: {} pending, {} failed",
        result.queues.dirty_sources,
        result.queues.dirty_sources_failed
    );
    println!("  Discussion fetch: {} pending, {} failed",
        result.queues.pending_discussion_fetches,
        result.queues.pending_discussion_fetches_failed
    );

    // Integrity output only exists when the command ran with --check.
    if let Some(ref integrity) = result.integrity {
        println!();
        let status = if integrity.ok {
            style("OK").green().bold()
        } else {
            style("ISSUES FOUND").red().bold()
        };
        println!("{} Integrity: {}", style("Check").cyan().bold(), status);

        if integrity.fts_doc_mismatch {
            println!("  {} FTS/document count mismatch", style("!").red());
        }
        if integrity.orphan_embeddings > 0 {
            println!(
                "  {} {} orphan embeddings",
                style("!").red(),
                integrity.orphan_embeddings
            );
        }
        if integrity.stale_metadata > 0 {
            println!(
                "  {} {} stale embedding metadata",
                style("!").red(),
                integrity.stale_metadata
            );
        }

        // Repair output only exists when --repair was also passed.
        if let Some(ref repair) = integrity.repair {
            println!();
            println!("{}", style("Repair").cyan().bold());
            if repair.fts_rebuilt {
                println!("  {} FTS index rebuilt", style("fixed").green());
            }
            if repair.orphans_deleted > 0 {
                println!(
                    "  {} {} orphan embeddings deleted",
                    style("fixed").green(),
                    repair.orphans_deleted
                );
            }
            if repair.stale_cleared > 0 {
                println!(
                    "  {} {} stale metadata entries cleared",
                    style("fixed").green(),
                    repair.stale_cleared
                );
            }
            if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 {
                println!("  No issues to repair.");
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// JSON output structures.
///
/// Robot-mode envelope: `{ "ok": true, "data": {...} }`. Owns its payload
/// (unlike the search envelope, which borrows) because StatsResult does not
/// derive Clone and the caller rebuilds it for serialization.
#[derive(Serialize)]
struct StatsJsonOutput {
    // Always true on the success path; errors use a different envelope.
    ok: bool,
    data: StatsResult,
}
|
||||||
|
|
||||||
|
/// Print JSON robot-mode output.
|
||||||
|
pub fn print_stats_json(result: &StatsResult) {
|
||||||
|
let output = StatsJsonOutput {
|
||||||
|
ok: true,
|
||||||
|
data: StatsResult {
|
||||||
|
documents: DocumentStats { ..*&result.documents },
|
||||||
|
embeddings: EmbeddingStats { ..*&result.embeddings },
|
||||||
|
fts: FtsStats { ..*&result.fts },
|
||||||
|
queues: QueueStats { ..*&result.queues },
|
||||||
|
integrity: result.integrity.as_ref().map(|i| IntegrityResult {
|
||||||
|
ok: i.ok,
|
||||||
|
fts_doc_mismatch: i.fts_doc_mismatch,
|
||||||
|
orphan_embeddings: i.orphan_embeddings,
|
||||||
|
stale_metadata: i.stale_metadata,
|
||||||
|
repair: i.repair.as_ref().map(|r| RepairResult {
|
||||||
|
fts_rebuilt: r.fts_rebuilt,
|
||||||
|
orphans_deleted: r.orphans_deleted,
|
||||||
|
stale_cleared: r.stale_cleared,
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
124
src/cli/commands/sync.rs
Normal file
124
src/cli/commands/sync.rs
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
//! Sync command: unified orchestrator for ingest -> generate-docs -> embed.
|
||||||
|
|
||||||
|
use console::style;
|
||||||
|
use serde::Serialize;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::Config;
|
||||||
|
use crate::core::error::Result;
|
||||||
|
|
||||||
|
use super::embed::run_embed;
|
||||||
|
use super::generate_docs::run_generate_docs;
|
||||||
|
use super::ingest::run_ingest;
|
||||||
|
|
||||||
|
/// Options for the sync command.
#[derive(Debug, Default)]
pub struct SyncOptions {
    // Reset ingestion cursors and fetch everything from scratch.
    pub full: bool,
    // Override a stale sync lock left by a crashed run.
    pub force: bool,
    // Skip stage 4 (embedding).
    pub no_embed: bool,
    // Skip stage 3 (document regeneration).
    pub no_docs: bool,
}
|
||||||
|
|
||||||
|
/// Result of the sync command.
///
/// Per-stage counters; stages that were skipped (via --no-docs / --no-embed)
/// or that failed gracefully (embedding) leave their counter at 0.
#[derive(Debug, Default, Serialize)]
pub struct SyncResult {
    pub issues_updated: usize,
    pub mrs_updated: usize,
    // Sum across both ingest stages (issues + MRs).
    pub discussions_fetched: usize,
    pub documents_regenerated: usize,
    pub documents_embedded: usize,
}
|
||||||
|
|
||||||
|
/// Run the full sync pipeline: ingest -> generate-docs -> embed.
///
/// Stages 1–2 (ingest issues, ingest MRs) are mandatory and fail the whole
/// sync on error. Stage 3 (generate-docs) is skipped with --no-docs.
/// Stage 4 (embed) is skipped with --no-embed, and degrades to a warning on
/// failure so an offline Ollama never fails the sync.
///
/// # Errors
///
/// Propagates errors from the ingest and generate-docs stages only.
pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResult> {
    let mut result = SyncResult::default();

    // Stage 1: Ingest issues
    info!("Sync stage 1/4: ingesting issues");
    let issues_result = run_ingest(config, "issues", None, options.force, options.full, true).await?;
    result.issues_updated = issues_result.issues_upserted;
    result.discussions_fetched += issues_result.discussions_fetched;

    // Stage 2: Ingest MRs
    info!("Sync stage 2/4: ingesting merge requests");
    let mrs_result = run_ingest(config, "mrs", None, options.force, options.full, true).await?;
    result.mrs_updated = mrs_result.mrs_upserted;
    result.discussions_fetched += mrs_result.discussions_fetched;

    // Stage 3: Generate documents (unless --no-docs)
    if options.no_docs {
        info!("Sync stage 3/4: skipping document generation (--no-docs)");
    } else {
        info!("Sync stage 3/4: generating documents");
        // Drains the dirty queue only (full=false, no project filter):
        // NOTE(review): options.full affects ingest cursors but is not
        // forwarded here — presumably a full re-ingest dirties everything
        // anyway; confirm.
        let docs_result = run_generate_docs(config, false, None)?;
        result.documents_regenerated = docs_result.regenerated;
    }

    // Stage 4: Embed documents (unless --no-embed)
    if options.no_embed {
        info!("Sync stage 4/4: skipping embedding (--no-embed)");
    } else {
        info!("Sync stage 4/4: embedding documents");
        // retry_failed=false: sync never re-attempts known-bad embeddings.
        match run_embed(config, false).await {
            Ok(embed_result) => {
                result.documents_embedded = embed_result.embedded;
            }
            Err(e) => {
                // Graceful degradation: Ollama down is a warning, not an error
                warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
            }
        }
    }

    info!(
        issues = result.issues_updated,
        mrs = result.mrs_updated,
        discussions = result.discussions_fetched,
        docs = result.documents_regenerated,
        embedded = result.documents_embedded,
        "Sync pipeline complete"
    );

    Ok(result)
}
|
||||||
|
|
||||||
|
/// Print human-readable sync summary.
|
||||||
|
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
|
||||||
|
println!(
|
||||||
|
"{} Sync complete:",
|
||||||
|
style("done").green().bold(),
|
||||||
|
);
|
||||||
|
println!(" Issues updated: {}", result.issues_updated);
|
||||||
|
println!(" MRs updated: {}", result.mrs_updated);
|
||||||
|
println!(" Discussions fetched: {}", result.discussions_fetched);
|
||||||
|
println!(" Documents regenerated: {}", result.documents_regenerated);
|
||||||
|
println!(" Documents embedded: {}", result.documents_embedded);
|
||||||
|
println!(
|
||||||
|
" Elapsed: {:.1}s",
|
||||||
|
elapsed.as_secs_f64()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON output for sync.
///
/// Robot-mode envelope: `{ "ok": true, "data": {...}, "meta": {...} }`.
/// Borrows the result to avoid cloning it for serialization.
#[derive(Serialize)]
struct SyncJsonOutput<'a> {
    // Always true on the success path; errors use a different envelope.
    ok: bool,
    data: &'a SyncResult,
    meta: SyncMeta,
}
|
||||||
|
|
||||||
|
/// Timing metadata attached to robot-mode sync output.
#[derive(Serialize)]
struct SyncMeta {
    // Total pipeline wall-clock time, in milliseconds.
    elapsed_ms: u64,
}
|
||||||
|
|
||||||
|
/// Print JSON robot-mode sync output.
|
||||||
|
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) {
|
||||||
|
let output = SyncJsonOutput {
|
||||||
|
ok: true,
|
||||||
|
data: result,
|
||||||
|
meta: SyncMeta { elapsed_ms },
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
138
src/cli/mod.rs
138
src/cli/mod.rs
@@ -69,6 +69,18 @@ pub enum Commands {
|
|||||||
/// Fail if prompts would be shown
|
/// Fail if prompts would be shown
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
non_interactive: bool,
|
non_interactive: bool,
|
||||||
|
|
||||||
|
/// GitLab base URL (required in robot mode)
|
||||||
|
#[arg(long)]
|
||||||
|
gitlab_url: Option<String>,
|
||||||
|
|
||||||
|
/// Environment variable name holding GitLab token (required in robot mode)
|
||||||
|
#[arg(long)]
|
||||||
|
token_env_var: Option<String>,
|
||||||
|
|
||||||
|
/// Comma-separated project paths (required in robot mode)
|
||||||
|
#[arg(long)]
|
||||||
|
projects: Option<String>,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Create timestamped database backup
|
/// Create timestamped database backup
|
||||||
@@ -81,9 +93,32 @@ pub enum Commands {
|
|||||||
yes: bool,
|
yes: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Search indexed documents
|
||||||
|
Search(SearchArgs),
|
||||||
|
|
||||||
|
/// Show document and index statistics
|
||||||
|
Stats(StatsArgs),
|
||||||
|
|
||||||
|
/// Generate searchable documents from ingested data
|
||||||
|
#[command(name = "generate-docs")]
|
||||||
|
GenerateDocs(GenerateDocsArgs),
|
||||||
|
|
||||||
|
/// Generate vector embeddings for documents via Ollama
|
||||||
|
Embed(EmbedArgs),
|
||||||
|
|
||||||
|
/// Run full sync pipeline: ingest -> generate-docs -> embed
|
||||||
|
Sync(SyncArgs),
|
||||||
|
|
||||||
/// Run pending database migrations
|
/// Run pending database migrations
|
||||||
Migrate,
|
Migrate,
|
||||||
|
|
||||||
|
/// Quick health check: config, database, schema version
|
||||||
|
Health,
|
||||||
|
|
||||||
|
/// Machine-readable command manifest for agent self-discovery
|
||||||
|
#[command(name = "robot-docs")]
|
||||||
|
RobotDocs,
|
||||||
|
|
||||||
// --- Hidden backward-compat aliases ---
|
// --- Hidden backward-compat aliases ---
|
||||||
/// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
|
/// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
|
||||||
#[command(hide = true)]
|
#[command(hide = true)]
|
||||||
@@ -299,6 +334,109 @@ pub struct IngestArgs {
|
|||||||
pub full: bool,
|
pub full: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Arguments for `lore stats`
// Doc comments below double as clap --help text; kept verbatim.
#[derive(Parser)]
pub struct StatsArgs {
    /// Run integrity checks
    #[arg(long)]
    pub check: bool,

    /// Repair integrity issues (requires --check)
    // clap rejects --repair without --check at parse time.
    #[arg(long, requires = "check")]
    pub repair: bool,
}
|
||||||
|
|
||||||
|
/// Arguments for `lore search <QUERY>`
// Doc comments below double as clap --help text; kept verbatim.
#[derive(Parser)]
pub struct SearchArgs {
    /// Search query string
    pub query: String,

    /// Search mode (lexical, hybrid, semantic)
    // Stringly-typed; validated by the search command, not by clap.
    #[arg(long, default_value = "hybrid")]
    pub mode: String,

    /// Filter by source type (issue, mr, discussion)
    // CLI flag is --type; field renamed to avoid the `type` keyword.
    #[arg(long = "type", value_name = "TYPE")]
    pub source_type: Option<String>,

    /// Filter by author username
    #[arg(long)]
    pub author: Option<String>,

    /// Filter by project path
    #[arg(short = 'p', long)]
    pub project: Option<String>,

    /// Filter by label (repeatable, AND logic)
    // Append action: `--label a --label b` collects into the Vec.
    #[arg(long, action = clap::ArgAction::Append)]
    pub label: Vec<String>,

    /// Filter by file path (trailing / for prefix match)
    #[arg(long)]
    pub path: Option<String>,

    /// Filter by created after (7d, 2w, or YYYY-MM-DD)
    #[arg(long)]
    pub after: Option<String>,

    /// Filter by updated after (7d, 2w, or YYYY-MM-DD)
    #[arg(long = "updated-after")]
    pub updated_after: Option<String>,

    /// Maximum results (default 20, max 100)
    // NOTE(review): the "max 100" is not enforced by clap here —
    // presumably clamped by the search command; confirm.
    #[arg(short = 'n', long = "limit", default_value = "20")]
    pub limit: usize,

    /// Show ranking explanation per result
    #[arg(long)]
    pub explain: bool,

    /// FTS query mode: safe (default) or raw
    // "safe" sanitizes user input for FTS; "raw" passes it through.
    #[arg(long = "fts-mode", default_value = "safe")]
    pub fts_mode: String,
}
|
||||||
|
|
||||||
|
/// Arguments for `lore generate-docs`
// Doc comments below double as clap --help text; kept verbatim.
#[derive(Parser)]
pub struct GenerateDocsArgs {
    /// Full rebuild: seed all entities into dirty queue, then drain
    #[arg(long)]
    pub full: bool,

    /// Filter to single project
    #[arg(short = 'p', long)]
    pub project: Option<String>,
}
|
||||||
|
|
||||||
|
/// Arguments for `lore sync`
// Doc comments below double as clap --help text; kept verbatim.
#[derive(Parser)]
pub struct SyncArgs {
    /// Reset cursors, fetch everything
    #[arg(long)]
    pub full: bool,

    /// Override stale lock
    #[arg(long)]
    pub force: bool,

    /// Skip embedding step
    #[arg(long)]
    pub no_embed: bool,

    /// Skip document regeneration
    #[arg(long)]
    pub no_docs: bool,
}
|
||||||
|
|
||||||
|
/// Arguments for `lore embed`
|
||||||
|
#[derive(Parser)]
|
||||||
|
pub struct EmbedArgs {
|
||||||
|
/// Retry previously failed embeddings
|
||||||
|
#[arg(long)]
|
||||||
|
pub retry_failed: bool,
|
||||||
|
}
|
||||||
|
|
||||||
/// Arguments for `lore count <ENTITY>`
|
/// Arguments for `lore count <ENTITY>`
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
pub struct CountArgs {
|
pub struct CountArgs {
|
||||||
|
|||||||
576
src/main.rs
576
src/main.rs
@@ -10,17 +10,23 @@ use tracing_subscriber::util::SubscriberInitExt;
|
|||||||
|
|
||||||
use lore::Config;
|
use lore::Config;
|
||||||
use lore::cli::commands::{
|
use lore::cli::commands::{
|
||||||
InitInputs, InitOptions, ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser,
|
InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser,
|
||||||
print_count, print_count_json, print_doctor_results, print_ingest_summary,
|
open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_generate_docs,
|
||||||
print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
|
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
|
||||||
print_list_mrs_json, print_show_issue, print_show_issue_json, print_show_mr,
|
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results,
|
||||||
|
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats,
|
||||||
|
print_stats_json,
|
||||||
|
print_embed, print_embed_json, print_sync, print_sync_json,
|
||||||
print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count,
|
print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count,
|
||||||
run_doctor, run_ingest, run_init, run_list_issues, run_list_mrs, run_show_issue, run_show_mr,
|
run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs,
|
||||||
run_sync_status,
|
run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions,
|
||||||
|
};
|
||||||
|
use lore::cli::{
|
||||||
|
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
|
||||||
|
SearchArgs, StatsArgs, SyncArgs,
|
||||||
};
|
};
|
||||||
use lore::cli::{Cli, Commands, CountArgs, IngestArgs, IssuesArgs, MrsArgs};
|
|
||||||
use lore::core::db::{create_connection, get_schema_version, run_migrations};
|
use lore::core::db::{create_connection, get_schema_version, run_migrations};
|
||||||
use lore::core::error::{GiError, RobotErrorOutput};
|
use lore::core::error::{LoreError, RobotErrorOutput};
|
||||||
use lore::core::paths::get_config_path;
|
use lore::core::paths::get_config_path;
|
||||||
use lore::core::paths::get_db_path;
|
use lore::core::paths::get_db_path;
|
||||||
|
|
||||||
@@ -49,6 +55,10 @@ async fn main() {
|
|||||||
let result = match cli.command {
|
let result = match cli.command {
|
||||||
Commands::Issues(args) => handle_issues(cli.config.as_deref(), args, robot_mode).await,
|
Commands::Issues(args) => handle_issues(cli.config.as_deref(), args, robot_mode).await,
|
||||||
Commands::Mrs(args) => handle_mrs(cli.config.as_deref(), args, robot_mode).await,
|
Commands::Mrs(args) => handle_mrs(cli.config.as_deref(), args, robot_mode).await,
|
||||||
|
Commands::Search(args) => handle_search(cli.config.as_deref(), args, robot_mode).await,
|
||||||
|
Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
|
||||||
|
Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
|
||||||
|
Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
|
||||||
Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode).await,
|
Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode).await,
|
||||||
Commands::Count(args) => {
|
Commands::Count(args) => {
|
||||||
handle_count(cli.config.as_deref(), args, robot_mode).await
|
handle_count(cli.config.as_deref(), args, robot_mode).await
|
||||||
@@ -60,10 +70,29 @@ async fn main() {
|
|||||||
Commands::Init {
|
Commands::Init {
|
||||||
force,
|
force,
|
||||||
non_interactive,
|
non_interactive,
|
||||||
} => handle_init(cli.config.as_deref(), force, non_interactive, robot_mode).await,
|
gitlab_url,
|
||||||
|
token_env_var,
|
||||||
|
projects,
|
||||||
|
} => {
|
||||||
|
handle_init(
|
||||||
|
cli.config.as_deref(),
|
||||||
|
force,
|
||||||
|
non_interactive,
|
||||||
|
robot_mode,
|
||||||
|
gitlab_url,
|
||||||
|
token_env_var,
|
||||||
|
projects,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
Commands::GenerateDocs(args) => {
|
||||||
|
handle_generate_docs(cli.config.as_deref(), args, robot_mode).await
|
||||||
|
}
|
||||||
Commands::Backup => handle_backup(robot_mode),
|
Commands::Backup => handle_backup(robot_mode),
|
||||||
Commands::Reset { yes: _ } => handle_reset(robot_mode),
|
Commands::Reset { yes: _ } => handle_reset(robot_mode),
|
||||||
Commands::Migrate => handle_migrate(cli.config.as_deref(), robot_mode).await,
|
Commands::Migrate => handle_migrate(cli.config.as_deref(), robot_mode).await,
|
||||||
|
Commands::Health => handle_health(cli.config.as_deref(), robot_mode).await,
|
||||||
|
Commands::RobotDocs => handle_robot_docs(robot_mode),
|
||||||
|
|
||||||
// --- Backward-compat: deprecated aliases ---
|
// --- Backward-compat: deprecated aliases ---
|
||||||
Commands::List {
|
Commands::List {
|
||||||
@@ -159,7 +188,7 @@ async fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fallback error output for non-GiError errors in robot mode.
|
/// Fallback error output for non-LoreError errors in robot mode.
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct FallbackErrorOutput {
|
struct FallbackErrorOutput {
|
||||||
error: FallbackError,
|
error: FallbackError,
|
||||||
@@ -172,8 +201,8 @@ struct FallbackError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
|
fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
|
||||||
// Try to downcast to GiError for structured output
|
// Try to downcast to LoreError for structured output
|
||||||
if let Some(gi_error) = e.downcast_ref::<GiError>() {
|
if let Some(gi_error) = e.downcast_ref::<LoreError>() {
|
||||||
if robot_mode {
|
if robot_mode {
|
||||||
let output = RobotErrorOutput::from(gi_error);
|
let output = RobotErrorOutput::from(gi_error);
|
||||||
// Use serde_json for safe serialization; fallback constructs JSON safely
|
// Use serde_json for safe serialization; fallback constructs JSON safely
|
||||||
@@ -201,7 +230,7 @@ fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback for non-GiError errors - use serde for proper JSON escaping
|
// Fallback for non-LoreError errors - use serde for proper JSON escaping
|
||||||
if robot_mode {
|
if robot_mode {
|
||||||
let output = FallbackErrorOutput {
|
let output = FallbackErrorOutput {
|
||||||
error: FallbackError {
|
error: FallbackError {
|
||||||
@@ -473,22 +502,123 @@ async fn handle_sync_status_cmd(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// JSON output for init command.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct InitOutput {
|
||||||
|
ok: bool,
|
||||||
|
data: InitOutputData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct InitOutputData {
|
||||||
|
config_path: String,
|
||||||
|
data_dir: String,
|
||||||
|
user: InitOutputUser,
|
||||||
|
projects: Vec<InitOutputProject>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct InitOutputUser {
|
||||||
|
username: String,
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct InitOutputProject {
|
||||||
|
path: String,
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_init_json(result: &InitResult) {
|
||||||
|
let output = InitOutput {
|
||||||
|
ok: true,
|
||||||
|
data: InitOutputData {
|
||||||
|
config_path: result.config_path.clone(),
|
||||||
|
data_dir: result.data_dir.clone(),
|
||||||
|
user: InitOutputUser {
|
||||||
|
username: result.user.username.clone(),
|
||||||
|
name: result.user.name.clone(),
|
||||||
|
},
|
||||||
|
projects: result
|
||||||
|
.projects
|
||||||
|
.iter()
|
||||||
|
.map(|p| InitOutputProject {
|
||||||
|
path: p.path.clone(),
|
||||||
|
name: p.name.clone(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_init(
|
async fn handle_init(
|
||||||
config_override: Option<&str>,
|
config_override: Option<&str>,
|
||||||
force: bool,
|
force: bool,
|
||||||
non_interactive: bool,
|
non_interactive: bool,
|
||||||
_robot_mode: bool, // TODO: Add robot mode support for init (requires non-interactive implementation)
|
robot_mode: bool,
|
||||||
|
gitlab_url_flag: Option<String>,
|
||||||
|
token_env_var_flag: Option<String>,
|
||||||
|
projects_flag: Option<String>,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
// Robot mode: require all inputs via flags, skip interactive prompts
|
||||||
|
if robot_mode {
|
||||||
|
let missing: Vec<&str> = [
|
||||||
|
gitlab_url_flag.is_none().then_some("--gitlab-url"),
|
||||||
|
token_env_var_flag.is_none().then_some("--token-env-var"),
|
||||||
|
projects_flag.is_none().then_some("--projects"),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !missing.is_empty() {
|
||||||
|
let output = RobotErrorWithSuggestion {
|
||||||
|
error: RobotErrorSuggestionData {
|
||||||
|
code: "MISSING_FLAGS".to_string(),
|
||||||
|
message: format!("Robot mode requires flags: {}", missing.join(", ")),
|
||||||
|
suggestion: "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project".to_string(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
eprintln!("{}", serde_json::to_string(&output)?);
|
||||||
|
std::process::exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
let project_paths: Vec<String> = projects_flag
|
||||||
|
.unwrap()
|
||||||
|
.split(',')
|
||||||
|
.map(|p| p.trim().to_string())
|
||||||
|
.filter(|p| !p.is_empty())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let result = run_init(
|
||||||
|
InitInputs {
|
||||||
|
gitlab_url: gitlab_url_flag.unwrap(),
|
||||||
|
token_env_var: token_env_var_flag.unwrap(),
|
||||||
|
project_paths,
|
||||||
|
},
|
||||||
|
InitOptions {
|
||||||
|
config_path: config_override.map(String::from),
|
||||||
|
force: true,
|
||||||
|
non_interactive: true,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
print_init_json(&result);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Human mode: interactive prompts
|
||||||
let config_path = get_config_path(config_override);
|
let config_path = get_config_path(config_override);
|
||||||
let mut confirmed_overwrite = force;
|
let mut confirmed_overwrite = force;
|
||||||
|
|
||||||
// Check if config exists and handle overwrite
|
if config_path.exists() && !force {
|
||||||
if config_path.exists() {
|
|
||||||
if non_interactive {
|
if non_interactive {
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"{}",
|
"{}",
|
||||||
style(format!(
|
style(format!(
|
||||||
"Config file exists at {}. Cannot proceed in non-interactive mode.",
|
"Config file exists at {}. Use --force to overwrite.",
|
||||||
config_path.display()
|
config_path.display()
|
||||||
))
|
))
|
||||||
.red()
|
.red()
|
||||||
@@ -496,7 +626,6 @@ async fn handle_init(
|
|||||||
std::process::exit(2);
|
std::process::exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !force {
|
|
||||||
let confirm = Confirm::new()
|
let confirm = Confirm::new()
|
||||||
.with_prompt(format!(
|
.with_prompt(format!(
|
||||||
"Config file exists at {}. Overwrite?",
|
"Config file exists at {}. Overwrite?",
|
||||||
@@ -511,10 +640,11 @@ async fn handle_init(
|
|||||||
}
|
}
|
||||||
confirmed_overwrite = true;
|
confirmed_overwrite = true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Prompt for GitLab URL
|
let gitlab_url: String = if let Some(url) = gitlab_url_flag {
|
||||||
let gitlab_url: String = Input::new()
|
url
|
||||||
|
} else {
|
||||||
|
Input::new()
|
||||||
.with_prompt("GitLab URL")
|
.with_prompt("GitLab URL")
|
||||||
.default("https://gitlab.com".to_string())
|
.default("https://gitlab.com".to_string())
|
||||||
.validate_with(|input: &String| -> Result<(), &str> {
|
.validate_with(|input: &String| -> Result<(), &str> {
|
||||||
@@ -524,15 +654,25 @@ async fn handle_init(
|
|||||||
Err("Please enter a valid URL")
|
Err("Please enter a valid URL")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.interact_text()?;
|
.interact_text()?
|
||||||
|
};
|
||||||
|
|
||||||
// Prompt for token env var
|
let token_env_var: String = if let Some(var) = token_env_var_flag {
|
||||||
let token_env_var: String = Input::new()
|
var
|
||||||
|
} else {
|
||||||
|
Input::new()
|
||||||
.with_prompt("Token environment variable name")
|
.with_prompt("Token environment variable name")
|
||||||
.default("GITLAB_TOKEN".to_string())
|
.default("GITLAB_TOKEN".to_string())
|
||||||
.interact_text()?;
|
.interact_text()?
|
||||||
|
};
|
||||||
|
|
||||||
// Prompt for project paths
|
let project_paths: Vec<String> = if let Some(projects) = projects_flag {
|
||||||
|
projects
|
||||||
|
.split(',')
|
||||||
|
.map(|p| p.trim().to_string())
|
||||||
|
.filter(|p| !p.is_empty())
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
let project_paths_input: String = Input::new()
|
let project_paths_input: String = Input::new()
|
||||||
.with_prompt("Project paths (comma-separated, e.g., group/project)")
|
.with_prompt("Project paths (comma-separated, e.g., group/project)")
|
||||||
.validate_with(|input: &String| -> Result<(), &str> {
|
.validate_with(|input: &String| -> Result<(), &str> {
|
||||||
@@ -544,11 +684,12 @@ async fn handle_init(
|
|||||||
})
|
})
|
||||||
.interact_text()?;
|
.interact_text()?;
|
||||||
|
|
||||||
let project_paths: Vec<String> = project_paths_input
|
project_paths_input
|
||||||
.split(',')
|
.split(',')
|
||||||
.map(|p| p.trim().to_string())
|
.map(|p| p.trim().to_string())
|
||||||
.filter(|p| !p.is_empty())
|
.filter(|p| !p.is_empty())
|
||||||
.collect();
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
println!("{}", style("\nValidating configuration...").blue());
|
println!("{}", style("\nValidating configuration...").blue());
|
||||||
|
|
||||||
@@ -840,6 +981,385 @@ async fn handle_migrate(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_stats(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
args: StatsArgs,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config = Config::load(config_override)?;
|
||||||
|
let result = run_stats(&config, args.check, args.repair)?;
|
||||||
|
if robot_mode {
|
||||||
|
print_stats_json(&result);
|
||||||
|
} else {
|
||||||
|
print_stats(&result);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_search(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
args: SearchArgs,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config = Config::load(config_override)?;
|
||||||
|
|
||||||
|
let fts_mode = match args.fts_mode.as_str() {
|
||||||
|
"raw" => lore::search::FtsQueryMode::Raw,
|
||||||
|
_ => lore::search::FtsQueryMode::Safe,
|
||||||
|
};
|
||||||
|
|
||||||
|
let cli_filters = SearchCliFilters {
|
||||||
|
source_type: args.source_type,
|
||||||
|
author: args.author,
|
||||||
|
project: args.project,
|
||||||
|
labels: args.label,
|
||||||
|
path: args.path,
|
||||||
|
after: args.after,
|
||||||
|
updated_after: args.updated_after,
|
||||||
|
limit: args.limit,
|
||||||
|
};
|
||||||
|
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
let response = run_search(&config, &args.query, cli_filters, fts_mode, args.explain)?;
|
||||||
|
let elapsed_ms = start.elapsed().as_millis() as u64;
|
||||||
|
|
||||||
|
if robot_mode {
|
||||||
|
print_search_results_json(&response, elapsed_ms);
|
||||||
|
} else {
|
||||||
|
print_search_results(&response);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_generate_docs(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
args: GenerateDocsArgs,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config = Config::load(config_override)?;
|
||||||
|
|
||||||
|
let result = run_generate_docs(&config, args.full, args.project.as_deref())?;
|
||||||
|
if robot_mode {
|
||||||
|
print_generate_docs_json(&result);
|
||||||
|
} else {
|
||||||
|
print_generate_docs(&result);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_embed(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
args: EmbedArgs,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config = Config::load(config_override)?;
|
||||||
|
let result = run_embed(&config, args.retry_failed).await?;
|
||||||
|
if robot_mode {
|
||||||
|
print_embed_json(&result);
|
||||||
|
} else {
|
||||||
|
print_embed(&result);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_sync_cmd(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
args: SyncArgs,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config = Config::load(config_override)?;
|
||||||
|
let options = SyncOptions {
|
||||||
|
full: args.full,
|
||||||
|
force: args.force,
|
||||||
|
no_embed: args.no_embed,
|
||||||
|
no_docs: args.no_docs,
|
||||||
|
};
|
||||||
|
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
let result = run_sync(&config, options).await?;
|
||||||
|
let elapsed = start.elapsed();
|
||||||
|
|
||||||
|
if robot_mode {
|
||||||
|
print_sync_json(&result, elapsed.as_millis() as u64);
|
||||||
|
} else {
|
||||||
|
print_sync(&result, elapsed);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Health + Robot-docs handlers
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// JSON output for health command.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct HealthOutput {
|
||||||
|
ok: bool,
|
||||||
|
data: HealthData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct HealthData {
|
||||||
|
healthy: bool,
|
||||||
|
config_found: bool,
|
||||||
|
db_found: bool,
|
||||||
|
schema_current: bool,
|
||||||
|
schema_version: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_health(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let config_path = get_config_path(config_override);
|
||||||
|
let config_found = config_path.exists();
|
||||||
|
|
||||||
|
let (db_found, schema_version, schema_current) = if config_found {
|
||||||
|
match Config::load(config_override) {
|
||||||
|
Ok(config) => {
|
||||||
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||||
|
if db_path.exists() {
|
||||||
|
match create_connection(&db_path) {
|
||||||
|
Ok(conn) => {
|
||||||
|
let version = get_schema_version(&conn);
|
||||||
|
let latest = 9; // Number of embedded migrations
|
||||||
|
(true, version, version >= latest)
|
||||||
|
}
|
||||||
|
Err(_) => (true, 0, false),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
(false, 0, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => (false, 0, false),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
(false, 0, false)
|
||||||
|
};
|
||||||
|
|
||||||
|
let healthy = config_found && db_found && schema_current;
|
||||||
|
|
||||||
|
if robot_mode {
|
||||||
|
let output = HealthOutput {
|
||||||
|
ok: true,
|
||||||
|
data: HealthData {
|
||||||
|
healthy,
|
||||||
|
config_found,
|
||||||
|
db_found,
|
||||||
|
schema_current,
|
||||||
|
schema_version,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&output)?);
|
||||||
|
} else {
|
||||||
|
let status = |ok: bool| {
|
||||||
|
if ok {
|
||||||
|
style("pass").green()
|
||||||
|
} else {
|
||||||
|
style("FAIL").red()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
println!("Config: {} ({})", status(config_found), config_path.display());
|
||||||
|
println!("DB: {}", status(db_found));
|
||||||
|
println!(
|
||||||
|
"Schema: {} (v{})",
|
||||||
|
status(schema_current),
|
||||||
|
schema_version
|
||||||
|
);
|
||||||
|
println!();
|
||||||
|
if healthy {
|
||||||
|
println!("{}", style("Healthy").green().bold());
|
||||||
|
} else {
|
||||||
|
println!("{}", style("Unhealthy - run 'lore doctor' for details").red().bold());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !healthy {
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON output for robot-docs command.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RobotDocsOutput {
|
||||||
|
ok: bool,
|
||||||
|
data: RobotDocsData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RobotDocsData {
|
||||||
|
name: String,
|
||||||
|
version: String,
|
||||||
|
description: String,
|
||||||
|
activation: RobotDocsActivation,
|
||||||
|
commands: serde_json::Value,
|
||||||
|
exit_codes: serde_json::Value,
|
||||||
|
error_format: String,
|
||||||
|
workflows: serde_json::Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RobotDocsActivation {
|
||||||
|
flags: Vec<String>,
|
||||||
|
env: String,
|
||||||
|
auto: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_robot_docs(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let version = env!("CARGO_PKG_VERSION").to_string();
|
||||||
|
|
||||||
|
let commands = serde_json::json!({
|
||||||
|
"init": {
|
||||||
|
"description": "Initialize configuration and database",
|
||||||
|
"flags": ["--force", "--non-interactive", "--gitlab-url <URL>", "--token-env-var <VAR>", "--projects <paths>"],
|
||||||
|
"robot_flags": ["--gitlab-url", "--token-env-var", "--projects"],
|
||||||
|
"example": "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project"
|
||||||
|
},
|
||||||
|
"health": {
|
||||||
|
"description": "Quick pre-flight check: config, database, schema version",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot health"
|
||||||
|
},
|
||||||
|
"auth": {
|
||||||
|
"description": "Verify GitLab authentication",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot auth"
|
||||||
|
},
|
||||||
|
"doctor": {
|
||||||
|
"description": "Full environment health check (config, auth, DB, Ollama)",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot doctor"
|
||||||
|
},
|
||||||
|
"ingest": {
|
||||||
|
"description": "Sync data from GitLab",
|
||||||
|
"flags": ["--project <path>", "--force", "--full", "<entity: issues|mrs>"],
|
||||||
|
"example": "lore --robot ingest issues --project group/repo"
|
||||||
|
},
|
||||||
|
"sync": {
|
||||||
|
"description": "Full sync pipeline: ingest -> generate-docs -> embed",
|
||||||
|
"flags": ["--full", "--force", "--no-embed", "--no-docs"],
|
||||||
|
"example": "lore --robot sync"
|
||||||
|
},
|
||||||
|
"issues": {
|
||||||
|
"description": "List or show issues",
|
||||||
|
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--label", "--milestone", "--since", "--due-before", "--has-due", "--sort", "--asc"],
|
||||||
|
"example": "lore --robot issues --state opened --limit 10"
|
||||||
|
},
|
||||||
|
"mrs": {
|
||||||
|
"description": "List or show merge requests",
|
||||||
|
"flags": ["<IID>", "--limit", "--state", "--project", "--author", "--assignee", "--reviewer", "--label", "--since", "--draft", "--no-draft", "--target", "--source", "--sort", "--asc"],
|
||||||
|
"example": "lore --robot mrs --state opened"
|
||||||
|
},
|
||||||
|
"search": {
|
||||||
|
"description": "Search indexed documents (lexical, hybrid, semantic)",
|
||||||
|
"flags": ["<QUERY>", "--mode", "--type", "--author", "--project", "--label", "--path", "--after", "--updated-after", "--limit", "--explain", "--fts-mode"],
|
||||||
|
"example": "lore --robot search 'authentication bug' --mode hybrid --limit 10"
|
||||||
|
},
|
||||||
|
"count": {
|
||||||
|
"description": "Count entities in local database",
|
||||||
|
"flags": ["<entity: issues|mrs|discussions|notes>", "--for <issue|mr>"],
|
||||||
|
"example": "lore --robot count issues"
|
||||||
|
},
|
||||||
|
"stats": {
|
||||||
|
"description": "Show document and index statistics",
|
||||||
|
"flags": ["--check", "--repair"],
|
||||||
|
"example": "lore --robot stats"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"description": "Show sync state (cursors, last sync times)",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot status"
|
||||||
|
},
|
||||||
|
"generate-docs": {
|
||||||
|
"description": "Generate searchable documents from ingested data",
|
||||||
|
"flags": ["--full", "--project <path>"],
|
||||||
|
"example": "lore --robot generate-docs --full"
|
||||||
|
},
|
||||||
|
"embed": {
|
||||||
|
"description": "Generate vector embeddings for documents via Ollama",
|
||||||
|
"flags": ["--retry-failed"],
|
||||||
|
"example": "lore --robot embed"
|
||||||
|
},
|
||||||
|
"migrate": {
|
||||||
|
"description": "Run pending database migrations",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot migrate"
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"description": "Show version information",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore --robot version"
|
||||||
|
},
|
||||||
|
"robot-docs": {
|
||||||
|
"description": "This command (agent self-discovery manifest)",
|
||||||
|
"flags": [],
|
||||||
|
"example": "lore robot-docs"
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let exit_codes = serde_json::json!({
|
||||||
|
"0": "Success",
|
||||||
|
"1": "Internal error / health check failed",
|
||||||
|
"2": "Config not found / missing flags",
|
||||||
|
"3": "Config invalid",
|
||||||
|
"4": "Token not set",
|
||||||
|
"5": "GitLab auth failed",
|
||||||
|
"6": "Resource not found",
|
||||||
|
"7": "Rate limited",
|
||||||
|
"8": "Network error",
|
||||||
|
"9": "Database locked",
|
||||||
|
"10": "Database error",
|
||||||
|
"11": "Migration failed",
|
||||||
|
"12": "I/O error",
|
||||||
|
"13": "Transform error"
|
||||||
|
});
|
||||||
|
|
||||||
|
let workflows = serde_json::json!({
|
||||||
|
"first_setup": [
|
||||||
|
"lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project",
|
||||||
|
"lore --robot doctor",
|
||||||
|
"lore --robot sync"
|
||||||
|
],
|
||||||
|
"daily_sync": [
|
||||||
|
"lore --robot sync"
|
||||||
|
],
|
||||||
|
"search": [
|
||||||
|
"lore --robot search 'query' --mode hybrid"
|
||||||
|
],
|
||||||
|
"pre_flight": [
|
||||||
|
"lore --robot health"
|
||||||
|
]
|
||||||
|
});
|
||||||
|
|
||||||
|
let output = RobotDocsOutput {
|
||||||
|
ok: true,
|
||||||
|
data: RobotDocsData {
|
||||||
|
name: "lore".to_string(),
|
||||||
|
version,
|
||||||
|
description: "Local GitLab data management with semantic search".to_string(),
|
||||||
|
activation: RobotDocsActivation {
|
||||||
|
flags: vec!["--robot".to_string(), "-J".to_string(), "--json".to_string()],
|
||||||
|
env: "LORE_ROBOT=1".to_string(),
|
||||||
|
auto: "Non-TTY stdout".to_string(),
|
||||||
|
},
|
||||||
|
commands,
|
||||||
|
exit_codes,
|
||||||
|
error_format: "stderr JSON: {\"error\":{\"code\":\"...\",\"message\":\"...\",\"suggestion\":\"...\"}}".to_string(),
|
||||||
|
workflows,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
if robot_mode {
|
||||||
|
println!("{}", serde_json::to_string(&output)?);
|
||||||
|
} else {
|
||||||
|
println!("{}", serde_json::to_string_pretty(&output)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Backward-compat handlers (deprecated, delegate to new handlers)
|
// Backward-compat handlers (deprecated, delegate to new handlers)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user