feat(cli): Add search, stats, embed, sync, health, and robot-docs commands
Extends the CLI with six new commands that complete the search pipeline:

- lore search <QUERY>: Hybrid search with mode selection (lexical, hybrid, semantic), rich filtering (--type, --author, --project, --label, --path, --after, --updated-after), result limits, and optional explain mode showing RRF score breakdowns. Safe FTS mode sanitizes user input; raw mode passes through for power users.
- lore stats: Document and index statistics with optional --check for integrity verification and --repair to fix inconsistencies (orphaned documents, missing FTS entries, stale dirty queue items).
- lore embed: Generate vector embeddings via Ollama. Supports --retry-failed to re-attempt previously failed embeddings.
- lore generate-docs: Drain the dirty queue to regenerate documents. --full seeds all entities for complete rebuild. --project scopes to a single project.
- lore sync: Full pipeline orchestration (ingest issues + MRs, generate-docs, embed) with --no-embed and --no-docs flags for partial runs. Reports per-stage results and total elapsed time.
- lore health: Quick pre-flight check (config exists, DB exists, schema current). Returns exit code 1 if unhealthy. Designed for agent pre-flight scripts.
- lore robot-docs: Machine-readable command manifest for agent self-discovery. Returns all commands, flags, examples, exit codes, and recommended workflows as structured JSON.

Also enhances lore init with --gitlab-url, --token-env-var, and --projects flags for fully non-interactive robot-mode initialization. Fixes init's force/non-interactive precedence logic and adds JSON output for robot mode.

Updates all command files for the GiError -> LoreError rename.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
348
src/cli/commands/stats.rs
Normal file
348
src/cli/commands/stats.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
//! Stats command: document counts, embedding coverage, queue status, integrity checks.
|
||||
|
||||
use console::style;
|
||||
use rusqlite::Connection;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::core::db::create_connection;
|
||||
use crate::core::error::Result;
|
||||
use crate::core::paths::get_db_path;
|
||||
use crate::Config;
|
||||
|
||||
/// Result of the stats command.
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct StatsResult {
|
||||
pub documents: DocumentStats,
|
||||
pub embeddings: EmbeddingStats,
|
||||
pub fts: FtsStats,
|
||||
pub queues: QueueStats,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub integrity: Option<IntegrityResult>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct DocumentStats {
|
||||
pub total: i64,
|
||||
pub issues: i64,
|
||||
pub merge_requests: i64,
|
||||
pub discussions: i64,
|
||||
pub truncated: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct EmbeddingStats {
|
||||
pub embedded_documents: i64,
|
||||
pub total_chunks: i64,
|
||||
pub coverage_pct: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct FtsStats {
|
||||
pub indexed: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct QueueStats {
|
||||
pub dirty_sources: i64,
|
||||
pub dirty_sources_failed: i64,
|
||||
pub pending_discussion_fetches: i64,
|
||||
pub pending_discussion_fetches_failed: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct IntegrityResult {
|
||||
pub ok: bool,
|
||||
pub fts_doc_mismatch: bool,
|
||||
pub orphan_embeddings: i64,
|
||||
pub stale_metadata: i64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub repair: Option<RepairResult>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct RepairResult {
|
||||
pub fts_rebuilt: bool,
|
||||
pub orphans_deleted: i64,
|
||||
pub stale_cleared: i64,
|
||||
}
|
||||
|
||||
/// Run the stats command.
|
||||
pub fn run_stats(
|
||||
config: &Config,
|
||||
check: bool,
|
||||
repair: bool,
|
||||
) -> Result<StatsResult> {
|
||||
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||
let conn = create_connection(&db_path)?;
|
||||
|
||||
let mut result = StatsResult::default();
|
||||
|
||||
// Document counts
|
||||
result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
|
||||
result.documents.issues =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?;
|
||||
result.documents.merge_requests =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?;
|
||||
result.documents.discussions =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?;
|
||||
result.documents.truncated =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?;
|
||||
|
||||
// Embedding stats — skip gracefully if table doesn't exist (Gate A only)
|
||||
if table_exists(&conn, "embedding_metadata") {
|
||||
let embedded = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(DISTINCT document_id) FROM embedding_metadata WHERE last_error IS NULL",
|
||||
)?;
|
||||
let chunks = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL",
|
||||
)?;
|
||||
result.embeddings.embedded_documents = embedded;
|
||||
result.embeddings.total_chunks = chunks;
|
||||
result.embeddings.coverage_pct = if result.documents.total > 0 {
|
||||
(embedded as f64 / result.documents.total as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
}
|
||||
|
||||
// FTS stats
|
||||
result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
|
||||
|
||||
// Queue stats
|
||||
result.queues.dirty_sources =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?;
|
||||
result.queues.dirty_sources_failed =
|
||||
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?;
|
||||
|
||||
if table_exists(&conn, "pending_discussion_fetches") {
|
||||
result.queues.pending_discussion_fetches = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NULL",
|
||||
)?;
|
||||
result.queues.pending_discussion_fetches_failed = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NOT NULL",
|
||||
)?;
|
||||
}
|
||||
|
||||
// Integrity check
|
||||
if check {
|
||||
let mut integrity = IntegrityResult::default();
|
||||
|
||||
// FTS/doc count mismatch
|
||||
integrity.fts_doc_mismatch = result.fts.indexed != result.documents.total;
|
||||
|
||||
// Orphan embeddings (rowid/1000 should match a document ID)
|
||||
if table_exists(&conn, "embeddings") {
|
||||
integrity.orphan_embeddings = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(*) FROM embedding_metadata em
|
||||
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = em.document_id)",
|
||||
)?;
|
||||
}
|
||||
|
||||
// Stale metadata (document_hash != current content_hash)
|
||||
if table_exists(&conn, "embedding_metadata") {
|
||||
integrity.stale_metadata = count_query(
|
||||
&conn,
|
||||
"SELECT COUNT(*) FROM embedding_metadata em
|
||||
JOIN documents d ON d.id = em.document_id
|
||||
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash",
|
||||
)?;
|
||||
}
|
||||
|
||||
integrity.ok = !integrity.fts_doc_mismatch
|
||||
&& integrity.orphan_embeddings == 0
|
||||
&& integrity.stale_metadata == 0;
|
||||
|
||||
// Repair
|
||||
if repair {
|
||||
let mut repair_result = RepairResult::default();
|
||||
|
||||
if integrity.fts_doc_mismatch {
|
||||
conn.execute(
|
||||
"INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
|
||||
[],
|
||||
)?;
|
||||
repair_result.fts_rebuilt = true;
|
||||
}
|
||||
|
||||
if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") {
|
||||
let deleted = conn.execute(
|
||||
"DELETE FROM embedding_metadata
|
||||
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
|
||||
[],
|
||||
)?;
|
||||
repair_result.orphans_deleted = deleted as i64;
|
||||
|
||||
// Also clean orphaned vectors if vec0 table exists
|
||||
if table_exists(&conn, "embeddings") {
|
||||
let _ = conn.execute(
|
||||
"DELETE FROM embeddings
|
||||
WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
|
||||
[],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") {
|
||||
let cleared = conn.execute(
|
||||
"DELETE FROM embedding_metadata
|
||||
WHERE document_id IN (
|
||||
SELECT em.document_id FROM embedding_metadata em
|
||||
JOIN documents d ON d.id = em.document_id
|
||||
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
|
||||
)",
|
||||
[],
|
||||
)?;
|
||||
repair_result.stale_cleared = cleared as i64;
|
||||
}
|
||||
|
||||
integrity.repair = Some(repair_result);
|
||||
}
|
||||
|
||||
result.integrity = Some(integrity);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Runs a parameterless single-value query and returns column 0 of its first row.
///
/// Any failure of the query (for instance because the table is missing) is
/// reported as a count of `0`, so this function never actually returns `Err`;
/// the `Result` return type only lets callers use `?` uniformly.
fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
    match conn.query_row(sql, [], |row| row.get::<_, i64>(0)) {
        Ok(value) => Ok(value),
        Err(_) => Ok(0),
    }
}
|
||||
|
||||
fn table_exists(conn: &Connection, table: &str) -> bool {
|
||||
conn.query_row(
|
||||
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
|
||||
[table],
|
||||
|row| row.get::<_, i64>(0),
|
||||
)
|
||||
.unwrap_or(0)
|
||||
> 0
|
||||
}
|
||||
|
||||
/// Print human-readable stats.
|
||||
pub fn print_stats(result: &StatsResult) {
|
||||
println!("{}", style("Documents").cyan().bold());
|
||||
println!(" Total: {}", result.documents.total);
|
||||
println!(" Issues: {}", result.documents.issues);
|
||||
println!(" Merge Requests: {}", result.documents.merge_requests);
|
||||
println!(" Discussions: {}", result.documents.discussions);
|
||||
if result.documents.truncated > 0 {
|
||||
println!(" Truncated: {}", style(result.documents.truncated).yellow());
|
||||
}
|
||||
println!();
|
||||
|
||||
println!("{}", style("Search Index").cyan().bold());
|
||||
println!(" FTS indexed: {}", result.fts.indexed);
|
||||
println!(
|
||||
" Embedding coverage: {:.1}% ({}/{})",
|
||||
result.embeddings.coverage_pct,
|
||||
result.embeddings.embedded_documents,
|
||||
result.documents.total
|
||||
);
|
||||
if result.embeddings.total_chunks > 0 {
|
||||
println!(" Total chunks: {}", result.embeddings.total_chunks);
|
||||
}
|
||||
println!();
|
||||
|
||||
println!("{}", style("Queues").cyan().bold());
|
||||
println!(" Dirty sources: {} pending, {} failed",
|
||||
result.queues.dirty_sources,
|
||||
result.queues.dirty_sources_failed
|
||||
);
|
||||
println!(" Discussion fetch: {} pending, {} failed",
|
||||
result.queues.pending_discussion_fetches,
|
||||
result.queues.pending_discussion_fetches_failed
|
||||
);
|
||||
|
||||
if let Some(ref integrity) = result.integrity {
|
||||
println!();
|
||||
let status = if integrity.ok {
|
||||
style("OK").green().bold()
|
||||
} else {
|
||||
style("ISSUES FOUND").red().bold()
|
||||
};
|
||||
println!("{} Integrity: {}", style("Check").cyan().bold(), status);
|
||||
|
||||
if integrity.fts_doc_mismatch {
|
||||
println!(" {} FTS/document count mismatch", style("!").red());
|
||||
}
|
||||
if integrity.orphan_embeddings > 0 {
|
||||
println!(
|
||||
" {} {} orphan embeddings",
|
||||
style("!").red(),
|
||||
integrity.orphan_embeddings
|
||||
);
|
||||
}
|
||||
if integrity.stale_metadata > 0 {
|
||||
println!(
|
||||
" {} {} stale embedding metadata",
|
||||
style("!").red(),
|
||||
integrity.stale_metadata
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(ref repair) = integrity.repair {
|
||||
println!();
|
||||
println!("{}", style("Repair").cyan().bold());
|
||||
if repair.fts_rebuilt {
|
||||
println!(" {} FTS index rebuilt", style("fixed").green());
|
||||
}
|
||||
if repair.orphans_deleted > 0 {
|
||||
println!(
|
||||
" {} {} orphan embeddings deleted",
|
||||
style("fixed").green(),
|
||||
repair.orphans_deleted
|
||||
);
|
||||
}
|
||||
if repair.stale_cleared > 0 {
|
||||
println!(
|
||||
" {} {} stale metadata entries cleared",
|
||||
style("fixed").green(),
|
||||
repair.stale_cleared
|
||||
);
|
||||
}
|
||||
if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 {
|
||||
println!(" No issues to repair.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON output structures.
|
||||
#[derive(Serialize)]
|
||||
struct StatsJsonOutput {
|
||||
ok: bool,
|
||||
data: StatsResult,
|
||||
}
|
||||
|
||||
/// Print JSON robot-mode output.
|
||||
pub fn print_stats_json(result: &StatsResult) {
|
||||
let output = StatsJsonOutput {
|
||||
ok: true,
|
||||
data: StatsResult {
|
||||
documents: DocumentStats { ..*&result.documents },
|
||||
embeddings: EmbeddingStats { ..*&result.embeddings },
|
||||
fts: FtsStats { ..*&result.fts },
|
||||
queues: QueueStats { ..*&result.queues },
|
||||
integrity: result.integrity.as_ref().map(|i| IntegrityResult {
|
||||
ok: i.ok,
|
||||
fts_doc_mismatch: i.fts_doc_mismatch,
|
||||
orphan_embeddings: i.orphan_embeddings,
|
||||
stale_metadata: i.stale_metadata,
|
||||
repair: i.repair.as_ref().map(|r| RepairResult {
|
||||
fts_rebuilt: r.fts_rebuilt,
|
||||
orphans_deleted: r.orphans_deleted,
|
||||
stale_cleared: r.stale_cleared,
|
||||
}),
|
||||
}),
|
||||
},
|
||||
};
|
||||
println!("{}", serde_json::to_string(&output).unwrap());
|
||||
}
|
||||
Reference in New Issue
Block a user