From daf5a730197815b3a053eb5517fdef7fd3c81d4a Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Fri, 30 Jan 2026 15:47:10 -0500 Subject: [PATCH] feat(cli): Add search, stats, embed, generate-docs, sync, health, and robot-docs commands Extends the CLI with seven new commands that complete the search pipeline: - lore search: Hybrid search with mode selection (lexical, hybrid, semantic), rich filtering (--type, --author, --project, --label, --path, --after, --updated-after), result limits, and optional explain mode showing RRF score breakdowns. Safe FTS mode sanitizes user input; raw mode passes through for power users. - lore stats: Document and index statistics with optional --check for integrity verification and --repair to fix inconsistencies (orphaned documents, missing FTS entries, stale dirty queue items). - lore embed: Generate vector embeddings via Ollama. Supports --retry-failed to re-attempt previously failed embeddings. - lore generate-docs: Drain the dirty queue to regenerate documents. --full seeds all entities for complete rebuild. --project scopes to a single project. - lore sync: Full pipeline orchestration (ingest issues + MRs, generate-docs, embed) with --no-embed and --no-docs flags for partial runs. Reports per-stage results and total elapsed time. - lore health: Quick pre-flight check (config exists, DB exists, schema current). Returns exit code 1 if unhealthy. Designed for agent pre-flight scripts. - lore robot-docs: Machine-readable command manifest for agent self-discovery. Returns all commands, flags, examples, exit codes, and recommended workflows as structured JSON. Also enhances lore init with --gitlab-url, --token-env-var, and --projects flags for fully non-interactive robot-mode initialization. Fixes init's force/non-interactive precedence logic and adds JSON output for robot mode. Updates all command files for the GiError -> LoreError rename. 
Co-Authored-By: Claude Opus 4.5 --- src/cli/commands/auth_test.rs | 6 +- src/cli/commands/doctor.rs | 6 +- src/cli/commands/embed.rs | 88 ++++ src/cli/commands/generate_docs.rs | 205 ++++++++++ src/cli/commands/ingest.rs | 10 +- src/cli/commands/init.rs | 30 +- src/cli/commands/mod.rs | 12 + src/cli/commands/search.rs | 402 +++++++++++++++++++ src/cli/commands/show.rs | 10 +- src/cli/commands/stats.rs | 348 ++++++++++++++++ src/cli/commands/sync.rs | 124 ++++++ src/cli/mod.rs | 138 +++++++ src/main.rs | 646 +++++++++++++++++++++++++++--- 13 files changed, 1930 insertions(+), 95 deletions(-) create mode 100644 src/cli/commands/embed.rs create mode 100644 src/cli/commands/generate_docs.rs create mode 100644 src/cli/commands/search.rs create mode 100644 src/cli/commands/stats.rs create mode 100644 src/cli/commands/sync.rs diff --git a/src/cli/commands/auth_test.rs b/src/cli/commands/auth_test.rs index de98d4f..5562875 100644 --- a/src/cli/commands/auth_test.rs +++ b/src/cli/commands/auth_test.rs @@ -1,7 +1,7 @@ //! Auth test command - verify GitLab authentication. use crate::core::config::Config; -use crate::core::error::{GiError, Result}; +use crate::core::error::{LoreError, Result}; use crate::gitlab::GitLabClient; /// Result of successful auth test. @@ -19,12 +19,12 @@ pub async fn run_auth_test(config_path: Option<&str>) -> Result // 2. 
Get token from environment let token = std::env::var(&config.gitlab.token_env_var) .map(|t| t.trim().to_string()) - .map_err(|_| GiError::TokenNotSet { + .map_err(|_| LoreError::TokenNotSet { env_var: config.gitlab.token_env_var.clone(), })?; if token.is_empty() { - return Err(GiError::TokenNotSet { + return Err(LoreError::TokenNotSet { env_var: config.gitlab.token_env_var.clone(), }); } diff --git a/src/cli/commands/doctor.rs b/src/cli/commands/doctor.rs index db1c949..1d84667 100644 --- a/src/cli/commands/doctor.rs +++ b/src/cli/commands/doctor.rs @@ -5,7 +5,7 @@ use serde::Serialize; use crate::core::config::Config; use crate::core::db::{create_connection, get_schema_version, verify_pragmas}; -use crate::core::error::GiError; +use crate::core::error::LoreError; use crate::core::paths::{get_config_path, get_db_path}; use crate::gitlab::GitLabClient; @@ -137,7 +137,7 @@ fn check_config(config_path: &str) -> (ConfigCheck, Option) { }, Some(config), ), - Err(GiError::ConfigNotFound { path }) => ( + Err(LoreError::ConfigNotFound { path }) => ( ConfigCheck { result: CheckResult { status: CheckStatus::Error, @@ -264,7 +264,7 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck { url: Some(config.gitlab.base_url.clone()), username: Some(user.username), }, - Err(GiError::GitLabAuthFailed) => GitLabCheck { + Err(LoreError::GitLabAuthFailed) => GitLabCheck { result: CheckResult { status: CheckStatus::Error, message: Some("Authentication failed. Check your token.".to_string()), diff --git a/src/cli/commands/embed.rs b/src/cli/commands/embed.rs new file mode 100644 index 0000000..7a99fe7 --- /dev/null +++ b/src/cli/commands/embed.rs @@ -0,0 +1,88 @@ +//! Embed command: generate vector embeddings for documents via Ollama. 
+ +use console::style; +use serde::Serialize; + +use crate::core::db::create_connection; +use crate::core::error::Result; +use crate::core::paths::get_db_path; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; +use crate::embedding::pipeline::embed_documents; +use crate::Config; + +/// Result of the embed command. +#[derive(Debug, Default, Serialize)] +pub struct EmbedCommandResult { + pub embedded: usize, + pub failed: usize, + pub skipped: usize, +} + +/// Run the embed command. +pub async fn run_embed( + config: &Config, + retry_failed: bool, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + // Build Ollama config from user settings + let ollama_config = OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + ..OllamaConfig::default() + }; + let client = OllamaClient::new(ollama_config); + + // Health check — fail fast if Ollama is down or model missing + client.health_check().await?; + + // If retry_failed, clear errors so they become pending again + if retry_failed { + conn.execute( + "UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0 + WHERE last_error IS NOT NULL", + [], + )?; + } + + let model_name = &config.embedding.model; + let result = embed_documents(&conn, &client, model_name, None).await?; + + Ok(EmbedCommandResult { + embedded: result.embedded, + failed: result.failed, + skipped: result.skipped, + }) +} + +/// Print human-readable output. +pub fn print_embed(result: &EmbedCommandResult) { + println!( + "{} Embedding complete", + style("done").green().bold(), + ); + println!(" Embedded: {}", result.embedded); + if result.failed > 0 { + println!(" Failed: {}", style(result.failed).red()); + } + if result.skipped > 0 { + println!(" Skipped: {}", result.skipped); + } +} + +/// JSON output. 
+#[derive(Serialize)] +struct EmbedJsonOutput<'a> { + ok: bool, + data: &'a EmbedCommandResult, +} + +/// Print JSON robot-mode output. +pub fn print_embed_json(result: &EmbedCommandResult) { + let output = EmbedJsonOutput { + ok: true, + data: result, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/commands/generate_docs.rs b/src/cli/commands/generate_docs.rs new file mode 100644 index 0000000..e6377c2 --- /dev/null +++ b/src/cli/commands/generate_docs.rs @@ -0,0 +1,205 @@ +//! Generate searchable documents from ingested GitLab data. + +use console::style; +use rusqlite::Connection; +use serde::Serialize; +use tracing::info; + +use crate::core::db::create_connection; +use crate::core::error::Result; +use crate::core::paths::get_db_path; +use crate::documents::{regenerate_dirty_documents, SourceType}; +use crate::Config; + +const FULL_MODE_CHUNK_SIZE: i64 = 2000; + +/// Result of a generate-docs run. +#[derive(Debug, Default)] +pub struct GenerateDocsResult { + pub regenerated: usize, + pub unchanged: usize, + pub errored: usize, + pub seeded: usize, + pub full_mode: bool, +} + +/// Run the generate-docs pipeline. +/// +/// Default mode: process only existing dirty_sources entries. +/// Full mode: seed dirty_sources with ALL entities, then drain. 
+pub fn run_generate_docs( + config: &Config, + full: bool, + project_filter: Option<&str>, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + let mut result = GenerateDocsResult { + full_mode: full, + ..Default::default() + }; + + if full { + result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?; + result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?; + result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?; + } + + let regen = regenerate_dirty_documents(&conn)?; + result.regenerated = regen.regenerated; + result.unchanged = regen.unchanged; + result.errored = regen.errored; + + if full { + // Optimize FTS index after bulk rebuild + let _ = conn.execute( + "INSERT INTO documents_fts(documents_fts) VALUES('optimize')", + [], + ); + info!("FTS index optimized after full rebuild"); + } + + Ok(result) +} + +/// Seed dirty_sources with all entities of the given type using keyset pagination. 
+fn seed_dirty( + conn: &Connection, + source_type: SourceType, + project_filter: Option<&str>, +) -> Result { + let table = match source_type { + SourceType::Issue => "issues", + SourceType::MergeRequest => "merge_requests", + SourceType::Discussion => "discussions", + }; + let type_str = source_type.as_str(); + let now = chrono::Utc::now().timestamp_millis(); + + let mut total_seeded: usize = 0; + let mut last_id: i64 = 0; + + loop { + let inserted = if let Some(project) = project_filter { + // Resolve project to ID for filtering + let project_id: Option = conn + .query_row( + "SELECT id FROM projects WHERE path_with_namespace = ?1 COLLATE NOCASE", + [project], + |row| row.get(0), + ) + .ok(); + + let Some(pid) = project_id else { + break; + }; + + conn.execute( + &format!( + "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) + SELECT ?1, id, ?2, 0, NULL, NULL, NULL + FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5 + ON CONFLICT(source_type, source_id) DO NOTHING" + ), + rusqlite::params![type_str, now, last_id, pid, FULL_MODE_CHUNK_SIZE], + )? + } else { + conn.execute( + &format!( + "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) + SELECT ?1, id, ?2, 0, NULL, NULL, NULL + FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4 + ON CONFLICT(source_type, source_id) DO NOTHING" + ), + rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE], + )? 
+ }; + + if inserted == 0 { + break; + } + + // Advance keyset cursor to the max id within the chunk window + let max_id: i64 = conn.query_row( + &format!( + "SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)", + table = table + ), + rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE], + |row| row.get(0), + )?; + + total_seeded += inserted; + last_id = max_id; + } + + info!( + source_type = type_str, + seeded = total_seeded, + "Seeded dirty_sources" + ); + + Ok(total_seeded) +} + +/// Print human-readable output. +pub fn print_generate_docs(result: &GenerateDocsResult) { + let mode = if result.full_mode { "full" } else { "incremental" }; + println!( + "{} Document generation complete ({})", + style("done").green().bold(), + mode + ); + + if result.full_mode { + println!(" Seeded: {}", result.seeded); + } + println!(" Regenerated: {}", result.regenerated); + println!(" Unchanged: {}", result.unchanged); + if result.errored > 0 { + println!( + " Errored: {}", + style(result.errored).red() + ); + } +} + +/// JSON output structures. +#[derive(Serialize)] +struct GenerateDocsJsonOutput { + ok: bool, + data: GenerateDocsJsonData, +} + +#[derive(Serialize)] +struct GenerateDocsJsonData { + mode: String, + #[serde(skip_serializing_if = "Option::is_none")] + seeded: Option, + regenerated: usize, + unchanged: usize, + errored: usize, +} + +/// Print JSON robot-mode output. 
+pub fn print_generate_docs_json(result: &GenerateDocsResult) { + let output = GenerateDocsJsonOutput { + ok: true, + data: GenerateDocsJsonData { + mode: if result.full_mode { + "full".to_string() + } else { + "incremental".to_string() + }, + seeded: if result.full_mode { + Some(result.seeded) + } else { + None + }, + regenerated: result.regenerated, + unchanged: result.unchanged, + errored: result.errored, + }, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 849b8d1..d8b71b7 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -7,7 +7,7 @@ use serde::Serialize; use crate::Config; use crate::core::db::create_connection; -use crate::core::error::{GiError, Result}; +use crate::core::error::{LoreError, Result}; use crate::core::lock::{AppLock, LockOptions}; use crate::core::paths::get_db_path; use crate::gitlab::GitLabClient; @@ -51,7 +51,7 @@ pub async fn run_ingest( ) -> Result { // Validate resource type early if resource_type != "issues" && resource_type != "mrs" { - return Err(GiError::Other(format!( + return Err(LoreError::Other(format!( "Invalid resource type '{}'. Valid types: issues, mrs", resource_type ))); @@ -74,7 +74,7 @@ pub async fn run_ingest( lock.acquire(force)?; // Get token from environment - let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| GiError::TokenNotSet { + let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { env_var: config.gitlab.token_env_var.clone(), })?; @@ -119,12 +119,12 @@ pub async fn run_ingest( if projects.is_empty() { if let Some(filter) = project_filter { - return Err(GiError::Other(format!( + return Err(LoreError::Other(format!( "Project '{}' not found in configuration", filter ))); } - return Err(GiError::Other( + return Err(LoreError::Other( "No projects configured. 
Run 'lore init' first.".to_string(), )); } diff --git a/src/cli/commands/init.rs b/src/cli/commands/init.rs index 92a6a31..3dfaaba 100644 --- a/src/cli/commands/init.rs +++ b/src/cli/commands/init.rs @@ -4,7 +4,7 @@ use std::fs; use crate::core::config::{MinimalConfig, MinimalGitLabConfig, ProjectConfig}; use crate::core::db::{create_connection, run_migrations}; -use crate::core::error::{GiError, Result}; +use crate::core::error::{LoreError, Result}; use crate::core::paths::{get_config_path, get_data_dir}; use crate::gitlab::{GitLabClient, GitLabProject}; @@ -45,32 +45,30 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result Result Result, + pub author: Option, + pub created_at: Option, + pub updated_at: Option, + pub project_path: String, + pub labels: Vec, + pub paths: Vec, + pub snippet: String, + pub score: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub explain: Option, +} + +/// Ranking explanation for --explain output. +#[derive(Debug, Serialize)] +pub struct ExplainData { + pub vector_rank: Option, + pub fts_rank: Option, + pub rrf_score: f64, +} + +/// Search response wrapper. +#[derive(Debug, Serialize)] +pub struct SearchResponse { + pub query: String, + pub mode: String, + pub total_results: usize, + pub results: Vec, + pub warnings: Vec, +} + +/// Build SearchFilters from CLI args. +pub struct SearchCliFilters { + pub source_type: Option, + pub author: Option, + pub project: Option, + pub labels: Vec, + pub path: Option, + pub after: Option, + pub updated_after: Option, + pub limit: usize, +} + +/// Run a lexical search query. 
+pub fn run_search( + config: &Config, + query: &str, + cli_filters: SearchCliFilters, + fts_mode: FtsQueryMode, + explain: bool, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + // Check if any documents exist + let doc_count: i64 = conn + .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0)) + .unwrap_or(0); + + if doc_count == 0 { + return Ok(SearchResponse { + query: query.to_string(), + mode: "lexical".to_string(), + total_results: 0, + results: vec![], + warnings: vec![ + "No documents indexed. Run 'lore generate-docs' first.".to_string() + ], + }); + } + + // Build filters + let source_type = cli_filters + .source_type + .as_deref() + .and_then(SourceType::parse); + + let project_id = cli_filters + .project + .as_deref() + .map(|p| resolve_project(&conn, p)) + .transpose()?; + + let after = cli_filters.after.as_deref().and_then(parse_since); + let updated_after = cli_filters.updated_after.as_deref().and_then(parse_since); + + let path = cli_filters.path.as_deref().map(|p| { + if p.ends_with('/') { + PathFilter::Prefix(p.to_string()) + } else { + PathFilter::Exact(p.to_string()) + } + }); + + let filters = SearchFilters { + source_type, + author: cli_filters.author, + project_id, + after, + updated_after, + labels: cli_filters.labels, + path, + limit: cli_filters.limit, + }; + + // Adaptive recall: wider initial fetch when filters applied + let requested = filters.clamp_limit(); + let top_k = if filters.has_any_filter() { + (requested * 50).max(200).min(1500) + } else { + (requested * 10).max(50).min(1500) + }; + + // FTS search + let fts_results = search_fts(&conn, query, top_k, fts_mode)?; + let fts_tuples: Vec<(i64, f64)> = fts_results + .iter() + .map(|r| (r.document_id, r.bm25_score)) + .collect(); + + // Build snippet map before ranking + let snippet_map: std::collections::HashMap = fts_results + .iter() + .map(|r| (r.document_id, r.snippet.clone())) + .collect(); 
+ + // RRF ranking (single-list for lexical mode) + let ranked = rank_rrf(&[], &fts_tuples); + let ranked_ids: Vec = ranked.iter().map(|r| r.document_id).collect(); + + // Apply post-retrieval filters + let filtered_ids = apply_filters(&conn, &ranked_ids, &filters)?; + + if filtered_ids.is_empty() { + return Ok(SearchResponse { + query: query.to_string(), + mode: "lexical".to_string(), + total_results: 0, + results: vec![], + warnings: vec![], + }); + } + + // Hydrate results in single round-trip + let hydrated = hydrate_results(&conn, &filtered_ids)?; + + // Build display results preserving filter order + let rrf_map: std::collections::HashMap = ranked + .iter() + .map(|r| (r.document_id, r)) + .collect(); + + let mut results: Vec = Vec::with_capacity(hydrated.len()); + for row in &hydrated { + let rrf = rrf_map.get(&row.document_id); + let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str()); + let snippet = get_result_snippet(fts_snippet, &row.content_text); + + let explain_data = if explain { + rrf.map(|r| ExplainData { + vector_rank: r.vector_rank, + fts_rank: r.fts_rank, + rrf_score: r.rrf_score, + }) + } else { + None + }; + + results.push(SearchResultDisplay { + document_id: row.document_id, + source_type: row.source_type.clone(), + title: row.title.clone(), + url: row.url.clone(), + author: row.author.clone(), + created_at: row.created_at.map(ms_to_iso), + updated_at: row.updated_at.map(ms_to_iso), + project_path: row.project_path.clone(), + labels: row.labels.clone(), + paths: row.paths.clone(), + snippet, + score: rrf.map(|r| r.normalized_score).unwrap_or(0.0), + explain: explain_data, + }); + } + + Ok(SearchResponse { + query: query.to_string(), + mode: "lexical".to_string(), + total_results: results.len(), + results, + warnings: vec![], + }) +} + +/// Raw row from hydration query. 
+struct HydratedRow { + document_id: i64, + source_type: String, + title: String, + url: Option, + author: Option, + created_at: Option, + updated_at: Option, + content_text: String, + project_path: String, + labels: Vec, + paths: Vec, +} + +/// Hydrate document IDs into full display rows in a single query. +/// +/// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key. +/// Labels and paths fetched via correlated json_group_array subqueries. +fn hydrate_results( + conn: &rusqlite::Connection, + document_ids: &[i64], +) -> Result> { + if document_ids.is_empty() { + return Ok(Vec::new()); + } + + let ids_json = serde_json::to_string(document_ids) + .map_err(|e| LoreError::Other(e.to_string()))?; + + let sql = r#" + SELECT d.id, d.source_type, d.title, d.url, d.author_username, + d.created_at, d.updated_at, d.content_text, + p.path_with_namespace AS project_path, + (SELECT json_group_array(dl.label_name) + FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json, + (SELECT json_group_array(dp.path) + FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json + FROM json_each(?1) AS j + JOIN documents d ON d.id = j.value + JOIN projects p ON p.id = d.project_id + ORDER BY j.key + "#; + + let mut stmt = conn.prepare(sql)?; + let rows = stmt + .query_map([ids_json], |row| { + let labels_json: String = row.get(9)?; + let paths_json: String = row.get(10)?; + + Ok(HydratedRow { + document_id: row.get(0)?, + source_type: row.get(1)?, + title: row.get(2)?, + url: row.get(3)?, + author: row.get(4)?, + created_at: row.get(5)?, + updated_at: row.get(6)?, + content_text: row.get(7)?, + project_path: row.get(8)?, + labels: parse_json_array(&labels_json), + paths: parse_json_array(&paths_json), + }) + })? + .collect::, _>>()?; + + Ok(rows) +} + +/// Parse a JSON array string into a Vec, filtering out null/empty. 
+fn parse_json_array(json: &str) -> Vec { + serde_json::from_str::>(json) + .unwrap_or_default() + .into_iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .filter(|s| !s.is_empty()) + .collect() +} + +/// Print human-readable search results. +pub fn print_search_results(response: &SearchResponse) { + if !response.warnings.is_empty() { + for w in &response.warnings { + eprintln!("{} {}", style("Warning:").yellow(), w); + } + } + + if response.results.is_empty() { + println!( + "No results found for '{}'", + style(&response.query).bold() + ); + return; + } + + println!( + "{} results for '{}' ({})", + response.total_results, + style(&response.query).bold(), + response.mode + ); + println!(); + + for (i, result) in response.results.iter().enumerate() { + let type_prefix = match result.source_type.as_str() { + "issue" => "Issue", + "merge_request" => "MR", + "discussion" => "Discussion", + _ => &result.source_type, + }; + + println!( + "[{}] {} - {} (score: {:.2})", + i + 1, + style(type_prefix).cyan(), + result.title, + result.score + ); + + if let Some(ref url) = result.url { + println!(" {}", style(url).dim()); + } + + println!( + " {} | {}", + style(&result.project_path).dim(), + result + .author + .as_deref() + .map(|a| format!("@{}", a)) + .unwrap_or_default() + ); + + if !result.labels.is_empty() { + println!( + " Labels: {}", + result.labels.join(", ") + ); + } + + // Strip HTML tags from snippet for terminal display + let clean_snippet = result + .snippet + .replace("", "") + .replace("", ""); + println!(" {}", style(clean_snippet).dim()); + + if let Some(ref explain) = result.explain { + println!( + " {} fts_rank={} rrf_score={:.6}", + style("[explain]").magenta(), + explain + .fts_rank + .map(|r| r.to_string()) + .unwrap_or_else(|| "-".into()), + explain.rrf_score + ); + } + + println!(); + } +} + +/// JSON output structures. 
+#[derive(Serialize)] +struct SearchJsonOutput<'a> { + ok: bool, + data: &'a SearchResponse, + meta: SearchMeta, +} + +#[derive(Serialize)] +struct SearchMeta { + elapsed_ms: u64, +} + +/// Print JSON robot-mode output. +pub fn print_search_results_json(response: &SearchResponse, elapsed_ms: u64) { + let output = SearchJsonOutput { + ok: true, + data: response, + meta: SearchMeta { elapsed_ms }, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/commands/show.rs b/src/cli/commands/show.rs index 48b03ca..012d1b6 100644 --- a/src/cli/commands/show.rs +++ b/src/cli/commands/show.rs @@ -6,7 +6,7 @@ use serde::Serialize; use crate::Config; use crate::core::db::create_connection; -use crate::core::error::{GiError, Result}; +use crate::core::error::{LoreError, Result}; use crate::core::paths::get_db_path; use crate::core::time::ms_to_iso; @@ -188,11 +188,11 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu .collect::, _>>()?; match issues.len() { - 0 => Err(GiError::NotFound(format!("Issue #{} not found", iid))), + 0 => Err(LoreError::NotFound(format!("Issue #{} not found", iid))), 1 => Ok(issues.into_iter().next().unwrap()), _ => { let projects: Vec = issues.iter().map(|i| i.project_path.clone()).collect(); - Err(GiError::Ambiguous(format!( + Err(LoreError::Ambiguous(format!( "Issue #{} exists in multiple projects: {}. Use --project to specify.", iid, projects.join(", ") @@ -386,11 +386,11 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result< .collect::, _>>()?; match mrs.len() { - 0 => Err(GiError::NotFound(format!("MR !{} not found", iid))), + 0 => Err(LoreError::NotFound(format!("MR !{} not found", iid))), 1 => Ok(mrs.into_iter().next().unwrap()), _ => { let projects: Vec = mrs.iter().map(|m| m.project_path.clone()).collect(); - Err(GiError::Ambiguous(format!( + Err(LoreError::Ambiguous(format!( "MR !{} exists in multiple projects: {}. 
Use --project to specify.", iid, projects.join(", ") diff --git a/src/cli/commands/stats.rs b/src/cli/commands/stats.rs new file mode 100644 index 0000000..8ec3e46 --- /dev/null +++ b/src/cli/commands/stats.rs @@ -0,0 +1,348 @@ +//! Stats command: document counts, embedding coverage, queue status, integrity checks. + +use console::style; +use rusqlite::Connection; +use serde::Serialize; + +use crate::core::db::create_connection; +use crate::core::error::Result; +use crate::core::paths::get_db_path; +use crate::Config; + +/// Result of the stats command. +#[derive(Debug, Default, Serialize)] +pub struct StatsResult { + pub documents: DocumentStats, + pub embeddings: EmbeddingStats, + pub fts: FtsStats, + pub queues: QueueStats, + #[serde(skip_serializing_if = "Option::is_none")] + pub integrity: Option, +} + +#[derive(Debug, Default, Serialize)] +pub struct DocumentStats { + pub total: i64, + pub issues: i64, + pub merge_requests: i64, + pub discussions: i64, + pub truncated: i64, +} + +#[derive(Debug, Default, Serialize)] +pub struct EmbeddingStats { + pub embedded_documents: i64, + pub total_chunks: i64, + pub coverage_pct: f64, +} + +#[derive(Debug, Default, Serialize)] +pub struct FtsStats { + pub indexed: i64, +} + +#[derive(Debug, Default, Serialize)] +pub struct QueueStats { + pub dirty_sources: i64, + pub dirty_sources_failed: i64, + pub pending_discussion_fetches: i64, + pub pending_discussion_fetches_failed: i64, +} + +#[derive(Debug, Default, Serialize)] +pub struct IntegrityResult { + pub ok: bool, + pub fts_doc_mismatch: bool, + pub orphan_embeddings: i64, + pub stale_metadata: i64, + #[serde(skip_serializing_if = "Option::is_none")] + pub repair: Option, +} + +#[derive(Debug, Default, Serialize)] +pub struct RepairResult { + pub fts_rebuilt: bool, + pub orphans_deleted: i64, + pub stale_cleared: i64, +} + +/// Run the stats command. 
+pub fn run_stats( + config: &Config, + check: bool, + repair: bool, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let mut result = StatsResult::default(); + + // Document counts + result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?; + result.documents.issues = + count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?; + result.documents.merge_requests = + count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?; + result.documents.discussions = + count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?; + result.documents.truncated = + count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?; + + // Embedding stats — skip gracefully if table doesn't exist (Gate A only) + if table_exists(&conn, "embedding_metadata") { + let embedded = count_query( + &conn, + "SELECT COUNT(DISTINCT document_id) FROM embedding_metadata WHERE last_error IS NULL", + )?; + let chunks = count_query( + &conn, + "SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL", + )?; + result.embeddings.embedded_documents = embedded; + result.embeddings.total_chunks = chunks; + result.embeddings.coverage_pct = if result.documents.total > 0 { + (embedded as f64 / result.documents.total as f64) * 100.0 + } else { + 0.0 + }; + } + + // FTS stats + result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?; + + // Queue stats + result.queues.dirty_sources = + count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?; + result.queues.dirty_sources_failed = + count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?; + + if table_exists(&conn, "pending_discussion_fetches") { + result.queues.pending_discussion_fetches = count_query( + &conn, + "SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS 
NULL", + )?; + result.queues.pending_discussion_fetches_failed = count_query( + &conn, + "SELECT COUNT(*) FROM pending_discussion_fetches WHERE last_error IS NOT NULL", + )?; + } + + // Integrity check + if check { + let mut integrity = IntegrityResult::default(); + + // FTS/doc count mismatch + integrity.fts_doc_mismatch = result.fts.indexed != result.documents.total; + + // Orphan embeddings (rowid/1000 should match a document ID) + if table_exists(&conn, "embeddings") { + integrity.orphan_embeddings = count_query( + &conn, + "SELECT COUNT(*) FROM embedding_metadata em + WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = em.document_id)", + )?; + } + + // Stale metadata (document_hash != current content_hash) + if table_exists(&conn, "embedding_metadata") { + integrity.stale_metadata = count_query( + &conn, + "SELECT COUNT(*) FROM embedding_metadata em + JOIN documents d ON d.id = em.document_id + WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash", + )?; + } + + integrity.ok = !integrity.fts_doc_mismatch + && integrity.orphan_embeddings == 0 + && integrity.stale_metadata == 0; + + // Repair + if repair { + let mut repair_result = RepairResult::default(); + + if integrity.fts_doc_mismatch { + conn.execute( + "INSERT INTO documents_fts(documents_fts) VALUES('rebuild')", + [], + )?; + repair_result.fts_rebuilt = true; + } + + if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") { + let deleted = conn.execute( + "DELETE FROM embedding_metadata + WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)", + [], + )?; + repair_result.orphans_deleted = deleted as i64; + + // Also clean orphaned vectors if vec0 table exists + if table_exists(&conn, "embeddings") { + let _ = conn.execute( + "DELETE FROM embeddings + WHERE rowid / 1000 NOT IN (SELECT id FROM documents)", + [], + ); + } + } + + if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") { + let cleared = 
conn.execute( + "DELETE FROM embedding_metadata + WHERE document_id IN ( + SELECT em.document_id FROM embedding_metadata em + JOIN documents d ON d.id = em.document_id + WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash + )", + [], + )?; + repair_result.stale_cleared = cleared as i64; + } + + integrity.repair = Some(repair_result); + } + + result.integrity = Some(integrity); + } + + Ok(result) +} + +fn count_query(conn: &Connection, sql: &str) -> Result { + let count: i64 = conn + .query_row(sql, [], |row| row.get(0)) + .unwrap_or(0); + Ok(count) +} + +fn table_exists(conn: &Connection, table: &str) -> bool { + conn.query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1", + [table], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(0) + > 0 +} + +/// Print human-readable stats. +pub fn print_stats(result: &StatsResult) { + println!("{}", style("Documents").cyan().bold()); + println!(" Total: {}", result.documents.total); + println!(" Issues: {}", result.documents.issues); + println!(" Merge Requests: {}", result.documents.merge_requests); + println!(" Discussions: {}", result.documents.discussions); + if result.documents.truncated > 0 { + println!(" Truncated: {}", style(result.documents.truncated).yellow()); + } + println!(); + + println!("{}", style("Search Index").cyan().bold()); + println!(" FTS indexed: {}", result.fts.indexed); + println!( + " Embedding coverage: {:.1}% ({}/{})", + result.embeddings.coverage_pct, + result.embeddings.embedded_documents, + result.documents.total + ); + if result.embeddings.total_chunks > 0 { + println!(" Total chunks: {}", result.embeddings.total_chunks); + } + println!(); + + println!("{}", style("Queues").cyan().bold()); + println!(" Dirty sources: {} pending, {} failed", + result.queues.dirty_sources, + result.queues.dirty_sources_failed + ); + println!(" Discussion fetch: {} pending, {} failed", + result.queues.pending_discussion_fetches, + result.queues.pending_discussion_fetches_failed 
+ ); + + if let Some(ref integrity) = result.integrity { + println!(); + let status = if integrity.ok { + style("OK").green().bold() + } else { + style("ISSUES FOUND").red().bold() + }; + println!("{} Integrity: {}", style("Check").cyan().bold(), status); + + if integrity.fts_doc_mismatch { + println!(" {} FTS/document count mismatch", style("!").red()); + } + if integrity.orphan_embeddings > 0 { + println!( + " {} {} orphan embeddings", + style("!").red(), + integrity.orphan_embeddings + ); + } + if integrity.stale_metadata > 0 { + println!( + " {} {} stale embedding metadata", + style("!").red(), + integrity.stale_metadata + ); + } + + if let Some(ref repair) = integrity.repair { + println!(); + println!("{}", style("Repair").cyan().bold()); + if repair.fts_rebuilt { + println!(" {} FTS index rebuilt", style("fixed").green()); + } + if repair.orphans_deleted > 0 { + println!( + " {} {} orphan embeddings deleted", + style("fixed").green(), + repair.orphans_deleted + ); + } + if repair.stale_cleared > 0 { + println!( + " {} {} stale metadata entries cleared", + style("fixed").green(), + repair.stale_cleared + ); + } + if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 { + println!(" No issues to repair."); + } + } + } +} + +/// JSON output structures. +#[derive(Serialize)] +struct StatsJsonOutput { + ok: bool, + data: StatsResult, +} + +/// Print JSON robot-mode output. 
+pub fn print_stats_json(result: &StatsResult) { + let output = StatsJsonOutput { + ok: true, + data: StatsResult { + documents: DocumentStats { ..*&result.documents }, + embeddings: EmbeddingStats { ..*&result.embeddings }, + fts: FtsStats { ..*&result.fts }, + queues: QueueStats { ..*&result.queues }, + integrity: result.integrity.as_ref().map(|i| IntegrityResult { + ok: i.ok, + fts_doc_mismatch: i.fts_doc_mismatch, + orphan_embeddings: i.orphan_embeddings, + stale_metadata: i.stale_metadata, + repair: i.repair.as_ref().map(|r| RepairResult { + fts_rebuilt: r.fts_rebuilt, + orphans_deleted: r.orphans_deleted, + stale_cleared: r.stale_cleared, + }), + }), + }, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs new file mode 100644 index 0000000..84ffd0d --- /dev/null +++ b/src/cli/commands/sync.rs @@ -0,0 +1,124 @@ +//! Sync command: unified orchestrator for ingest -> generate-docs -> embed. + +use console::style; +use serde::Serialize; +use tracing::{info, warn}; + +use crate::Config; +use crate::core::error::Result; + +use super::embed::run_embed; +use super::generate_docs::run_generate_docs; +use super::ingest::run_ingest; + +/// Options for the sync command. +#[derive(Debug, Default)] +pub struct SyncOptions { + pub full: bool, + pub force: bool, + pub no_embed: bool, + pub no_docs: bool, +} + +/// Result of the sync command. +#[derive(Debug, Default, Serialize)] +pub struct SyncResult { + pub issues_updated: usize, + pub mrs_updated: usize, + pub discussions_fetched: usize, + pub documents_regenerated: usize, + pub documents_embedded: usize, +} + +/// Run the full sync pipeline: ingest -> generate-docs -> embed. 
+pub async fn run_sync(config: &Config, options: SyncOptions) -> Result { + let mut result = SyncResult::default(); + + // Stage 1: Ingest issues + info!("Sync stage 1/4: ingesting issues"); + let issues_result = run_ingest(config, "issues", None, options.force, options.full, true).await?; + result.issues_updated = issues_result.issues_upserted; + result.discussions_fetched += issues_result.discussions_fetched; + + // Stage 2: Ingest MRs + info!("Sync stage 2/4: ingesting merge requests"); + let mrs_result = run_ingest(config, "mrs", None, options.force, options.full, true).await?; + result.mrs_updated = mrs_result.mrs_upserted; + result.discussions_fetched += mrs_result.discussions_fetched; + + // Stage 3: Generate documents (unless --no-docs) + if options.no_docs { + info!("Sync stage 3/4: skipping document generation (--no-docs)"); + } else { + info!("Sync stage 3/4: generating documents"); + let docs_result = run_generate_docs(config, false, None)?; + result.documents_regenerated = docs_result.regenerated; + } + + // Stage 4: Embed documents (unless --no-embed) + if options.no_embed { + info!("Sync stage 4/4: skipping embedding (--no-embed)"); + } else { + info!("Sync stage 4/4: embedding documents"); + match run_embed(config, false).await { + Ok(embed_result) => { + result.documents_embedded = embed_result.embedded; + } + Err(e) => { + // Graceful degradation: Ollama down is a warning, not an error + warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing"); + } + } + } + + info!( + issues = result.issues_updated, + mrs = result.mrs_updated, + discussions = result.discussions_fetched, + docs = result.documents_regenerated, + embedded = result.documents_embedded, + "Sync pipeline complete" + ); + + Ok(result) +} + +/// Print human-readable sync summary. 
+pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) { + println!( + "{} Sync complete:", + style("done").green().bold(), + ); + println!(" Issues updated: {}", result.issues_updated); + println!(" MRs updated: {}", result.mrs_updated); + println!(" Discussions fetched: {}", result.discussions_fetched); + println!(" Documents regenerated: {}", result.documents_regenerated); + println!(" Documents embedded: {}", result.documents_embedded); + println!( + " Elapsed: {:.1}s", + elapsed.as_secs_f64() + ); +} + +/// JSON output for sync. +#[derive(Serialize)] +struct SyncJsonOutput<'a> { + ok: bool, + data: &'a SyncResult, + meta: SyncMeta, +} + +#[derive(Serialize)] +struct SyncMeta { + elapsed_ms: u64, +} + +/// Print JSON robot-mode sync output. +pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) { + let output = SyncJsonOutput { + ok: true, + data: result, + meta: SyncMeta { elapsed_ms }, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index c6c2ce0..c01a894 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -69,6 +69,18 @@ pub enum Commands { /// Fail if prompts would be shown #[arg(long)] non_interactive: bool, + + /// GitLab base URL (required in robot mode) + #[arg(long)] + gitlab_url: Option, + + /// Environment variable name holding GitLab token (required in robot mode) + #[arg(long)] + token_env_var: Option, + + /// Comma-separated project paths (required in robot mode) + #[arg(long)] + projects: Option, }, /// Create timestamped database backup @@ -81,9 +93,32 @@ pub enum Commands { yes: bool, }, + /// Search indexed documents + Search(SearchArgs), + + /// Show document and index statistics + Stats(StatsArgs), + + /// Generate searchable documents from ingested data + #[command(name = "generate-docs")] + GenerateDocs(GenerateDocsArgs), + + /// Generate vector embeddings for documents via Ollama + Embed(EmbedArgs), + + /// Run full sync pipeline: ingest -> generate-docs 
Sync(SyncArgs),

    /// Run pending database migrations
    Migrate,

    /// Quick health check: config, database, schema version
    Health,

    /// Machine-readable command manifest for agent self-discovery
    #[command(name = "robot-docs")]
    RobotDocs,

    // --- Hidden backward-compat aliases ---

    /// List issues or MRs (deprecated: use 'lore issues' or 'lore mrs')
    #[command(hide = true)]

    pub full: bool,
}

/// Arguments for `lore stats`
#[derive(Parser)]
pub struct StatsArgs {
    /// Run integrity checks
    #[arg(long)]
    pub check: bool,

    /// Repair integrity issues (requires --check)
    // clap enforces the pairing at parse time: --repair without --check is rejected.
    #[arg(long, requires = "check")]
    pub repair: bool,
}

/// Arguments for `lore search <query>`
#[derive(Parser)]
pub struct SearchArgs {
    /// Search query string
    pub query: String,

    /// Search mode (lexical, hybrid, semantic)
    // NOTE(review): stringly-typed; unrecognized values are not rejected here —
    // confirm run_search's fallback, or consider clap::ValueEnum.
    #[arg(long, default_value = "hybrid")]
    pub mode: String,

    /// Filter by source type (issue, mr, discussion)
    #[arg(long = "type", value_name = "TYPE")]
    pub source_type: Option<String>,

    /// Filter by author username
    #[arg(long)]
    pub author: Option<String>,

    /// Filter by project path
    #[arg(short = 'p', long)]
    pub project: Option<String>,

    /// Filter by label (repeatable, AND logic)
    #[arg(long, action = clap::ArgAction::Append)]
    pub label: Vec<String>,

    /// Filter by file path (trailing / for prefix match)
    #[arg(long)]
    pub path: Option<String>,

    /// Filter by created after (7d, 2w, or YYYY-MM-DD)
    #[arg(long)]
    pub after: Option<String>,

    /// Filter by updated after (7d, 2w, or YYYY-MM-DD)
    #[arg(long = "updated-after")]
    pub updated_after: Option<String>,

    /// Maximum results (default 20, max 100)
    // NOTE(review): the documented "max 100" cap is not enforced by this
    // definition — confirm run_search clamps it downstream.
    #[arg(short = 'n', long = "limit", default_value = "20")]
    pub limit: usize,

    /// Show ranking explanation per result
    #[arg(long)]
    pub explain: bool,

    /// FTS query mode: safe (default) or raw
    // "safe" sanitizes user input for FTS; "raw" passes the query through verbatim.
    #[arg(long = "fts-mode", default_value = "safe")]
    pub fts_mode: String,
}

/// Arguments for `lore generate-docs`
#[derive(Parser)]
pub struct GenerateDocsArgs {
    /// Full rebuild: seed all entities into dirty queue, then drain
    #[arg(long)]
    pub full: bool,

    /// Filter to single project
    #[arg(short = 'p', long)]
    pub project: Option<String>,
}

/// Arguments for `lore sync`
#[derive(Parser)]
pub struct SyncArgs {
    /// Reset cursors, fetch everything
    #[arg(long)]
    pub full: bool,

    /// Override stale lock
    #[arg(long)]
    pub force: bool,

    /// Skip embedding step
    #[arg(long)]
    pub no_embed: bool,

    /// Skip document regeneration
    #[arg(long)]
    pub no_docs: bool,
}

/// Arguments for `lore embed`
#[derive(Parser)]
pub struct EmbedArgs {
    /// Retry previously failed embeddings
    #[arg(long)]
    pub retry_failed: bool,
}

// ---- fragment: src/main.rs import hunk (continues beyond this block) ----

use tracing_subscriber::util::SubscriberInitExt;

use lore::Config;
use lore::cli::commands::{
    InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser,
    open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_generate_docs,
    print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
    print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results,
    print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats,
    print_stats_json,
    print_embed, print_embed_json, print_sync, print_sync_json, print_show_mr_json,
print_sync_status, print_sync_status_json, run_auth_test, run_count, - run_doctor, run_ingest, run_init, run_list_issues, run_list_mrs, run_show_issue, run_show_mr, - run_sync_status, + run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs, + run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions, +}; +use lore::cli::{ + Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs, + SearchArgs, StatsArgs, SyncArgs, }; -use lore::cli::{Cli, Commands, CountArgs, IngestArgs, IssuesArgs, MrsArgs}; use lore::core::db::{create_connection, get_schema_version, run_migrations}; -use lore::core::error::{GiError, RobotErrorOutput}; +use lore::core::error::{LoreError, RobotErrorOutput}; use lore::core::paths::get_config_path; use lore::core::paths::get_db_path; @@ -49,6 +55,10 @@ async fn main() { let result = match cli.command { Commands::Issues(args) => handle_issues(cli.config.as_deref(), args, robot_mode).await, Commands::Mrs(args) => handle_mrs(cli.config.as_deref(), args, robot_mode).await, + Commands::Search(args) => handle_search(cli.config.as_deref(), args, robot_mode).await, + Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await, + Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await, + Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await, Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode).await, Commands::Count(args) => { handle_count(cli.config.as_deref(), args, robot_mode).await @@ -60,10 +70,29 @@ async fn main() { Commands::Init { force, non_interactive, - } => handle_init(cli.config.as_deref(), force, non_interactive, robot_mode).await, + gitlab_url, + token_env_var, + projects, + } => { + handle_init( + cli.config.as_deref(), + force, + non_interactive, + robot_mode, + gitlab_url, + token_env_var, + projects, + ) + .await + } + 
Commands::GenerateDocs(args) => { + handle_generate_docs(cli.config.as_deref(), args, robot_mode).await + } Commands::Backup => handle_backup(robot_mode), Commands::Reset { yes: _ } => handle_reset(robot_mode), Commands::Migrate => handle_migrate(cli.config.as_deref(), robot_mode).await, + Commands::Health => handle_health(cli.config.as_deref(), robot_mode).await, + Commands::RobotDocs => handle_robot_docs(robot_mode), // --- Backward-compat: deprecated aliases --- Commands::List { @@ -159,7 +188,7 @@ async fn main() { } } -/// Fallback error output for non-GiError errors in robot mode. +/// Fallback error output for non-LoreError errors in robot mode. #[derive(Serialize)] struct FallbackErrorOutput { error: FallbackError, @@ -172,8 +201,8 @@ struct FallbackError { } fn handle_error(e: Box, robot_mode: bool) -> ! { - // Try to downcast to GiError for structured output - if let Some(gi_error) = e.downcast_ref::() { + // Try to downcast to LoreError for structured output + if let Some(gi_error) = e.downcast_ref::() { if robot_mode { let output = RobotErrorOutput::from(gi_error); // Use serde_json for safe serialization; fallback constructs JSON safely @@ -201,7 +230,7 @@ fn handle_error(e: Box, robot_mode: bool) -> ! { } } - // Fallback for non-GiError errors - use serde for proper JSON escaping + // Fallback for non-LoreError errors - use serde for proper JSON escaping if robot_mode { let output = FallbackErrorOutput { error: FallbackError { @@ -473,22 +502,123 @@ async fn handle_sync_status_cmd( Ok(()) } +/// JSON output for init command. 
+#[derive(Serialize)] +struct InitOutput { + ok: bool, + data: InitOutputData, +} + +#[derive(Serialize)] +struct InitOutputData { + config_path: String, + data_dir: String, + user: InitOutputUser, + projects: Vec, +} + +#[derive(Serialize)] +struct InitOutputUser { + username: String, + name: String, +} + +#[derive(Serialize)] +struct InitOutputProject { + path: String, + name: String, +} + +fn print_init_json(result: &InitResult) { + let output = InitOutput { + ok: true, + data: InitOutputData { + config_path: result.config_path.clone(), + data_dir: result.data_dir.clone(), + user: InitOutputUser { + username: result.user.username.clone(), + name: result.user.name.clone(), + }, + projects: result + .projects + .iter() + .map(|p| InitOutputProject { + path: p.path.clone(), + name: p.name.clone(), + }) + .collect(), + }, + }; + println!("{}", serde_json::to_string(&output).unwrap()); +} + async fn handle_init( config_override: Option<&str>, force: bool, non_interactive: bool, - _robot_mode: bool, // TODO: Add robot mode support for init (requires non-interactive implementation) + robot_mode: bool, + gitlab_url_flag: Option, + token_env_var_flag: Option, + projects_flag: Option, ) -> Result<(), Box> { + // Robot mode: require all inputs via flags, skip interactive prompts + if robot_mode { + let missing: Vec<&str> = [ + gitlab_url_flag.is_none().then_some("--gitlab-url"), + token_env_var_flag.is_none().then_some("--token-env-var"), + projects_flag.is_none().then_some("--projects"), + ] + .into_iter() + .flatten() + .collect(); + + if !missing.is_empty() { + let output = RobotErrorWithSuggestion { + error: RobotErrorSuggestionData { + code: "MISSING_FLAGS".to_string(), + message: format!("Robot mode requires flags: {}", missing.join(", ")), + suggestion: "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project".to_string(), + }, + }; + eprintln!("{}", serde_json::to_string(&output)?); + std::process::exit(2); + } + + 
let project_paths: Vec = projects_flag + .unwrap() + .split(',') + .map(|p| p.trim().to_string()) + .filter(|p| !p.is_empty()) + .collect(); + + let result = run_init( + InitInputs { + gitlab_url: gitlab_url_flag.unwrap(), + token_env_var: token_env_var_flag.unwrap(), + project_paths, + }, + InitOptions { + config_path: config_override.map(String::from), + force: true, + non_interactive: true, + }, + ) + .await?; + + print_init_json(&result); + return Ok(()); + } + + // Human mode: interactive prompts let config_path = get_config_path(config_override); let mut confirmed_overwrite = force; - // Check if config exists and handle overwrite - if config_path.exists() { + if config_path.exists() && !force { if non_interactive { eprintln!( "{}", style(format!( - "Config file exists at {}. Cannot proceed in non-interactive mode.", + "Config file exists at {}. Use --force to overwrite.", config_path.display() )) .red() @@ -496,59 +626,70 @@ async fn handle_init( std::process::exit(2); } - if !force { - let confirm = Confirm::new() - .with_prompt(format!( - "Config file exists at {}. Overwrite?", - config_path.display() - )) - .default(false) - .interact()?; + let confirm = Confirm::new() + .with_prompt(format!( + "Config file exists at {}. 
Overwrite?", + config_path.display() + )) + .default(false) + .interact()?; - if !confirm { - println!("{}", style("Cancelled.").yellow()); - std::process::exit(2); - } - confirmed_overwrite = true; + if !confirm { + println!("{}", style("Cancelled.").yellow()); + std::process::exit(2); } + confirmed_overwrite = true; } - // Prompt for GitLab URL - let gitlab_url: String = Input::new() - .with_prompt("GitLab URL") - .default("https://gitlab.com".to_string()) - .validate_with(|input: &String| -> Result<(), &str> { - if url::Url::parse(input).is_ok() { - Ok(()) - } else { - Err("Please enter a valid URL") - } - }) - .interact_text()?; + let gitlab_url: String = if let Some(url) = gitlab_url_flag { + url + } else { + Input::new() + .with_prompt("GitLab URL") + .default("https://gitlab.com".to_string()) + .validate_with(|input: &String| -> Result<(), &str> { + if url::Url::parse(input).is_ok() { + Ok(()) + } else { + Err("Please enter a valid URL") + } + }) + .interact_text()? + }; - // Prompt for token env var - let token_env_var: String = Input::new() - .with_prompt("Token environment variable name") - .default("GITLAB_TOKEN".to_string()) - .interact_text()?; + let token_env_var: String = if let Some(var) = token_env_var_flag { + var + } else { + Input::new() + .with_prompt("Token environment variable name") + .default("GITLAB_TOKEN".to_string()) + .interact_text()? 
+ }; - // Prompt for project paths - let project_paths_input: String = Input::new() - .with_prompt("Project paths (comma-separated, e.g., group/project)") - .validate_with(|input: &String| -> Result<(), &str> { - if input.trim().is_empty() { - Err("Please enter at least one project path") - } else { - Ok(()) - } - }) - .interact_text()?; + let project_paths: Vec = if let Some(projects) = projects_flag { + projects + .split(',') + .map(|p| p.trim().to_string()) + .filter(|p| !p.is_empty()) + .collect() + } else { + let project_paths_input: String = Input::new() + .with_prompt("Project paths (comma-separated, e.g., group/project)") + .validate_with(|input: &String| -> Result<(), &str> { + if input.trim().is_empty() { + Err("Please enter at least one project path") + } else { + Ok(()) + } + }) + .interact_text()?; - let project_paths: Vec = project_paths_input - .split(',') - .map(|p| p.trim().to_string()) - .filter(|p| !p.is_empty()) - .collect(); + project_paths_input + .split(',') + .map(|p| p.trim().to_string()) + .filter(|p| !p.is_empty()) + .collect() + }; println!("{}", style("\nValidating configuration...").blue()); @@ -840,6 +981,385 @@ async fn handle_migrate( Ok(()) } +async fn handle_stats( + config_override: Option<&str>, + args: StatsArgs, + robot_mode: bool, +) -> Result<(), Box> { + let config = Config::load(config_override)?; + let result = run_stats(&config, args.check, args.repair)?; + if robot_mode { + print_stats_json(&result); + } else { + print_stats(&result); + } + Ok(()) +} + +async fn handle_search( + config_override: Option<&str>, + args: SearchArgs, + robot_mode: bool, +) -> Result<(), Box> { + let config = Config::load(config_override)?; + + let fts_mode = match args.fts_mode.as_str() { + "raw" => lore::search::FtsQueryMode::Raw, + _ => lore::search::FtsQueryMode::Safe, + }; + + let cli_filters = SearchCliFilters { + source_type: args.source_type, + author: args.author, + project: args.project, + labels: args.label, + path: args.path, + 
after: args.after, + updated_after: args.updated_after, + limit: args.limit, + }; + + let start = std::time::Instant::now(); + let response = run_search(&config, &args.query, cli_filters, fts_mode, args.explain)?; + let elapsed_ms = start.elapsed().as_millis() as u64; + + if robot_mode { + print_search_results_json(&response, elapsed_ms); + } else { + print_search_results(&response); + } + Ok(()) +} + +async fn handle_generate_docs( + config_override: Option<&str>, + args: GenerateDocsArgs, + robot_mode: bool, +) -> Result<(), Box> { + let config = Config::load(config_override)?; + + let result = run_generate_docs(&config, args.full, args.project.as_deref())?; + if robot_mode { + print_generate_docs_json(&result); + } else { + print_generate_docs(&result); + } + Ok(()) +} + +async fn handle_embed( + config_override: Option<&str>, + args: EmbedArgs, + robot_mode: bool, +) -> Result<(), Box> { + let config = Config::load(config_override)?; + let result = run_embed(&config, args.retry_failed).await?; + if robot_mode { + print_embed_json(&result); + } else { + print_embed(&result); + } + Ok(()) +} + +async fn handle_sync_cmd( + config_override: Option<&str>, + args: SyncArgs, + robot_mode: bool, +) -> Result<(), Box> { + let config = Config::load(config_override)?; + let options = SyncOptions { + full: args.full, + force: args.force, + no_embed: args.no_embed, + no_docs: args.no_docs, + }; + + let start = std::time::Instant::now(); + let result = run_sync(&config, options).await?; + let elapsed = start.elapsed(); + + if robot_mode { + print_sync_json(&result, elapsed.as_millis() as u64); + } else { + print_sync(&result, elapsed); + } + Ok(()) +} + +// ============================================================================ +// Health + Robot-docs handlers +// ============================================================================ + +/// JSON output for health command. 
+#[derive(Serialize)] +struct HealthOutput { + ok: bool, + data: HealthData, +} + +#[derive(Serialize)] +struct HealthData { + healthy: bool, + config_found: bool, + db_found: bool, + schema_current: bool, + schema_version: i32, +} + +async fn handle_health( + config_override: Option<&str>, + robot_mode: bool, +) -> Result<(), Box> { + let config_path = get_config_path(config_override); + let config_found = config_path.exists(); + + let (db_found, schema_version, schema_current) = if config_found { + match Config::load(config_override) { + Ok(config) => { + let db_path = get_db_path(config.storage.db_path.as_deref()); + if db_path.exists() { + match create_connection(&db_path) { + Ok(conn) => { + let version = get_schema_version(&conn); + let latest = 9; // Number of embedded migrations + (true, version, version >= latest) + } + Err(_) => (true, 0, false), + } + } else { + (false, 0, false) + } + } + Err(_) => (false, 0, false), + } + } else { + (false, 0, false) + }; + + let healthy = config_found && db_found && schema_current; + + if robot_mode { + let output = HealthOutput { + ok: true, + data: HealthData { + healthy, + config_found, + db_found, + schema_current, + schema_version, + }, + }; + println!("{}", serde_json::to_string(&output)?); + } else { + let status = |ok: bool| { + if ok { + style("pass").green() + } else { + style("FAIL").red() + } + }; + println!("Config: {} ({})", status(config_found), config_path.display()); + println!("DB: {}", status(db_found)); + println!( + "Schema: {} (v{})", + status(schema_current), + schema_version + ); + println!(); + if healthy { + println!("{}", style("Healthy").green().bold()); + } else { + println!("{}", style("Unhealthy - run 'lore doctor' for details").red().bold()); + } + } + + if !healthy { + std::process::exit(1); + } + + Ok(()) +} + +/// JSON output for robot-docs command. 
#[derive(Serialize)]
struct RobotDocsOutput {
    ok: bool,
    data: RobotDocsData,
}

#[derive(Serialize)]
struct RobotDocsData {
    name: String,
    version: String,
    description: String,
    activation: RobotDocsActivation,
    // Free-form JSON sub-documents; kept as Values since their shape is
    // documentation, not a typed contract.
    commands: serde_json::Value,
    exit_codes: serde_json::Value,
    error_format: String,
    workflows: serde_json::Value,
}

#[derive(Serialize)]
struct RobotDocsActivation {
    flags: Vec<String>,
    env: String,
    auto: String,
}

/// Emit the agent self-discovery manifest: every command, its flags, an
/// example invocation, exit codes, and recommended workflows. Robot mode
/// prints compact JSON; human mode pretty-prints the same document.
fn handle_robot_docs(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
    let version = env!("CARGO_PKG_VERSION").to_string();

    // Per-command manifest: description, accepted flags, copy-paste example.
    // NOTE(review): several flag strings below (e.g. "--project ", "") look
    // truncated — likely "<path>"/"<entity>"-style placeholders lost in
    // transit; restore before release.
    let commands = serde_json::json!({
        "init": {
            "description": "Initialize configuration and database",
            "flags": ["--force", "--non-interactive", "--gitlab-url ", "--token-env-var ", "--projects "],
            "robot_flags": ["--gitlab-url", "--token-env-var", "--projects"],
            "example": "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project"
        },
        "health": {
            "description": "Quick pre-flight check: config, database, schema version",
            "flags": [],
            "example": "lore --robot health"
        },
        "auth": {
            "description": "Verify GitLab authentication",
            "flags": [],
            "example": "lore --robot auth"
        },
        "doctor": {
            "description": "Full environment health check (config, auth, DB, Ollama)",
            "flags": [],
            "example": "lore --robot doctor"
        },
        "ingest": {
            "description": "Sync data from GitLab",
            "flags": ["--project ", "--force", "--full", ""],
            "example": "lore --robot ingest issues --project group/repo"
        },
        "sync": {
            "description": "Full sync pipeline: ingest -> generate-docs -> embed",
            "flags": ["--full", "--force", "--no-embed", "--no-docs"],
            "example": "lore --robot sync"
        },
        "issues": {
            "description": "List or show issues",
            "flags": ["", "--limit", "--state", "--project", "--author", "--assignee", "--label", "--milestone", "--since", "--due-before", "--has-due", "--sort", "--asc"],
            "example": "lore --robot issues --state opened --limit 10"
        },
        "mrs": {
            "description": "List or show merge requests",
            "flags": ["", "--limit", "--state", "--project", "--author", "--assignee", "--reviewer", "--label", "--since", "--draft", "--no-draft", "--target", "--source", "--sort", "--asc"],
            "example": "lore --robot mrs --state opened"
        },
        "search": {
            "description": "Search indexed documents (lexical, hybrid, semantic)",
            "flags": ["", "--mode", "--type", "--author", "--project", "--label", "--path", "--after", "--updated-after", "--limit", "--explain", "--fts-mode"],
            "example": "lore --robot search 'authentication bug' --mode hybrid --limit 10"
        },
        "count": {
            "description": "Count entities in local database",
            "flags": ["", "--for "],
            "example": "lore --robot count issues"
        },
        "stats": {
            "description": "Show document and index statistics",
            "flags": ["--check", "--repair"],
            "example": "lore --robot stats"
        },
        "status": {
            "description": "Show sync state (cursors, last sync times)",
            "flags": [],
            "example": "lore --robot status"
        },
        "generate-docs": {
            "description": "Generate searchable documents from ingested data",
            "flags": ["--full", "--project "],
            "example": "lore --robot generate-docs --full"
        },
        "embed": {
            "description": "Generate vector embeddings for documents via Ollama",
            "flags": ["--retry-failed"],
            "example": "lore --robot embed"
        },
        "migrate": {
            "description": "Run pending database migrations",
            "flags": [],
            "example": "lore --robot migrate"
        },
        "version": {
            "description": "Show version information",
            "flags": [],
            "example": "lore --robot version"
        },
        "robot-docs": {
            "description": "This command (agent self-discovery manifest)",
            "flags": [],
            "example": "lore robot-docs"
        }
    });

    // Exit-code map mirrored from the error enum.
    // NOTE(review): duplicated by hand — keep in sync with LoreError's exit
    // codes, or generate from the enum to avoid drift.
    let exit_codes = serde_json::json!({
        "0": "Success",
        "1": "Internal error / health check failed",
        "2": "Config not found / missing flags",
        "3": "Config invalid",
        "4": "Token not set",
        "5": "GitLab auth failed",
        "6": "Resource not found",
        "7": "Rate limited",
        "8": "Network error",
        "9": "Database locked",
        "10": "Database error",
        "11": "Migration failed",
        "12": "I/O error",
        "13": "Transform error"
    });

    // Recommended command sequences for common agent tasks.
    let workflows = serde_json::json!({
        "first_setup": [
            "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project",
            "lore --robot doctor",
            "lore --robot sync"
        ],
        "daily_sync": [
            "lore --robot sync"
        ],
        "search": [
            "lore --robot search 'query' --mode hybrid"
        ],
        "pre_flight": [
            "lore --robot health"
        ]
    });

    let output = RobotDocsOutput {
        ok: true,
        data: RobotDocsData {
            name: "lore".to_string(),
            version,
            description: "Local GitLab data management with semantic search".to_string(),
            activation: RobotDocsActivation {
                flags: vec!["--robot".to_string(), "-J".to_string(), "--json".to_string()],
                env: "LORE_ROBOT=1".to_string(),
                auto: "Non-TTY stdout".to_string(),
            },
            commands,
            exit_codes,
            error_format: "stderr JSON: {\"error\":{\"code\":\"...\",\"message\":\"...\",\"suggestion\":\"...\"}}".to_string(),
            workflows,
        },
    };

    // Compact for machines, pretty for humans reading the manifest directly.
    if robot_mode {
        println!("{}", serde_json::to_string(&output)?);
    } else {
        println!("{}", serde_json::to_string_pretty(&output)?);
    }

    Ok(())
}

// ============================================================================
// Backward-compat handlers (deprecated, delegate to new handlers)
// ============================================================================