feat: Add dry-run mode to ingest, sync, and stats commands

Enables preview of operations without making changes, useful for understanding what would happen before committing to a full sync. Ingest dry-run (--dry-run flag): - Shows resource type, sync mode (full vs incremental), project list - Per-project info: existing count, has_cursor, last_synced timestamp - No GitLab API calls, no database writes Sync dry-run (--dry-run flag): - Preview all four stages: issues ingest, MRs ingest, docs, embed - Shows which stages would run vs be skipped (--no-docs, --no-embed) - Per-project breakdown for both entity types Stats repair dry-run (--dry-run flag): - Shows what would be repaired without executing repairs - "would fix" vs "fixed" indicator in terminal output - dry_run: true field in JSON response Implementation details: - DryRunPreview struct captures project-level sync state - SyncDryRunResult aggregates previews for all sync stages - Terminal output uses yellow styling for "would" actions - JSON output includes dry_run: true at top level Flag handling: - --dry-run and --no-dry-run pair for explicit control - Defaults to false (normal operation) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 11:22:22 -05:00
parent 784fe79b80
commit ab43bbd2db
3 changed files with 409 additions and 34 deletions
--- a/src/cli/commands/ingest.rs
+++ b/src/cli/commands/ingest.rs
@@ -42,6 +42,23 @@ pub struct IngestResult {
    pub resource_events_failed: usize,
 }

+#[derive(Debug, Default, Clone, Serialize)]
+pub struct DryRunPreview {
+    pub resource_type: String,
+    pub projects: Vec<DryRunProjectPreview>,
+    pub sync_mode: String,
+}
+
+#[derive(Debug, Default, Clone, Serialize)]
+pub struct DryRunProjectPreview {
+    pub path: String,
+    pub local_id: i64,
+    pub gitlab_id: i64,
+    pub has_cursor: bool,
+    pub last_synced: Option<String>,
+    pub existing_count: i64,
+}
+
 enum ProjectIngestOutcome {
    Issues {
        path: String,
@@ -86,12 +103,14 @@ impl IngestDisplay {
    }
 }

+#[allow(clippy::too_many_arguments)]
 pub async fn run_ingest(
    config: &Config,
    resource_type: &str,
    project_filter: Option<&str>,
    force: bool,
    full: bool,
+    dry_run: bool,
    display: IngestDisplay,
    stage_bar: Option<ProgressBar>,
 ) -> Result<IngestResult> {
@@ -105,6 +124,7 @@ pub async fn run_ingest(
        project_filter,
        force,
        full,
+        dry_run,
        display,
        stage_bar,
    )
@@ -112,15 +132,107 @@ pub async fn run_ingest(
    .await
 }

+pub fn run_ingest_dry_run(
+    config: &Config,
+    resource_type: &str,
+    project_filter: Option<&str>,
+    full: bool,
+) -> Result<DryRunPreview> {
+    if resource_type != "issues" && resource_type != "mrs" {
+        return Err(LoreError::Other(format!(
+            "Invalid resource type '{}'. Valid types: issues, mrs",
+            resource_type
+        )));
+    }
+
+    let db_path = get_db_path(config.storage.db_path.as_deref());
+    let conn = create_connection(&db_path)?;
+
+    let projects = get_projects_to_sync(&conn, &config.projects, project_filter)?;
+
+    if projects.is_empty() {
+        if let Some(filter) = project_filter {
+            return Err(LoreError::Other(format!(
+                "Project '{}' not found in configuration",
+                filter
+            )));
+        }
+        return Err(LoreError::Other(
+            "No projects configured. Run 'lore init' first.".to_string(),
+        ));
+    }
+
+    let mut preview = DryRunPreview {
+        resource_type: resource_type.to_string(),
+        projects: Vec::new(),
+        sync_mode: if full {
+            "full".to_string()
+        } else {
+            "incremental".to_string()
+        },
+    };
+
+    for (local_project_id, gitlab_project_id, path) in &projects {
+        let cursor_exists: bool = conn
+            .query_row(
+                "SELECT EXISTS(SELECT 1 FROM sync_cursors WHERE project_id = ? AND resource_type = ?)",
+                (*local_project_id, resource_type),
+                |row| row.get(0),
+            )
+            .unwrap_or(false);
+
+        let last_synced: Option<String> = conn
+            .query_row(
+                "SELECT updated_at FROM sync_cursors WHERE project_id = ? AND resource_type = ?",
+                (*local_project_id, resource_type),
+                |row| row.get(0),
+            )
+            .ok();
+
+        let existing_count: i64 = if resource_type == "issues" {
+            conn.query_row(
+                "SELECT COUNT(*) FROM issues WHERE project_id = ?",
+                [*local_project_id],
+                |row| row.get(0),
+            )
+            .unwrap_or(0)
+        } else {
+            conn.query_row(
+                "SELECT COUNT(*) FROM merge_requests WHERE project_id = ?",
+                [*local_project_id],
+                |row| row.get(0),
+            )
+            .unwrap_or(0)
+        };
+
+        preview.projects.push(DryRunProjectPreview {
+            path: path.clone(),
+            local_id: *local_project_id,
+            gitlab_id: *gitlab_project_id,
+            has_cursor: cursor_exists && !full,
+            last_synced: if full { None } else { last_synced },
+            existing_count,
+        });
+    }
+
+    Ok(preview)
+}
+
+#[allow(clippy::too_many_arguments)]
 async fn run_ingest_inner(
    config: &Config,
    resource_type: &str,
    project_filter: Option<&str>,
    force: bool,
    full: bool,
+    dry_run: bool,
    display: IngestDisplay,
    stage_bar: Option<ProgressBar>,
 ) -> Result<IngestResult> {
+    // In dry_run mode, we don't actually ingest - use run_ingest_dry_run instead
+    // This flag is passed through for consistency but the actual dry-run logic
+    // is handled at the caller level
+    let _ = dry_run;
    if resource_type != "issues" && resource_type != "mrs" {
        return Err(LoreError::Other(format!(
            "Invalid resource type '{}'. Valid types: issues, mrs",
@@ -759,3 +871,63 @@ pub fn print_ingest_summary(result: &IngestResult) {
        );
    }
 }
+
+pub fn print_dry_run_preview(preview: &DryRunPreview) {
+    println!(
+        "{} {}",
+        style("Dry Run Preview").cyan().bold(),
+        style("(no changes will be made)").yellow()
+    );
+    println!();
+
+    let type_label = if preview.resource_type == "issues" {
+        "issues"
+    } else {
+        "merge requests"
+    };
+
+    println!("  Resource type: {}", style(type_label).white().bold());
+    println!(
+        "  Sync mode: {}",
+        if preview.sync_mode == "full" {
+            style("full (all data will be re-fetched)").yellow()
+        } else {
+            style("incremental (only changes since last sync)").green()
+        }
+    );
+    println!("  Projects: {}", preview.projects.len());
+    println!();
+
+    println!("{}", style("Projects to sync:").cyan().bold());
+    for project in &preview.projects {
+        let sync_status = if !project.has_cursor {
+            style("initial sync").yellow()
+        } else {
+            style("incremental").green()
+        };
+
+        println!("  {} ({})", style(&project.path).white(), sync_status);
+        println!("    Existing {}: {}", type_label, project.existing_count);
+
+        if let Some(ref last_synced) = project.last_synced {
+            println!("    Last synced: {}", last_synced);
+        }
+    }
+}
+
+#[derive(Serialize)]
+struct DryRunJsonOutput {
+    ok: bool,
+    dry_run: bool,
+    data: DryRunPreview,
+}
+
+pub fn print_dry_run_preview_json(preview: &DryRunPreview) {
+    let output = DryRunJsonOutput {
+        ok: true,
+        dry_run: true,
+        data: preview.clone(),
+    };
+
+    println!("{}", serde_json::to_string(&output).unwrap());
+}
--- a/src/cli/commands/stats.rs
+++ b/src/cli/commands/stats.rs
@@ -69,9 +69,10 @@ pub struct RepairResult {
    pub fts_rebuilt: bool,
    pub orphans_deleted: i64,
    pub stale_cleared: i64,
+    pub dry_run: bool,
 }

-pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResult> {
+pub fn run_stats(config: &Config, check: bool, repair: bool, dry_run: bool) -> Result<StatsResult> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

@@ -220,43 +221,54 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu

        if repair {
            let mut repair_result = RepairResult::default();
+            repair_result.dry_run = dry_run;

            if integrity.fts_doc_mismatch {
-                conn.execute(
-                    "INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
-                    [],
-                )?;
+                if !dry_run {
+                    conn.execute(
+                        "INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
+                        [],
+                    )?;
+                }
                repair_result.fts_rebuilt = true;
            }

            if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") {
-                let deleted = conn.execute(
-                    "DELETE FROM embedding_metadata
-                     WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
-                    [],
-                )?;
-                repair_result.orphans_deleted = deleted as i64;
-
-                if table_exists(&conn, "embeddings") {
-                    let _ = conn.execute(
-                        "DELETE FROM embeddings
-                         WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
+                if !dry_run {
+                    let deleted = conn.execute(
+                        "DELETE FROM embedding_metadata
+                         WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
                        [],
-                    );
+                    )?;
+                    repair_result.orphans_deleted = deleted as i64;
+
+                    if table_exists(&conn, "embeddings") {
+                        let _ = conn.execute(
+                            "DELETE FROM embeddings
+                             WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
+                            [],
+                        );
+                    }
+                } else {
+                    repair_result.orphans_deleted = integrity.orphan_embeddings;
                }
            }

            if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") {
-                let cleared = conn.execute(
-                    "DELETE FROM embedding_metadata
-                     WHERE document_id IN (
-                         SELECT em.document_id FROM embedding_metadata em
-                         JOIN documents d ON d.id = em.document_id
-                         WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
-                     )",
-                    [],
-                )?;
-                repair_result.stale_cleared = cleared as i64;
+                if !dry_run {
+                    let cleared = conn.execute(
+                        "DELETE FROM embedding_metadata
+                         WHERE document_id IN (
+                             SELECT em.document_id FROM embedding_metadata em
+                             JOIN documents d ON d.id = em.document_id
+                             WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
+                         )",
+                        [],
+                    )?;
+                    repair_result.stale_cleared = cleared as i64;
+                } else {
+                    repair_result.stale_cleared = integrity.stale_metadata;
+                }
            }

            integrity.repair = Some(repair_result);
@@ -387,22 +399,35 @@ pub fn print_stats(result: &StatsResult) {

        if let Some(ref repair) = integrity.repair {
            println!();
-            println!("{}", style("Repair").cyan().bold());
+            if repair.dry_run {
+                println!(
+                    "{} {}",
+                    style("Repair").cyan().bold(),
+                    style("(dry run - no changes made)").yellow()
+                );
+            } else {
+                println!("{}", style("Repair").cyan().bold());
+            }
+
+            let action = if repair.dry_run {
+                style("would fix").yellow()
+            } else {
+                style("fixed").green()
+            };
+
            if repair.fts_rebuilt {
-                println!("  {} FTS index rebuilt", style("fixed").green());
+                println!("  {} FTS index rebuilt", action);
            }
            if repair.orphans_deleted > 0 {
                println!(
                    "  {} {} orphan embeddings deleted",
-                    style("fixed").green(),
-                    repair.orphans_deleted
+                    action, repair.orphans_deleted
                );
            }
            if repair.stale_cleared > 0 {
                println!(
                    "  {} {} stale metadata entries cleared",
-                    style("fixed").green(),
-                    repair.stale_cleared
+                    action, repair.stale_cleared
                );
            }
            if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 {
@@ -442,6 +467,7 @@ pub fn print_stats_json(result: &StatsResult) {
                    fts_rebuilt: r.fts_rebuilt,
                    orphans_deleted: r.orphans_deleted,
                    stale_cleared: r.stale_cleared,
+                    dry_run: r.dry_run,
                }),
            }),
        },
--- a/src/cli/commands/sync.rs
+++ b/src/cli/commands/sync.rs
@@ -12,7 +12,7 @@ use crate::core::metrics::{MetricsLayer, StageTiming};

 use super::embed::run_embed;
 use super::generate_docs::run_generate_docs;
-use super::ingest::{IngestDisplay, run_ingest};
+use super::ingest::{DryRunPreview, IngestDisplay, run_ingest, run_ingest_dry_run};

 #[derive(Debug, Default)]
 pub struct SyncOptions {
@@ -22,6 +22,7 @@ pub struct SyncOptions {
    pub no_docs: bool,
    pub no_events: bool,
    pub robot_mode: bool,
+    pub dry_run: bool,
 }

 #[derive(Debug, Default, Serialize)]
@@ -74,6 +75,11 @@ pub async fn run_sync(
            ..SyncResult::default()
        };

+        // Handle dry_run mode - show preview without making any changes
+        if options.dry_run {
+            return run_sync_dry_run(config, &options).await;
+        }
+
        let ingest_display = if options.robot_mode {
            IngestDisplay::silent()
        } else {
@@ -103,6 +109,7 @@ pub async fn run_sync(
            None,
            options.force,
            options.full,
+            false, // dry_run - sync has its own dry_run handling
            ingest_display,
            Some(spinner.clone()),
        )
@@ -127,6 +134,7 @@ pub async fn run_sync(
            None,
            options.force,
            options.full,
+            false, // dry_run - sync has its own dry_run handling
            ingest_display,
            Some(spinner.clone()),
        )
@@ -369,3 +377,172 @@ pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&Me
    };
    println!("{}", serde_json::to_string(&output).unwrap());
 }
+
+#[derive(Debug, Default, Serialize)]
+pub struct SyncDryRunResult {
+    pub issues_preview: DryRunPreview,
+    pub mrs_preview: DryRunPreview,
+    pub would_generate_docs: bool,
+    pub would_embed: bool,
+}
+
+async fn run_sync_dry_run(config: &Config, options: &SyncOptions) -> Result<SyncResult> {
+    // Get dry run previews for both issues and MRs
+    let issues_preview = run_ingest_dry_run(config, "issues", None, options.full)?;
+    let mrs_preview = run_ingest_dry_run(config, "mrs", None, options.full)?;
+
+    let dry_result = SyncDryRunResult {
+        issues_preview,
+        mrs_preview,
+        would_generate_docs: !options.no_docs,
+        would_embed: !options.no_embed,
+    };
+
+    if options.robot_mode {
+        print_sync_dry_run_json(&dry_result);
+    } else {
+        print_sync_dry_run(&dry_result);
+    }
+
+    // Return an empty SyncResult since this is just a preview
+    Ok(SyncResult::default())
+}
+
+pub fn print_sync_dry_run(result: &SyncDryRunResult) {
+    println!(
+        "{} {}",
+        style("Sync Dry Run Preview").cyan().bold(),
+        style("(no changes will be made)").yellow()
+    );
+    println!();
+
+    println!("{}", style("Stage 1: Issues Ingestion").white().bold());
+    println!(
+        "  Sync mode: {}",
+        if result.issues_preview.sync_mode == "full" {
+            style("full").yellow()
+        } else {
+            style("incremental").green()
+        }
+    );
+    println!("  Projects: {}", result.issues_preview.projects.len());
+    for project in &result.issues_preview.projects {
+        let sync_status = if !project.has_cursor {
+            style("initial sync").yellow()
+        } else {
+            style("incremental").green()
+        };
+        println!(
+            "    {} ({}) - {} existing",
+            &project.path, sync_status, project.existing_count
+        );
+    }
+    println!();
+
+    println!(
+        "{}",
+        style("Stage 2: Merge Requests Ingestion").white().bold()
+    );
+    println!(
+        "  Sync mode: {}",
+        if result.mrs_preview.sync_mode == "full" {
+            style("full").yellow()
+        } else {
+            style("incremental").green()
+        }
+    );
+    println!("  Projects: {}", result.mrs_preview.projects.len());
+    for project in &result.mrs_preview.projects {
+        let sync_status = if !project.has_cursor {
+            style("initial sync").yellow()
+        } else {
+            style("incremental").green()
+        };
+        println!(
+            "    {} ({}) - {} existing",
+            &project.path, sync_status, project.existing_count
+        );
+    }
+    println!();
+
+    if result.would_generate_docs {
+        println!(
+            "{} {}",
+            style("Stage 3: Document Generation").white().bold(),
+            style("(would run)").green()
+        );
+    } else {
+        println!(
+            "{} {}",
+            style("Stage 3: Document Generation").white().bold(),
+            style("(skipped)").dim()
+        );
+    }
+
+    if result.would_embed {
+        println!(
+            "{} {}",
+            style("Stage 4: Embedding").white().bold(),
+            style("(would run)").green()
+        );
+    } else {
+        println!(
+            "{} {}",
+            style("Stage 4: Embedding").white().bold(),
+            style("(skipped)").dim()
+        );
+    }
+}
+
+#[derive(Serialize)]
+struct SyncDryRunJsonOutput {
+    ok: bool,
+    dry_run: bool,
+    data: SyncDryRunJsonData,
+}
+
+#[derive(Serialize)]
+struct SyncDryRunJsonData {
+    stages: Vec<SyncDryRunStage>,
+}
+
+#[derive(Serialize)]
+struct SyncDryRunStage {
+    name: String,
+    would_run: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    preview: Option<DryRunPreview>,
+}
+
+pub fn print_sync_dry_run_json(result: &SyncDryRunResult) {
+    let output = SyncDryRunJsonOutput {
+        ok: true,
+        dry_run: true,
+        data: SyncDryRunJsonData {
+            stages: vec![
+                SyncDryRunStage {
+                    name: "ingest_issues".to_string(),
+                    would_run: true,
+                    preview: Some(result.issues_preview.clone()),
+                },
+                SyncDryRunStage {
+                    name: "ingest_mrs".to_string(),
+                    would_run: true,
+                    preview: Some(result.mrs_preview.clone()),
+                },
+                SyncDryRunStage {
+                    name: "generate_docs".to_string(),
+                    would_run: result.would_generate_docs,
+                    preview: None,
+                },
+                SyncDryRunStage {
+                    name: "embed".to_string(),
+                    would_run: result.would_embed,
+                    preview: None,
+                },
+            ],
+        },
+    };
+
+    println!("{}", serde_json::to_string(&output).unwrap());
+}