From ab43bbd2db287e5f6f28adf9303ec5e43e6e980b Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Thu, 5 Feb 2026 11:22:22 -0500 Subject: [PATCH] feat: Add dry-run mode to ingest, sync, and stats commands Enables preview of operations without making changes, useful for understanding what would happen before committing to a full sync. Ingest dry-run (--dry-run flag): - Shows resource type, sync mode (full vs incremental), project list - Per-project info: existing count, has_cursor, last_synced timestamp - No GitLab API calls, no database writes Sync dry-run (--dry-run flag): - Preview all four stages: issues ingest, MRs ingest, docs, embed - Shows which stages would run vs be skipped (--no-docs, --no-embed) - Per-project breakdown for both entity types Stats repair dry-run (--dry-run flag): - Shows what would be repaired without executing repairs - "would fix" vs "fixed" indicator in terminal output - dry_run: true field in JSON response Implementation details: - DryRunPreview struct captures project-level sync state - SyncDryRunResult aggregates previews for all sync stages - Terminal output uses yellow styling for "would" actions - JSON output includes dry_run: true at top level Flag handling: - --dry-run and --no-dry-run pair for explicit control - Defaults to false (normal operation) Co-Authored-By: Claude Opus 4.5 --- src/cli/commands/ingest.rs | 172 +++++++++++++++++++++++++++++++++++ src/cli/commands/stats.rs | 92 ++++++++++++------- src/cli/commands/sync.rs | 179 ++++++++++++++++++++++++++++++++++++- 3 files changed, 409 insertions(+), 34 deletions(-) diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 1fcd11b..da70f0d 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -42,6 +42,23 @@ pub struct IngestResult { pub resource_events_failed: usize, } +#[derive(Debug, Default, Clone, Serialize)] +pub struct DryRunPreview { + pub resource_type: String, + pub projects: Vec, + pub sync_mode: String, +} + +#[derive(Debug, Default, Clone, Serialize)] +pub struct DryRunProjectPreview { + pub path: String, + pub local_id: i64, + pub gitlab_id: i64, + pub has_cursor: bool, + pub last_synced: Option, + pub existing_count: i64, +} + enum ProjectIngestOutcome { Issues { path: String, @@ -86,12 +103,14 @@ impl IngestDisplay { } } +#[allow(clippy::too_many_arguments)] pub async fn run_ingest( config: &Config, resource_type: &str, project_filter: Option<&str>, force: bool, full: bool, + dry_run: bool, display: IngestDisplay, stage_bar: Option, ) -> Result { @@ -105,6 +124,7 @@ pub async fn run_ingest( project_filter, force, full, + dry_run, display, stage_bar, ) @@ -112,15 +132,107 @@ pub async fn run_ingest( .await } +pub fn run_ingest_dry_run( + config: &Config, + resource_type: &str, + project_filter: Option<&str>, + full: bool, +) -> Result { + if resource_type != "issues" && resource_type != "mrs" { + return Err(LoreError::Other(format!( + "Invalid resource type '{}'. Valid types: issues, mrs", + resource_type + ))); + } + + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let projects = get_projects_to_sync(&conn, &config.projects, project_filter)?; + + if projects.is_empty() { + if let Some(filter) = project_filter { + return Err(LoreError::Other(format!( + "Project '{}' not found in configuration", + filter + ))); + } + return Err(LoreError::Other( + "No projects configured. Run 'lore init' first.".to_string(), + )); + } + + let mut preview = DryRunPreview { + resource_type: resource_type.to_string(), + projects: Vec::new(), + sync_mode: if full { + "full".to_string() + } else { + "incremental".to_string() + }, + }; + + for (local_project_id, gitlab_project_id, path) in &projects { + let cursor_exists: bool = conn + .query_row( + "SELECT EXISTS(SELECT 1 FROM sync_cursors WHERE project_id = ? AND resource_type = ?)", + (*local_project_id, resource_type), + |row| row.get(0), + ) + .unwrap_or(false); + + let last_synced: Option = conn + .query_row( + "SELECT updated_at FROM sync_cursors WHERE project_id = ? AND resource_type = ?", + (*local_project_id, resource_type), + |row| row.get(0), + ) + .ok(); + + let existing_count: i64 = if resource_type == "issues" { + conn.query_row( + "SELECT COUNT(*) FROM issues WHERE project_id = ?", + [*local_project_id], + |row| row.get(0), + ) + .unwrap_or(0) + } else { + conn.query_row( + "SELECT COUNT(*) FROM merge_requests WHERE project_id = ?", + [*local_project_id], + |row| row.get(0), + ) + .unwrap_or(0) + }; + + preview.projects.push(DryRunProjectPreview { + path: path.clone(), + local_id: *local_project_id, + gitlab_id: *gitlab_project_id, + has_cursor: cursor_exists && !full, + last_synced: if full { None } else { last_synced }, + existing_count, + }); + } + + Ok(preview) +} + +#[allow(clippy::too_many_arguments)] async fn run_ingest_inner( config: &Config, resource_type: &str, project_filter: Option<&str>, force: bool, full: bool, + dry_run: bool, display: IngestDisplay, stage_bar: Option, ) -> Result { + // In dry_run mode, we don't actually ingest - use run_ingest_dry_run instead + // This flag is passed through for consistency but the actual dry-run logic + // is handled at the caller level + let _ = dry_run; if resource_type != "issues" && resource_type != "mrs" { return Err(LoreError::Other(format!( "Invalid resource type '{}'. Valid types: issues, mrs", @@ -759,3 +871,63 @@ pub fn print_ingest_summary(result: &IngestResult) { ); } } + +pub fn print_dry_run_preview(preview: &DryRunPreview) { + println!( + "{} {}", + style("Dry Run Preview").cyan().bold(), + style("(no changes will be made)").yellow() + ); + println!(); + + let type_label = if preview.resource_type == "issues" { + "issues" + } else { + "merge requests" + }; + + println!(" Resource type: {}", style(type_label).white().bold()); + println!( + " Sync mode: {}", + if preview.sync_mode == "full" { + style("full (all data will be re-fetched)").yellow() + } else { + style("incremental (only changes since last sync)").green() + } + ); + println!(" Projects: {}", preview.projects.len()); + println!(); + + println!("{}", style("Projects to sync:").cyan().bold()); + for project in &preview.projects { + let sync_status = if !project.has_cursor { + style("initial sync").yellow() + } else { + style("incremental").green() + }; + + println!(" {} ({})", style(&project.path).white(), sync_status); + println!(" Existing {}: {}", type_label, project.existing_count); + + if let Some(ref last_synced) = project.last_synced { + println!(" Last synced: {}", last_synced); + } + } +} + +#[derive(Serialize)] +struct DryRunJsonOutput { + ok: bool, + dry_run: bool, + data: DryRunPreview, +} + +pub fn print_dry_run_preview_json(preview: &DryRunPreview) { + let output = DryRunJsonOutput { + ok: true, + dry_run: true, + data: preview.clone(), + }; + + println!("{}", serde_json::to_string(&output).unwrap()); +} diff --git a/src/cli/commands/stats.rs b/src/cli/commands/stats.rs index 38ec6c1..2c44b80 100644 --- a/src/cli/commands/stats.rs +++ b/src/cli/commands/stats.rs @@ -69,9 +69,10 @@ pub struct RepairResult { pub fts_rebuilt: bool, pub orphans_deleted: i64, pub stale_cleared: i64, + pub dry_run: bool, } -pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result { +pub fn run_stats(config: &Config, check: bool, repair: bool, dry_run: bool) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; @@ -220,43 +221,54 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result 0 && table_exists(&conn, "embedding_metadata") { - let deleted = conn.execute( - "DELETE FROM embedding_metadata - WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)", - [], - )?; - repair_result.orphans_deleted = deleted as i64; - - if table_exists(&conn, "embeddings") { - let _ = conn.execute( - "DELETE FROM embeddings - WHERE rowid / 1000 NOT IN (SELECT id FROM documents)", + if !dry_run { + let deleted = conn.execute( + "DELETE FROM embedding_metadata + WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)", [], - ); + )?; + repair_result.orphans_deleted = deleted as i64; + + if table_exists(&conn, "embeddings") { + let _ = conn.execute( + "DELETE FROM embeddings + WHERE rowid / 1000 NOT IN (SELECT id FROM documents)", + [], + ); + } + } else { + repair_result.orphans_deleted = integrity.orphan_embeddings; } } if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") { - let cleared = conn.execute( - "DELETE FROM embedding_metadata - WHERE document_id IN ( - SELECT em.document_id FROM embedding_metadata em - JOIN documents d ON d.id = em.document_id - WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash - )", - [], - )?; - repair_result.stale_cleared = cleared as i64; + if !dry_run { + let cleared = conn.execute( + "DELETE FROM embedding_metadata + WHERE document_id IN ( + SELECT em.document_id FROM embedding_metadata em + JOIN documents d ON d.id = em.document_id + WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash + )", + [], + )?; + repair_result.stale_cleared = cleared as i64; + } else { + repair_result.stale_cleared = integrity.stale_metadata; + } } integrity.repair = Some(repair_result); @@ -387,22 +399,35 @@ pub fn print_stats(result: &StatsResult) { if let Some(ref repair) = integrity.repair { println!(); - println!("{}", style("Repair").cyan().bold()); + if repair.dry_run { + println!( + "{} {}", + style("Repair").cyan().bold(), + style("(dry run - no changes made)").yellow() + ); + } else { + println!("{}", style("Repair").cyan().bold()); + } + + let action = if repair.dry_run { + style("would fix").yellow() + } else { + style("fixed").green() + }; + if repair.fts_rebuilt { - println!(" {} FTS index rebuilt", style("fixed").green()); + println!(" {} FTS index rebuilt", action); } if repair.orphans_deleted > 0 { println!( " {} {} orphan embeddings deleted", - style("fixed").green(), - repair.orphans_deleted + action, repair.orphans_deleted ); } if repair.stale_cleared > 0 { println!( " {} {} stale metadata entries cleared", - style("fixed").green(), - repair.stale_cleared + action, repair.stale_cleared ); } if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 { @@ -442,6 +467,7 @@ pub fn print_stats_json(result: &StatsResult) { fts_rebuilt: r.fts_rebuilt, orphans_deleted: r.orphans_deleted, stale_cleared: r.stale_cleared, + dry_run: r.dry_run, }), }), }, diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs index 4c40380..9a852e6 100644 --- a/src/cli/commands/sync.rs +++ b/src/cli/commands/sync.rs @@ -12,7 +12,7 @@ use crate::core::metrics::{MetricsLayer, StageTiming}; use super::embed::run_embed; use super::generate_docs::run_generate_docs; -use super::ingest::{IngestDisplay, run_ingest}; +use super::ingest::{DryRunPreview, IngestDisplay, run_ingest, run_ingest_dry_run}; #[derive(Debug, Default)] pub struct SyncOptions { @@ -22,6 +22,7 @@ pub struct SyncOptions { pub no_docs: bool, pub no_events: bool, pub robot_mode: bool, + pub dry_run: bool, } #[derive(Debug, Default, Serialize)] @@ -74,6 +75,11 @@ pub async fn run_sync( ..SyncResult::default() }; + // Handle dry_run mode - show preview without making any changes + if options.dry_run { + return run_sync_dry_run(config, &options).await; + } + let ingest_display = if options.robot_mode { IngestDisplay::silent() } else { @@ -103,6 +109,7 @@ pub async fn run_sync( None, options.force, options.full, + false, // dry_run - sync has its own dry_run handling ingest_display, Some(spinner.clone()), ) @@ -127,6 +134,7 @@ pub async fn run_sync( None, options.force, options.full, + false, // dry_run - sync has its own dry_run handling ingest_display, Some(spinner.clone()), ) @@ -369,3 +377,172 @@ pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&Me }; println!("{}", serde_json::to_string(&output).unwrap()); } + +#[derive(Debug, Default, Serialize)] +pub struct SyncDryRunResult { + pub issues_preview: DryRunPreview, + pub mrs_preview: DryRunPreview, + pub would_generate_docs: bool, + pub would_embed: bool, +} + +async fn run_sync_dry_run(config: &Config, options: &SyncOptions) -> Result { + // Get dry run previews for both issues and MRs + let issues_preview = run_ingest_dry_run(config, "issues", None, options.full)?; + let mrs_preview = run_ingest_dry_run(config, "mrs", None, options.full)?; + + let dry_result = SyncDryRunResult { + issues_preview, + mrs_preview, + would_generate_docs: !options.no_docs, + would_embed: !options.no_embed, + }; + + if options.robot_mode { + print_sync_dry_run_json(&dry_result); + } else { + print_sync_dry_run(&dry_result); + } + + // Return an empty SyncResult since this is just a preview + Ok(SyncResult::default()) +} + +pub fn print_sync_dry_run(result: &SyncDryRunResult) { + println!( + "{} {}", + style("Sync Dry Run Preview").cyan().bold(), + style("(no changes will be made)").yellow() + ); + println!(); + + println!("{}", style("Stage 1: Issues Ingestion").white().bold()); + println!( + " Sync mode: {}", + if result.issues_preview.sync_mode == "full" { + style("full").yellow() + } else { + style("incremental").green() + } + ); + println!(" Projects: {}", result.issues_preview.projects.len()); + for project in &result.issues_preview.projects { + let sync_status = if !project.has_cursor { + style("initial sync").yellow() + } else { + style("incremental").green() + }; + println!( + " {} ({}) - {} existing", + &project.path, sync_status, project.existing_count + ); + } + println!(); + + println!( + "{}", + style("Stage 2: Merge Requests Ingestion").white().bold() + ); + println!( + " Sync mode: {}", + if result.mrs_preview.sync_mode == "full" { + style("full").yellow() + } else { + style("incremental").green() + } + ); + println!(" Projects: {}", result.mrs_preview.projects.len()); + for project in &result.mrs_preview.projects { + let sync_status = if !project.has_cursor { + style("initial sync").yellow() + } else { + style("incremental").green() + }; + println!( + " {} ({}) - {} existing", + &project.path, sync_status, project.existing_count + ); + } + println!(); + + if result.would_generate_docs { + println!( + "{} {}", + style("Stage 3: Document Generation").white().bold(), + style("(would run)").green() + ); + } else { + println!( + "{} {}", + style("Stage 3: Document Generation").white().bold(), + style("(skipped)").dim() + ); + } + + if result.would_embed { + println!( + "{} {}", + style("Stage 4: Embedding").white().bold(), + style("(would run)").green() + ); + } else { + println!( + "{} {}", + style("Stage 4: Embedding").white().bold(), + style("(skipped)").dim() + ); + } +} + +#[derive(Serialize)] +struct SyncDryRunJsonOutput { + ok: bool, + dry_run: bool, + data: SyncDryRunJsonData, +} + +#[derive(Serialize)] +struct SyncDryRunJsonData { + stages: Vec, +} + +#[derive(Serialize)] +struct SyncDryRunStage { + name: String, + would_run: bool, + #[serde(skip_serializing_if = "Option::is_none")] + preview: Option, +} + +pub fn print_sync_dry_run_json(result: &SyncDryRunResult) { + let output = SyncDryRunJsonOutput { + ok: true, + dry_run: true, + data: SyncDryRunJsonData { + stages: vec![ + SyncDryRunStage { + name: "ingest_issues".to_string(), + would_run: true, + preview: Some(result.issues_preview.clone()), + }, + SyncDryRunStage { + name: "ingest_mrs".to_string(), + would_run: true, + preview: Some(result.mrs_preview.clone()), + }, + SyncDryRunStage { + name: "generate_docs".to_string(), + would_run: result.would_generate_docs, + preview: None, + }, + SyncDryRunStage { + name: "embed".to_string(), + would_run: result.would_embed, + preview: None, + }, + ], + }, + }; + + println!("{}", serde_json::to_string(&output).unwrap()); +}