feat: Add dry-run mode to ingest, sync, and stats commands

Enables preview of operations without making changes, useful for
understanding what would happen before committing to a full sync.

Ingest dry-run (--dry-run flag):
- Shows resource type, sync mode (full vs incremental), project list
- Per-project info: existing count, has_cursor, last_synced timestamp
- No GitLab API calls, no database writes

Sync dry-run (--dry-run flag):
- Preview all four stages: issues ingest, MRs ingest, docs, embed
- Shows which stages would run vs be skipped (--no-docs, --no-embed)
- Per-project breakdown for both entity types

Stats repair dry-run (--dry-run flag):
- Shows what would be repaired without executing repairs
- "would fix" vs "fixed" indicator in terminal output
- dry_run: true field in JSON response

Implementation details:
- DryRunPreview struct captures project-level sync state
- SyncDryRunResult aggregates previews for all sync stages
- Terminal output uses yellow styling for "would" actions
- JSON output includes dry_run: true at top level

Flag handling:
- --dry-run and --no-dry-run pair for explicit control
- Defaults to false (normal operation)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 11:22:22 -05:00
parent 784fe79b80
commit ab43bbd2db
3 changed files with 409 additions and 34 deletions

View File

@@ -42,6 +42,23 @@ pub struct IngestResult {
pub resource_events_failed: usize,
}
/// Read-only summary of what an ingest run would operate on.
///
/// Built by `run_ingest_dry_run` and rendered by `print_dry_run_preview`
/// (terminal) or `print_dry_run_preview_json` (JSON).
#[derive(Debug, Default, Clone, Serialize)]
pub struct DryRunPreview {
/// Resource type the preview was built for; `run_ingest_dry_run` only
/// accepts `"issues"` or `"mrs"`.
pub resource_type: String,
/// One entry per project that would be synced.
pub projects: Vec<DryRunProjectPreview>,
/// `"full"` when a full sync was requested, otherwise `"incremental"`.
pub sync_mode: String,
}
/// Per-project sync state captured for a dry-run preview.
#[derive(Debug, Default, Clone, Serialize)]
pub struct DryRunProjectPreview {
/// Project path as returned by `get_projects_to_sync`.
pub path: String,
/// Local database id of the project; used as `project_id` when querying
/// `sync_cursors`, `issues` and `merge_requests`.
pub local_id: i64,
/// Second id returned by `get_projects_to_sync` — presumably GitLab's own
/// project id; confirm against that helper.
pub gitlab_id: i64,
/// `true` only when a `sync_cursors` row exists for this project and
/// resource type AND a full sync was not requested.
pub has_cursor: bool,
/// `updated_at` value of the sync cursor, if it could be read; always
/// `None` when a full sync was requested.
pub last_synced: Option<String>,
/// Number of rows already stored locally for this project
/// (`issues` or `merge_requests`, depending on the resource type).
pub existing_count: i64,
}
enum ProjectIngestOutcome {
Issues {
path: String,
@@ -86,12 +103,14 @@ impl IngestDisplay {
}
}
#[allow(clippy::too_many_arguments)]
pub async fn run_ingest(
config: &Config,
resource_type: &str,
project_filter: Option<&str>,
force: bool,
full: bool,
dry_run: bool,
display: IngestDisplay,
stage_bar: Option<ProgressBar>,
) -> Result<IngestResult> {
@@ -105,6 +124,7 @@ pub async fn run_ingest(
project_filter,
force,
full,
dry_run,
display,
stage_bar,
)
@@ -112,15 +132,107 @@ pub async fn run_ingest(
.await
}
/// Builds a preview of what an ingest of `resource_type` would operate on,
/// without running the ingest itself.
///
/// Opens the local database, resolves the projects selected by
/// `project_filter`, and for each one records whether a sync cursor exists,
/// the cursor's `updated_at` value, and how many rows are already stored.
/// The statements issued directly by this function are all `SELECT`s.
///
/// When `full` is set, every project is reported with `has_cursor == false`
/// and `last_synced == None`, and `sync_mode` is `"full"`; otherwise
/// `sync_mode` is `"incremental"`.
///
/// # Errors
///
/// Returns `LoreError::Other` when `resource_type` is neither `"issues"` nor
/// `"mrs"`, or when no project is selected. Errors from opening the database
/// or resolving the project list are propagated. Failures of the per-project
/// lookups are NOT reported: they fall back to "no cursor", no timestamp and
/// a count of zero.
pub fn run_ingest_dry_run(
    config: &Config,
    resource_type: &str,
    project_filter: Option<&str>,
    full: bool,
) -> Result<DryRunPreview> {
    if !matches!(resource_type, "issues" | "mrs") {
        return Err(LoreError::Other(format!(
            "Invalid resource type '{}'. Valid types: issues, mrs",
            resource_type
        )));
    }

    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;
    let selected = get_projects_to_sync(&conn, &config.projects, project_filter)?;

    if selected.is_empty() {
        let message = match project_filter {
            Some(filter) => format!("Project '{}' not found in configuration", filter),
            None => "No projects configured. Run 'lore init' first.".to_string(),
        };
        return Err(LoreError::Other(message));
    }

    // The table holding already-ingested rows depends only on the resource
    // type, so pick the statement once instead of once per project.
    let count_sql = if resource_type == "issues" {
        "SELECT COUNT(*) FROM issues WHERE project_id = ?"
    } else {
        "SELECT COUNT(*) FROM merge_requests WHERE project_id = ?"
    };

    let mut project_previews = Vec::new();
    for (local_id, gitlab_id, project_path) in &selected {
        // Each lookup is best-effort: a failing query degrades to the
        // "nothing known" value rather than aborting the preview.
        let has_cursor_row: bool = conn
            .query_row(
                "SELECT EXISTS(SELECT 1 FROM sync_cursors WHERE project_id = ? AND resource_type = ?)",
                (*local_id, resource_type),
                |row| row.get(0),
            )
            .unwrap_or(false);

        let cursor_timestamp: Option<String> = conn
            .query_row(
                "SELECT updated_at FROM sync_cursors WHERE project_id = ? AND resource_type = ?",
                (*local_id, resource_type),
                |row| row.get(0),
            )
            .ok();

        let existing_count: i64 = conn
            .query_row(count_sql, [*local_id], |row| row.get(0))
            .unwrap_or(0);

        // A full sync ignores any stored cursor, so the preview hides it.
        project_previews.push(DryRunProjectPreview {
            path: project_path.clone(),
            local_id: *local_id,
            gitlab_id: *gitlab_id,
            has_cursor: !full && has_cursor_row,
            last_synced: cursor_timestamp.filter(|_| !full),
            existing_count,
        });
    }

    let sync_mode = if full { "full" } else { "incremental" };

    Ok(DryRunPreview {
        resource_type: resource_type.to_string(),
        projects: project_previews,
        sync_mode: sync_mode.to_string(),
    })
}
#[allow(clippy::too_many_arguments)]
async fn run_ingest_inner(
config: &Config,
resource_type: &str,
project_filter: Option<&str>,
force: bool,
full: bool,
dry_run: bool,
display: IngestDisplay,
stage_bar: Option<ProgressBar>,
) -> Result<IngestResult> {
// In dry_run mode, we don't actually ingest - use run_ingest_dry_run instead
// This flag is passed through for consistency but the actual dry-run logic
// is handled at the caller level
let _ = dry_run;
if resource_type != "issues" && resource_type != "mrs" {
return Err(LoreError::Other(format!(
"Invalid resource type '{}'. Valid types: issues, mrs",
@@ -759,3 +871,63 @@ pub fn print_ingest_summary(result: &IngestResult) {
);
}
}
/// Prints a human-readable ingest dry-run preview to stdout.
///
/// Shows the resource type, the sync mode, the number of projects, and for
/// each project its sync status, existing row count and (when present) the
/// last-synced value.
pub fn print_dry_run_preview(preview: &DryRunPreview) {
    // Anything other than "issues" is labelled as merge requests.
    let type_label = match preview.resource_type.as_str() {
        "issues" => "issues",
        _ => "merge requests",
    };
    let mode_label = if preview.sync_mode == "full" {
        style("full (all data will be re-fetched)").yellow()
    } else {
        style("incremental (only changes since last sync)").green()
    };

    println!(
        "{} {}",
        style("Dry Run Preview").cyan().bold(),
        style("(no changes will be made)").yellow()
    );
    println!();
    println!(" Resource type: {}", style(type_label).white().bold());
    println!(" Sync mode: {}", mode_label);
    println!(" Projects: {}", preview.projects.len());
    println!();
    println!("{}", style("Projects to sync:").cyan().bold());

    for project in &preview.projects {
        // A project without a usable cursor is shown as an initial sync.
        let sync_status = if project.has_cursor {
            style("incremental").green()
        } else {
            style("initial sync").yellow()
        };
        println!(" {} ({})", style(&project.path).white(), sync_status);
        println!(" Existing {}: {}", type_label, project.existing_count);
        if let Some(last_synced) = &project.last_synced {
            println!(" Last synced: {}", last_synced);
        }
    }
}
/// JSON envelope for an ingest dry-run preview.
///
/// Borrows the preview so that serializing it does not require a deep copy.
#[derive(Serialize)]
struct DryRunJsonOutput<'a> {
    ok: bool,
    dry_run: bool,
    data: &'a DryRunPreview,
}

/// Prints an ingest dry-run preview to stdout as a single line of JSON.
///
/// The object has the shape `{"ok": true, "dry_run": true, "data": <preview>}`.
///
/// # Panics
///
/// Panics if JSON serialization fails. The envelope and `DryRunPreview` are
/// derived `Serialize` types made of strings, integers, booleans and vectors.
pub fn print_dry_run_preview_json(preview: &DryRunPreview) {
    let output = DryRunJsonOutput {
        ok: true,
        dry_run: true,
        data: preview,
    };
    let json = serde_json::to_string(&output)
        .expect("dry-run preview envelope should always serialize to JSON");
    println!("{}", json);
}

View File

@@ -69,9 +69,10 @@ pub struct RepairResult {
pub fts_rebuilt: bool,
pub orphans_deleted: i64,
pub stale_cleared: i64,
pub dry_run: bool,
}
pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResult> {
pub fn run_stats(config: &Config, check: bool, repair: bool, dry_run: bool) -> Result<StatsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
@@ -220,43 +221,54 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
if repair {
let mut repair_result = RepairResult::default();
repair_result.dry_run = dry_run;
if integrity.fts_doc_mismatch {
conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
[],
)?;
if !dry_run {
conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('rebuild')",
[],
)?;
}
repair_result.fts_rebuilt = true;
}
if integrity.orphan_embeddings > 0 && table_exists(&conn, "embedding_metadata") {
let deleted = conn.execute(
"DELETE FROM embedding_metadata
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
[],
)?;
repair_result.orphans_deleted = deleted as i64;
if table_exists(&conn, "embeddings") {
let _ = conn.execute(
"DELETE FROM embeddings
WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
if !dry_run {
let deleted = conn.execute(
"DELETE FROM embedding_metadata
WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = embedding_metadata.document_id)",
[],
);
)?;
repair_result.orphans_deleted = deleted as i64;
if table_exists(&conn, "embeddings") {
let _ = conn.execute(
"DELETE FROM embeddings
WHERE rowid / 1000 NOT IN (SELECT id FROM documents)",
[],
);
}
} else {
repair_result.orphans_deleted = integrity.orphan_embeddings;
}
}
if integrity.stale_metadata > 0 && table_exists(&conn, "embedding_metadata") {
let cleared = conn.execute(
"DELETE FROM embedding_metadata
WHERE document_id IN (
SELECT em.document_id FROM embedding_metadata em
JOIN documents d ON d.id = em.document_id
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
)",
[],
)?;
repair_result.stale_cleared = cleared as i64;
if !dry_run {
let cleared = conn.execute(
"DELETE FROM embedding_metadata
WHERE document_id IN (
SELECT em.document_id FROM embedding_metadata em
JOIN documents d ON d.id = em.document_id
WHERE em.chunk_index = 0 AND em.document_hash != d.content_hash
)",
[],
)?;
repair_result.stale_cleared = cleared as i64;
} else {
repair_result.stale_cleared = integrity.stale_metadata;
}
}
integrity.repair = Some(repair_result);
@@ -387,22 +399,35 @@ pub fn print_stats(result: &StatsResult) {
if let Some(ref repair) = integrity.repair {
println!();
println!("{}", style("Repair").cyan().bold());
if repair.dry_run {
println!(
"{} {}",
style("Repair").cyan().bold(),
style("(dry run - no changes made)").yellow()
);
} else {
println!("{}", style("Repair").cyan().bold());
}
let action = if repair.dry_run {
style("would fix").yellow()
} else {
style("fixed").green()
};
if repair.fts_rebuilt {
println!(" {} FTS index rebuilt", style("fixed").green());
println!(" {} FTS index rebuilt", action);
}
if repair.orphans_deleted > 0 {
println!(
" {} {} orphan embeddings deleted",
style("fixed").green(),
repair.orphans_deleted
action, repair.orphans_deleted
);
}
if repair.stale_cleared > 0 {
println!(
" {} {} stale metadata entries cleared",
style("fixed").green(),
repair.stale_cleared
action, repair.stale_cleared
);
}
if !repair.fts_rebuilt && repair.orphans_deleted == 0 && repair.stale_cleared == 0 {
@@ -442,6 +467,7 @@ pub fn print_stats_json(result: &StatsResult) {
fts_rebuilt: r.fts_rebuilt,
orphans_deleted: r.orphans_deleted,
stale_cleared: r.stale_cleared,
dry_run: r.dry_run,
}),
}),
},

View File

@@ -12,7 +12,7 @@ use crate::core::metrics::{MetricsLayer, StageTiming};
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
use super::ingest::{IngestDisplay, run_ingest};
use super::ingest::{DryRunPreview, IngestDisplay, run_ingest, run_ingest_dry_run};
#[derive(Debug, Default)]
pub struct SyncOptions {
@@ -22,6 +22,7 @@ pub struct SyncOptions {
pub no_docs: bool,
pub no_events: bool,
pub robot_mode: bool,
pub dry_run: bool,
}
#[derive(Debug, Default, Serialize)]
@@ -74,6 +75,11 @@ pub async fn run_sync(
..SyncResult::default()
};
// Handle dry_run mode - show preview without making any changes
if options.dry_run {
return run_sync_dry_run(config, &options).await;
}
let ingest_display = if options.robot_mode {
IngestDisplay::silent()
} else {
@@ -103,6 +109,7 @@ pub async fn run_sync(
None,
options.force,
options.full,
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
)
@@ -127,6 +134,7 @@ pub async fn run_sync(
None,
options.force,
options.full,
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
)
@@ -369,3 +377,172 @@ pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&Me
};
println!("{}", serde_json::to_string(&output).unwrap());
}
/// Aggregated dry-run preview for every stage of a sync.
///
/// Built by `run_sync_dry_run` and rendered by `print_sync_dry_run`
/// (terminal) or `print_sync_dry_run_json` (JSON).
#[derive(Debug, Default, Serialize)]
pub struct SyncDryRunResult {
/// Preview of the issues ingest stage.
pub issues_preview: DryRunPreview,
/// Preview of the merge-requests ingest stage.
pub mrs_preview: DryRunPreview,
/// `true` unless document generation was disabled (`no_docs`).
pub would_generate_docs: bool,
/// `true` unless embedding was disabled (`no_embed`).
pub would_embed: bool,
}
/// Previews a sync without running any of its stages.
///
/// Collects ingest previews for issues and then merge requests across all
/// projects, records whether the docs and embed stages would run, and prints
/// the result as JSON in robot mode or as styled text otherwise.
///
/// Always returns a default (empty) `SyncResult` on success, since nothing
/// was actually synced.
///
/// # Errors
///
/// Propagates any error from `run_ingest_dry_run`.
async fn run_sync_dry_run(config: &Config, options: &SyncOptions) -> Result<SyncResult> {
    let full = options.full;

    // Field initialisers run in source order: issues first, then MRs.
    let dry_result = SyncDryRunResult {
        issues_preview: run_ingest_dry_run(config, "issues", None, full)?,
        mrs_preview: run_ingest_dry_run(config, "mrs", None, full)?,
        would_generate_docs: !options.no_docs,
        would_embed: !options.no_embed,
    };

    match options.robot_mode {
        true => print_sync_dry_run_json(&dry_result),
        false => print_sync_dry_run(&dry_result),
    }

    Ok(SyncResult::default())
}
pub fn print_sync_dry_run(result: &SyncDryRunResult) {
println!(
"{} {}",
style("Sync Dry Run Preview").cyan().bold(),
style("(no changes will be made)").yellow()
);
println!();
println!("{}", style("Stage 1: Issues Ingestion").white().bold());
println!(
" Sync mode: {}",
if result.issues_preview.sync_mode == "full" {
style("full").yellow()
} else {
style("incremental").green()
}
);
println!(" Projects: {}", result.issues_preview.projects.len());
for project in &result.issues_preview.projects {
let sync_status = if !project.has_cursor {
style("initial sync").yellow()
} else {
style("incremental").green()
};
println!(
" {} ({}) - {} existing",
&project.path, sync_status, project.existing_count
);
}
println!();
println!(
"{}",
style("Stage 2: Merge Requests Ingestion").white().bold()
);
println!(
" Sync mode: {}",
if result.mrs_preview.sync_mode == "full" {
style("full").yellow()
} else {
style("incremental").green()
}
);
println!(" Projects: {}", result.mrs_preview.projects.len());
for project in &result.mrs_preview.projects {
let sync_status = if !project.has_cursor {
style("initial sync").yellow()
} else {
style("incremental").green()
};
println!(
" {} ({}) - {} existing",
&project.path, sync_status, project.existing_count
);
}
println!();
if result.would_generate_docs {
println!(
"{} {}",
style("Stage 3: Document Generation").white().bold(),
style("(would run)").green()
);
} else {
println!(
"{} {}",
style("Stage 3: Document Generation").white().bold(),
style("(skipped)").dim()
);
}
if result.would_embed {
println!(
"{} {}",
style("Stage 4: Embedding").white().bold(),
style("(would run)").green()
);
} else {
println!(
"{} {}",
style("Stage 4: Embedding").white().bold(),
style("(skipped)").dim()
);
}
}
/// JSON envelope for a sync dry-run preview.
#[derive(Serialize)]
struct SyncDryRunJsonOutput<'a> {
    ok: bool,
    dry_run: bool,
    data: SyncDryRunJsonData<'a>,
}

/// Payload of the sync dry-run envelope: the ordered list of stages.
#[derive(Serialize)]
struct SyncDryRunJsonData<'a> {
    stages: Vec<SyncDryRunStage<'a>>,
}

/// One sync stage in the JSON preview.
///
/// `preview` borrows the ingest preview instead of owning a copy, and is
/// left out of the JSON entirely for stages that have none.
#[derive(Serialize)]
struct SyncDryRunStage<'a> {
    name: &'static str,
    would_run: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<&'a DryRunPreview>,
}

/// Prints a sync dry-run preview to stdout as a single line of JSON.
///
/// The object has the shape
/// `{"ok": true, "dry_run": true, "data": {"stages": [...]}}` with four
/// stages in order: `ingest_issues`, `ingest_mrs`, `generate_docs`, `embed`.
/// The two ingest stages always report `would_run: true` and carry their
/// preview; the other two carry no preview.
///
/// # Panics
///
/// Panics if JSON serialization fails. The envelope types and `DryRunPreview`
/// are derived `Serialize` types made of strings, integers, booleans and
/// vectors.
pub fn print_sync_dry_run_json(result: &SyncDryRunResult) {
    let stages = vec![
        SyncDryRunStage {
            name: "ingest_issues",
            would_run: true,
            preview: Some(&result.issues_preview),
        },
        SyncDryRunStage {
            name: "ingest_mrs",
            would_run: true,
            preview: Some(&result.mrs_preview),
        },
        SyncDryRunStage {
            name: "generate_docs",
            would_run: result.would_generate_docs,
            preview: None,
        },
        SyncDryRunStage {
            name: "embed",
            would_run: result.would_embed,
            preview: None,
        },
    ];

    let output = SyncDryRunJsonOutput {
        ok: true,
        dry_run: true,
        data: SyncDryRunJsonData { stages },
    };
    let json = serde_json::to_string(&output)
        .expect("sync dry-run envelope should always serialize to JSON");
    println!("{}", json);
}