Files
gitlore/src/cli/commands/sync.rs
Taylor Eernisse 6b75697638 feat(ingestion): enrich issues with work item status from GraphQL API
Add a "Phase 1.5" status enrichment step to the issue ingestion pipeline
that fetches work item statuses via the GitLab GraphQL API after the
standard REST API ingestion completes.

Schema changes (migration 021):
- Add status_name, status_category, status_color, status_icon_name, and
  status_synced_at columns to the issues table (all nullable)

Ingestion pipeline changes:
- New `enrich_issue_statuses_txn()` function that applies fetched
  statuses in a single transaction with two phases: clear stale statuses
  for issues that no longer have a status widget, then apply new/updated
  statuses from the GraphQL response
- ProgressEvent variants for status enrichment (complete/skipped)
- IngestProjectResult tracks enrichment metrics (seen, enriched, cleared,
  without_widget, partial_error_count, enrichment_mode, errors)
- Robot mode JSON output includes per-project status enrichment details

Configuration:
- New `sync.fetchWorkItemStatus` config option (defaults true) to disable
  GraphQL status enrichment on instances without Premium/Ultimate
- `LoreError::GitLabAuthFailed` now treated as permanent API error so
  status enrichment auth failures don't trigger retries

Also removes the unnecessary nested SAVEPOINT in store_closes_issues_refs
(already runs within the orchestrator's transaction context).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 08:09:21 -05:00

577 lines
18 KiB
Rust

use console::style;
use indicatif::{ProgressBar, ProgressStyle};
use serde::Serialize;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tracing::Instrument;
use tracing::{info, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::metrics::{MetricsLayer, StageTiming};
use crate::core::shutdown::ShutdownSignal;
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
use super::ingest::{DryRunPreview, IngestDisplay, run_ingest, run_ingest_dry_run};
/// Flags controlling which stages of the sync pipeline run and how output is rendered.
#[derive(Debug, Default)]
pub struct SyncOptions {
    /// Run a full (non-incremental) ingest; also propagated to docs/embed stages.
    pub full: bool,
    /// Force flag passed straight through to `run_ingest` — TODO confirm exact
    /// semantics (presumably bypasses up-to-date checks) against that function.
    pub force: bool,
    /// Skip the embedding stage entirely.
    pub no_embed: bool,
    /// Skip the document-generation stage entirely.
    pub no_docs: bool,
    /// NOTE(review): not read anywhere in this file — presumably consumed by a
    /// caller or a deeper layer; verify before removing.
    pub no_events: bool,
    /// Machine-readable mode: suppress spinners/progress bars and print JSON.
    pub robot_mode: bool,
    /// Preview what would happen without making any changes.
    pub dry_run: bool,
}
/// Aggregated counters for one sync run; serialized as the `data` payload in
/// robot-mode JSON output.
#[derive(Debug, Default, Serialize)]
pub struct SyncResult {
    /// Correlation id for this run; skipped in the payload because it is
    /// emitted separately in `SyncMeta`.
    #[serde(skip)]
    pub run_id: String,
    /// Issues upserted during the issues ingest stage.
    pub issues_updated: usize,
    /// Merge requests upserted during the MRs ingest stage.
    pub mrs_updated: usize,
    /// Discussions fetched across both ingest stages (issues + MRs).
    pub discussions_fetched: usize,
    /// Resource events fetched across both ingest stages.
    pub resource_events_fetched: usize,
    /// Resource-event fetch failures across both ingest stages.
    pub resource_events_failed: usize,
    /// MR diffs fetched (MRs stage only).
    pub mr_diffs_fetched: usize,
    /// MR diff fetch failures (MRs stage only).
    pub mr_diffs_failed: usize,
    /// Documents regenerated by the docs stage (0 when skipped).
    pub documents_regenerated: usize,
    /// Documents embedded by the embed stage (0 when skipped or failed).
    pub documents_embedded: usize,
    /// Work-item status enrichment errors; accumulated from the issues stage
    /// only (see `run_sync`).
    pub status_enrichment_errors: usize,
}
/// Build a steady-ticking spinner prefixed with `[stage/total]` and the given
/// message. In robot mode a hidden bar is returned so nothing is drawn.
fn stage_spinner(stage: u8, total: u8, msg: &str, robot_mode: bool) -> ProgressBar {
    if robot_mode {
        return ProgressBar::hidden();
    }
    let spinner_style = ProgressStyle::default_spinner()
        .template("{spinner:.blue} {prefix} {msg}")
        .expect("valid template");
    let spinner = crate::cli::progress::multi().add(ProgressBar::new_spinner());
    spinner.set_style(spinner_style);
    spinner.enable_steady_tick(std::time::Duration::from_millis(80));
    spinner.set_prefix(format!("[{stage}/{total}]"));
    spinner.set_message(msg.to_string());
    spinner
}
/// Orchestrate the full sync pipeline under a tracing span keyed by `run_id`.
///
/// Stages: (1) ingest issues, (2) ingest merge requests, then optionally
/// (3) document generation and (4) embedding, each skippable via `options`.
/// `signal` is checked between ingest stages so a shutdown returns partial
/// results instead of an error. Ingest failures propagate; embedding failures
/// are downgraded to a warning and the sync still succeeds.
pub async fn run_sync(
    config: &Config,
    options: SyncOptions,
    run_id: Option<&str>,
    signal: &ShutdownSignal,
) -> Result<SyncResult> {
    // Declared in the outer scope so `run_id` can borrow from it past the match.
    let generated_id;
    let run_id = match run_id {
        Some(id) => id,
        None => {
            generated_id = uuid::Uuid::new_v4().simple().to_string();
            // An 8-char prefix is enough to correlate log lines for one run.
            &generated_id[..8]
        }
    };
    let span = tracing::info_span!("sync", %run_id);
    async move {
        let mut result = SyncResult {
            run_id: run_id.to_string(),
            ..SyncResult::default()
        };
        // Handle dry_run mode - show preview without making any changes.
        if options.dry_run {
            return run_sync_dry_run(config, &options).await;
        }
        let ingest_display = if options.robot_mode {
            IngestDisplay::silent()
        } else {
            IngestDisplay::progress_only()
        };
        // Total stage count for the "[n/total]" spinner prefix depends on
        // which of the two optional stages (docs, embed) are enabled.
        let total_stages: u8 = if options.no_docs && options.no_embed {
            2
        } else if options.no_docs || options.no_embed {
            3
        } else {
            4
        };
        let mut current_stage: u8 = 0;

        // Stage 1: issues ingestion.
        current_stage += 1;
        let spinner = stage_spinner(
            current_stage,
            total_stages,
            "Fetching issues from GitLab...",
            options.robot_mode,
        );
        info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
        let issues_result = run_ingest(
            config,
            "issues",
            None,
            options.force,
            options.full,
            false, // dry_run - sync has its own dry_run handling
            ingest_display,
            Some(spinner.clone()),
            signal,
        )
        .await?;
        result.issues_updated = issues_result.issues_upserted;
        result.discussions_fetched += issues_result.discussions_fetched;
        result.resource_events_fetched += issues_result.resource_events_fetched;
        result.resource_events_failed += issues_result.resource_events_failed;
        // Status enrichment only runs during the issues ingest, so this is the
        // only place these errors are accumulated.
        result.status_enrichment_errors += issues_result.status_enrichment_errors;
        spinner.finish_and_clear();
        if signal.is_cancelled() {
            info!("Shutdown requested after issues stage, returning partial sync results");
            return Ok(result);
        }

        // Stage 2: merge requests ingestion.
        current_stage += 1;
        let spinner = stage_spinner(
            current_stage,
            total_stages,
            "Fetching merge requests from GitLab...",
            options.robot_mode,
        );
        info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
        let mrs_result = run_ingest(
            config,
            "mrs",
            None,
            options.force,
            options.full,
            false, // dry_run - sync has its own dry_run handling
            ingest_display,
            Some(spinner.clone()),
            signal,
        )
        .await?;
        result.mrs_updated = mrs_result.mrs_upserted;
        result.discussions_fetched += mrs_result.discussions_fetched;
        result.resource_events_fetched += mrs_result.resource_events_fetched;
        result.resource_events_failed += mrs_result.resource_events_failed;
        // MR diff counters only exist for the MRs stage.
        result.mr_diffs_fetched += mrs_result.mr_diffs_fetched;
        result.mr_diffs_failed += mrs_result.mr_diffs_failed;
        spinner.finish_and_clear();
        if signal.is_cancelled() {
            info!("Shutdown requested after MRs stage, returning partial sync results");
            return Ok(result);
        }

        // Stage 3 (optional): document generation, with a determinate bar
        // driven by the progress callback once a total is known.
        if !options.no_docs {
            current_stage += 1;
            let spinner = stage_spinner(
                current_stage,
                total_stages,
                "Processing documents...",
                options.robot_mode,
            );
            info!("Sync stage {current_stage}/{total_stages}: generating documents");
            let docs_bar = if options.robot_mode {
                ProgressBar::hidden()
            } else {
                let b = crate::cli::progress::multi().add(ProgressBar::new(0));
                b.set_style(
                    ProgressStyle::default_bar()
                        .template(
                            "  {spinner:.blue} Processing documents [{bar:30.cyan/dim}] {pos}/{len}",
                        )
                        .unwrap()
                        .progress_chars("=> "),
                );
                b
            };
            let docs_bar_clone = docs_bar.clone();
            // Start the steady tick lazily, exactly once, on the first callback
            // that reports a nonzero total (swap returns the previous value).
            let tick_started = Arc::new(AtomicBool::new(false));
            let tick_started_clone = Arc::clone(&tick_started);
            let docs_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
                if total > 0 {
                    if !tick_started_clone.swap(true, Ordering::Relaxed) {
                        docs_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
                    }
                    docs_bar_clone.set_length(total as u64);
                    docs_bar_clone.set_position(processed as u64);
                }
            });
            let docs_result = run_generate_docs(config, options.full, None, Some(docs_cb))?;
            result.documents_regenerated = docs_result.regenerated;
            docs_bar.finish_and_clear();
            spinner.finish_and_clear();
        } else {
            info!("Sync: skipping document generation (--no-docs)");
        }

        // Stage 4 (optional): embedding. Same lazy-tick progress-bar pattern.
        if !options.no_embed {
            current_stage += 1;
            let spinner = stage_spinner(
                current_stage,
                total_stages,
                "Generating embeddings...",
                options.robot_mode,
            );
            info!("Sync stage {current_stage}/{total_stages}: embedding documents");
            let embed_bar = if options.robot_mode {
                ProgressBar::hidden()
            } else {
                let b = crate::cli::progress::multi().add(ProgressBar::new(0));
                b.set_style(
                    ProgressStyle::default_bar()
                        .template(
                            "  {spinner:.blue} Generating embeddings [{bar:30.cyan/dim}] {pos}/{len}",
                        )
                        .unwrap()
                        .progress_chars("=> "),
                );
                b
            };
            let embed_bar_clone = embed_bar.clone();
            let tick_started = Arc::new(AtomicBool::new(false));
            let tick_started_clone = Arc::clone(&tick_started);
            let embed_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
                if total > 0 {
                    if !tick_started_clone.swap(true, Ordering::Relaxed) {
                        embed_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
                    }
                    embed_bar_clone.set_length(total as u64);
                    embed_bar_clone.set_position(processed as u64);
                }
            });
            // Embedding is best-effort: failure (e.g. Ollama unavailable) is
            // warned about but does not fail the sync.
            match run_embed(config, options.full, false, Some(embed_cb), signal).await {
                Ok(embed_result) => {
                    result.documents_embedded = embed_result.docs_embedded;
                    embed_bar.finish_and_clear();
                    spinner.finish_and_clear();
                }
                Err(e) => {
                    embed_bar.finish_and_clear();
                    spinner.finish_and_clear();
                    if !options.robot_mode {
                        eprintln!(" {} Embedding skipped ({})", style("warn").yellow(), e);
                    }
                    warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
                }
            }
        } else {
            info!("Sync: skipping embedding (--no-embed)");
        }

        // NOTE(review): status_enrichment_errors is not included in this
        // summary log line.
        info!(
            issues = result.issues_updated,
            mrs = result.mrs_updated,
            discussions = result.discussions_fetched,
            resource_events = result.resource_events_fetched,
            resource_events_failed = result.resource_events_failed,
            mr_diffs = result.mr_diffs_fetched,
            mr_diffs_failed = result.mr_diffs_failed,
            docs = result.documents_regenerated,
            embedded = result.documents_embedded,
            "Sync pipeline complete"
        );
        Ok(result)
    }
    .instrument(span)
    .await
}
/// Render the human-readable sync summary.
///
/// Failure counters (diff/event/enrichment errors) are printed only when
/// nonzero; per-stage timing is appended when a metrics layer recorded any.
pub fn print_sync(
    result: &SyncResult,
    elapsed: std::time::Duration,
    metrics: Option<&MetricsLayer>,
) {
    println!("{} Sync complete:", style("done").green().bold());
    println!(" Issues updated: {}", result.issues_updated);
    println!(" MRs updated: {}", result.mrs_updated);
    println!(
        " Discussions fetched: {}",
        result.discussions_fetched
    );
    if result.mr_diffs_fetched > 0 || result.mr_diffs_failed > 0 {
        println!(" MR diffs fetched: {}", result.mr_diffs_fetched);
        if result.mr_diffs_failed > 0 {
            println!(" MR diffs failed: {}", result.mr_diffs_failed);
        }
    }
    if result.resource_events_fetched > 0 || result.resource_events_failed > 0 {
        println!(
            " Resource events fetched: {}",
            result.resource_events_fetched
        );
        if result.resource_events_failed > 0 {
            println!(
                " Resource events failed: {}",
                result.resource_events_failed
            );
        }
    }
    // SyncResult tracks status enrichment errors and they appear in robot-mode
    // JSON; surface them here too so human output doesn't silently hide them.
    if result.status_enrichment_errors > 0 {
        println!(
            " Status enrichment errors: {}",
            result.status_enrichment_errors
        );
    }
    println!(
        " Documents regenerated: {}",
        result.documents_regenerated
    );
    println!(" Documents embedded: {}", result.documents_embedded);
    println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
    if let Some(metrics) = metrics {
        let stages = metrics.extract_timings();
        if !stages.is_empty() {
            print_timing_summary(&stages);
        }
    }
}
/// Print the stage-timing footer. Only sub-stages are listed: the top-level
/// entries are the pipeline stages themselves, and their children carry the
/// interesting per-project breakdown.
fn print_timing_summary(stages: &[StageTiming]) {
    println!();
    println!("{}", style("Stage timing:").dim());
    for sub in stages.iter().flat_map(|stage| stage.sub_stages.iter()) {
        print_stage_line(sub, 1);
    }
}
/// Recursively print one timing row: an indented name, a dot leader padded to
/// a 30-column target, the elapsed time, and an optional parenthesized suffix
/// of item/error/rate-limit counts. Children are printed one level deeper.
fn print_stage_line(stage: &StageTiming, depth: usize) {
    let indent = " ".repeat(depth);
    let name = match stage.project {
        Some(ref project) => format!("{} ({})", stage.name, project),
        None => stage.name.clone(),
    };
    // At least two dots even when the name overruns the target width.
    let pad = 30_usize.saturating_sub(indent.len() + name.len()).max(2);
    let dots = ".".repeat(pad);
    // Assemble the optional annotations, comma-separated.
    let mut parts: Vec<String> = Vec::new();
    if stage.items_processed > 0 {
        parts.push(format!("{} items", stage.items_processed));
    }
    if stage.errors > 0 {
        parts.push(format!("{} errors", stage.errors));
    }
    if stage.rate_limit_hits > 0 {
        parts.push(format!("{} rate limits", stage.rate_limit_hits));
    }
    let suffix = parts.join(", ");
    let time_str = format!("{:.1}s", stage.elapsed_ms as f64 / 1000.0);
    if suffix.is_empty() {
        println!("{indent}{name} {dots} {time_str}");
    } else {
        println!("{indent}{name} {dots} {time_str} ({suffix})");
    }
    for sub in &stage.sub_stages {
        print_stage_line(sub, depth + 1);
    }
}
/// Top-level envelope for robot-mode sync output: `{ ok, data, meta }`.
#[derive(Serialize)]
struct SyncJsonOutput<'a> {
    /// Always true here; present for a uniform envelope shape.
    ok: bool,
    /// The sync counters, borrowed from the caller.
    data: &'a SyncResult,
    /// Run id, elapsed time, and optional stage timings.
    meta: SyncMeta,
}
/// Envelope metadata accompanying the JSON payload.
#[derive(Serialize)]
struct SyncMeta {
    /// Correlation id for this run (also the tracing span field).
    run_id: String,
    /// Wall-clock duration of the whole sync in milliseconds.
    elapsed_ms: u64,
    /// Per-stage timing breakdown; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    stages: Vec<StageTiming>,
}
/// Emit the robot-mode JSON summary for a sync run on stdout.
///
/// Stage timings are included in `meta` only when a metrics layer was active
/// and recorded any (empty vectors are skipped during serialization).
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&MetricsLayer>) {
    let stages = metrics.map_or_else(Vec::new, MetricsLayer::extract_timings);
    let output = SyncJsonOutput {
        ok: true,
        data: result,
        meta: SyncMeta {
            run_id: result.run_id.clone(),
            elapsed_ms,
            stages,
        },
    };
    // These derive(Serialize) structs contain no map keys or values that can
    // fail to serialize; state the invariant instead of a bare unwrap.
    println!(
        "{}",
        serde_json::to_string(&output).expect("SyncJsonOutput serializes infallibly")
    );
}
/// Outcome of a `sync --dry-run`: per-stage previews plus which optional
/// stages would have run.
#[derive(Debug, Default, Serialize)]
pub struct SyncDryRunResult {
    /// Preview of the issues ingest stage.
    pub issues_preview: DryRunPreview,
    /// Preview of the merge-requests ingest stage.
    pub mrs_preview: DryRunPreview,
    /// True unless `--no-docs` was passed.
    pub would_generate_docs: bool,
    /// True unless `--no-embed` was passed.
    pub would_embed: bool,
}
/// Preview what a sync would do without mutating anything: gather dry-run
/// previews for both ingest stages, print them (JSON in robot mode, styled
/// text otherwise), and report an empty `SyncResult`.
async fn run_sync_dry_run(config: &Config, options: &SyncOptions) -> Result<SyncResult> {
    let dry_result = SyncDryRunResult {
        issues_preview: run_ingest_dry_run(config, "issues", None, options.full)?,
        mrs_preview: run_ingest_dry_run(config, "mrs", None, options.full)?,
        would_generate_docs: !options.no_docs,
        would_embed: !options.no_embed,
    };
    if options.robot_mode {
        print_sync_dry_run_json(&dry_result);
    } else {
        print_sync_dry_run(&dry_result);
    }
    // Nothing was changed, so the caller gets all-zero counters.
    Ok(SyncResult::default())
}
pub fn print_sync_dry_run(result: &SyncDryRunResult) {
println!(
"{} {}",
style("Sync Dry Run Preview").cyan().bold(),
style("(no changes will be made)").yellow()
);
println!();
println!("{}", style("Stage 1: Issues Ingestion").white().bold());
println!(
" Sync mode: {}",
if result.issues_preview.sync_mode == "full" {
style("full").yellow()
} else {
style("incremental").green()
}
);
println!(" Projects: {}", result.issues_preview.projects.len());
for project in &result.issues_preview.projects {
let sync_status = if !project.has_cursor {
style("initial sync").yellow()
} else {
style("incremental").green()
};
println!(
" {} ({}) - {} existing",
&project.path, sync_status, project.existing_count
);
}
println!();
println!(
"{}",
style("Stage 2: Merge Requests Ingestion").white().bold()
);
println!(
" Sync mode: {}",
if result.mrs_preview.sync_mode == "full" {
style("full").yellow()
} else {
style("incremental").green()
}
);
println!(" Projects: {}", result.mrs_preview.projects.len());
for project in &result.mrs_preview.projects {
let sync_status = if !project.has_cursor {
style("initial sync").yellow()
} else {
style("incremental").green()
};
println!(
" {} ({}) - {} existing",
&project.path, sync_status, project.existing_count
);
}
println!();
if result.would_generate_docs {
println!(
"{} {}",
style("Stage 3: Document Generation").white().bold(),
style("(would run)").green()
);
} else {
println!(
"{} {}",
style("Stage 3: Document Generation").white().bold(),
style("(skipped)").dim()
);
}
if result.would_embed {
println!(
"{} {}",
style("Stage 4: Embedding").white().bold(),
style("(would run)").green()
);
} else {
println!(
"{} {}",
style("Stage 4: Embedding").white().bold(),
style("(skipped)").dim()
);
}
}
/// Envelope for robot-mode dry-run output: `{ ok, dry_run, data }`.
#[derive(Serialize)]
struct SyncDryRunJsonOutput {
    /// Always true here; uniform envelope shape.
    ok: bool,
    /// Always true; marks the output as a preview with no changes made.
    dry_run: bool,
    /// The per-stage preview list.
    data: SyncDryRunJsonData,
}
/// Payload wrapper holding the ordered list of pipeline stages.
#[derive(Serialize)]
struct SyncDryRunJsonData {
    // Ordered: ingest_issues, ingest_mrs, generate_docs, embed.
    stages: Vec<SyncDryRunStage>,
}
/// One pipeline stage in the dry-run JSON: its name, whether it would run,
/// and (for ingest stages only) the detailed preview.
#[derive(Serialize)]
struct SyncDryRunStage {
    /// Stable machine-readable stage name (e.g. "ingest_issues").
    name: String,
    /// Whether the stage would execute given the current options.
    would_run: bool,
    /// Ingest preview details; omitted from the JSON for docs/embed stages.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<DryRunPreview>,
}
pub fn print_sync_dry_run_json(result: &SyncDryRunResult) {
let output = SyncDryRunJsonOutput {
ok: true,
dry_run: true,
data: SyncDryRunJsonData {
stages: vec![
SyncDryRunStage {
name: "ingest_issues".to_string(),
would_run: true,
preview: Some(result.issues_preview.clone()),
},
SyncDryRunStage {
name: "ingest_mrs".to_string(),
would_run: true,
preview: Some(result.mrs_preview.clone()),
},
SyncDryRunStage {
name: "generate_docs".to_string(),
would_run: result.would_generate_docs,
preview: None,
},
SyncDryRunStage {
name: "embed".to_string(),
would_run: result.would_embed,
preview: None,
},
],
},
};
println!("{}", serde_json::to_string(&output).unwrap());
}