feat(sync): Instrument pipeline with tracing spans, run_id correlation, and metrics
Add end-to-end observability to the sync and ingest pipelines:

Sync command:
- Generate UUID-based run_id for each sync invocation, propagated through all child spans for log correlation across stages
- Accept MetricsLayer reference to extract hierarchical StageTiming data after pipeline completion for robot-mode performance output
- Record sync runs in DB via SyncRunRecorder (start/succeed/fail lifecycle)
- Wrap entire sync execution in a root tracing span with run_id field

Ingest command:
- Wrap run_ingest in an instrumented root span with run_id and resource_type
- Add project path prefix to discussion progress bars for multi-project clarity
- Reset resource_events_synced_for_updated_at on --full re-sync

Sync status:
- Expand from single last_run to configurable recent runs list (default 10)
- Parse and expose StageTiming metrics from stored metrics_json
- Add run_id, total_items_processed, total_errors to SyncRunInfo
- Add mr_count to DataSummary for complete entity coverage

Orchestrator:
- Add #[instrument] with structured fields to issue and MR ingestion functions
- Record items_processed, items_skipped, errors on span close for MetricsLayer
- Emit granular progress events (IssuesFetchStarted, IssuesFetchComplete)
- Pass project_id through to drain_resource_events for scoped job claiming

Document regenerator and embedding pipeline:
- Add #[instrument] spans with items_processed, items_skipped, errors fields
- Record final counts on span close for metrics extraction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,10 +3,12 @@
|
||||
use console::style;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use serde::Serialize;
|
||||
use tracing::Instrument;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::Config;
|
||||
use crate::core::error::Result;
|
||||
use crate::core::metrics::{MetricsLayer, StageTiming};
|
||||
|
||||
use super::embed::run_embed;
|
||||
use super::generate_docs::run_generate_docs;
|
||||
@@ -26,6 +28,8 @@ pub struct SyncOptions {
|
||||
/// Result of the sync command.
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct SyncResult {
|
||||
#[serde(skip)]
|
||||
pub run_id: String,
|
||||
pub issues_updated: usize,
|
||||
pub mrs_updated: usize,
|
||||
pub discussions_fetched: usize,
|
||||
@@ -52,133 +56,162 @@ fn stage_spinner(stage: u8, total: u8, msg: &str, robot_mode: bool) -> ProgressB
|
||||
}
|
||||
|
||||
/// Run the full sync pipeline: ingest -> generate-docs -> embed.
|
||||
pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResult> {
|
||||
let mut result = SyncResult::default();
|
||||
|
||||
let ingest_display = if options.robot_mode {
|
||||
IngestDisplay::silent()
|
||||
} else {
|
||||
IngestDisplay::progress_only()
|
||||
};
|
||||
|
||||
let total_stages: u8 = if options.no_docs && options.no_embed {
|
||||
2
|
||||
} else if options.no_docs || options.no_embed {
|
||||
3
|
||||
} else {
|
||||
4
|
||||
};
|
||||
let mut current_stage: u8 = 0;
|
||||
|
||||
// Stage 1: Ingest issues
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Fetching issues from GitLab...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
|
||||
let issues_result = run_ingest(
|
||||
config,
|
||||
"issues",
|
||||
None,
|
||||
options.force,
|
||||
options.full,
|
||||
ingest_display,
|
||||
)
|
||||
.await?;
|
||||
result.issues_updated = issues_result.issues_upserted;
|
||||
result.discussions_fetched += issues_result.discussions_fetched;
|
||||
result.resource_events_fetched += issues_result.resource_events_fetched;
|
||||
result.resource_events_failed += issues_result.resource_events_failed;
|
||||
spinner.finish_and_clear();
|
||||
|
||||
// Stage 2: Ingest MRs
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Fetching merge requests from GitLab...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
|
||||
let mrs_result = run_ingest(
|
||||
config,
|
||||
"mrs",
|
||||
None,
|
||||
options.force,
|
||||
options.full,
|
||||
ingest_display,
|
||||
)
|
||||
.await?;
|
||||
result.mrs_updated = mrs_result.mrs_upserted;
|
||||
result.discussions_fetched += mrs_result.discussions_fetched;
|
||||
result.resource_events_fetched += mrs_result.resource_events_fetched;
|
||||
result.resource_events_failed += mrs_result.resource_events_failed;
|
||||
spinner.finish_and_clear();
|
||||
|
||||
// Stage 3: Generate documents (unless --no-docs)
|
||||
if !options.no_docs {
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Processing documents...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: generating documents");
|
||||
let docs_result = run_generate_docs(config, false, None)?;
|
||||
result.documents_regenerated = docs_result.regenerated;
|
||||
spinner.finish_and_clear();
|
||||
} else {
|
||||
info!("Sync: skipping document generation (--no-docs)");
|
||||
}
|
||||
|
||||
// Stage 4: Embed documents (unless --no-embed)
|
||||
if !options.no_embed {
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Generating embeddings...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: embedding documents");
|
||||
match run_embed(config, options.full, false).await {
|
||||
Ok(embed_result) => {
|
||||
result.documents_embedded = embed_result.embedded;
|
||||
spinner.finish_and_clear();
|
||||
}
|
||||
Err(e) => {
|
||||
// Graceful degradation: Ollama down is a warning, not an error
|
||||
spinner.finish_and_clear();
|
||||
if !options.robot_mode {
|
||||
eprintln!(" {} Embedding skipped ({})", style("warn").yellow(), e);
|
||||
}
|
||||
warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
|
||||
}
|
||||
///
|
||||
/// `run_id` is an optional correlation ID for log/metrics tracing.
|
||||
/// When called from `handle_sync_cmd`, this should be the same ID
|
||||
/// stored in the `sync_runs` table so logs and DB records correlate.
|
||||
pub async fn run_sync(
|
||||
config: &Config,
|
||||
options: SyncOptions,
|
||||
run_id: Option<&str>,
|
||||
) -> Result<SyncResult> {
|
||||
let generated_id;
|
||||
let run_id = match run_id {
|
||||
Some(id) => id,
|
||||
None => {
|
||||
generated_id = uuid::Uuid::new_v4().simple().to_string();
|
||||
&generated_id[..8]
|
||||
}
|
||||
} else {
|
||||
info!("Sync: skipping embedding (--no-embed)");
|
||||
};
|
||||
let span = tracing::info_span!("sync", %run_id);
|
||||
|
||||
async move {
|
||||
let mut result = SyncResult {
|
||||
run_id: run_id.to_string(),
|
||||
..SyncResult::default()
|
||||
};
|
||||
|
||||
let ingest_display = if options.robot_mode {
|
||||
IngestDisplay::silent()
|
||||
} else {
|
||||
IngestDisplay::progress_only()
|
||||
};
|
||||
|
||||
let total_stages: u8 = if options.no_docs && options.no_embed {
|
||||
2
|
||||
} else if options.no_docs || options.no_embed {
|
||||
3
|
||||
} else {
|
||||
4
|
||||
};
|
||||
let mut current_stage: u8 = 0;
|
||||
|
||||
// Stage 1: Ingest issues
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Fetching issues from GitLab...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
|
||||
let issues_result = run_ingest(
|
||||
config,
|
||||
"issues",
|
||||
None,
|
||||
options.force,
|
||||
options.full,
|
||||
ingest_display,
|
||||
)
|
||||
.await?;
|
||||
result.issues_updated = issues_result.issues_upserted;
|
||||
result.discussions_fetched += issues_result.discussions_fetched;
|
||||
result.resource_events_fetched += issues_result.resource_events_fetched;
|
||||
result.resource_events_failed += issues_result.resource_events_failed;
|
||||
spinner.finish_and_clear();
|
||||
|
||||
// Stage 2: Ingest MRs
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Fetching merge requests from GitLab...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
|
||||
let mrs_result = run_ingest(
|
||||
config,
|
||||
"mrs",
|
||||
None,
|
||||
options.force,
|
||||
options.full,
|
||||
ingest_display,
|
||||
)
|
||||
.await?;
|
||||
result.mrs_updated = mrs_result.mrs_upserted;
|
||||
result.discussions_fetched += mrs_result.discussions_fetched;
|
||||
result.resource_events_fetched += mrs_result.resource_events_fetched;
|
||||
result.resource_events_failed += mrs_result.resource_events_failed;
|
||||
spinner.finish_and_clear();
|
||||
|
||||
// Stage 3: Generate documents (unless --no-docs)
|
||||
if !options.no_docs {
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Processing documents...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: generating documents");
|
||||
let docs_result = run_generate_docs(config, false, None)?;
|
||||
result.documents_regenerated = docs_result.regenerated;
|
||||
spinner.finish_and_clear();
|
||||
} else {
|
||||
info!("Sync: skipping document generation (--no-docs)");
|
||||
}
|
||||
|
||||
// Stage 4: Embed documents (unless --no-embed)
|
||||
if !options.no_embed {
|
||||
current_stage += 1;
|
||||
let spinner = stage_spinner(
|
||||
current_stage,
|
||||
total_stages,
|
||||
"Generating embeddings...",
|
||||
options.robot_mode,
|
||||
);
|
||||
info!("Sync stage {current_stage}/{total_stages}: embedding documents");
|
||||
match run_embed(config, options.full, false).await {
|
||||
Ok(embed_result) => {
|
||||
result.documents_embedded = embed_result.embedded;
|
||||
spinner.finish_and_clear();
|
||||
}
|
||||
Err(e) => {
|
||||
// Graceful degradation: Ollama down is a warning, not an error
|
||||
spinner.finish_and_clear();
|
||||
if !options.robot_mode {
|
||||
eprintln!(" {} Embedding skipped ({})", style("warn").yellow(), e);
|
||||
}
|
||||
warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("Sync: skipping embedding (--no-embed)");
|
||||
}
|
||||
|
||||
info!(
|
||||
issues = result.issues_updated,
|
||||
mrs = result.mrs_updated,
|
||||
discussions = result.discussions_fetched,
|
||||
resource_events = result.resource_events_fetched,
|
||||
resource_events_failed = result.resource_events_failed,
|
||||
docs = result.documents_regenerated,
|
||||
embedded = result.documents_embedded,
|
||||
"Sync pipeline complete"
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
info!(
|
||||
issues = result.issues_updated,
|
||||
mrs = result.mrs_updated,
|
||||
discussions = result.discussions_fetched,
|
||||
resource_events = result.resource_events_fetched,
|
||||
resource_events_failed = result.resource_events_failed,
|
||||
docs = result.documents_regenerated,
|
||||
embedded = result.documents_embedded,
|
||||
"Sync pipeline complete"
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
.instrument(span)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Print human-readable sync summary.
|
||||
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
|
||||
pub fn print_sync(
|
||||
result: &SyncResult,
|
||||
elapsed: std::time::Duration,
|
||||
metrics: Option<&MetricsLayer>,
|
||||
) {
|
||||
println!("{} Sync complete:", style("done").green().bold(),);
|
||||
println!(" Issues updated: {}", result.issues_updated);
|
||||
println!(" MRs updated: {}", result.mrs_updated);
|
||||
@@ -204,6 +237,65 @@ pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
|
||||
);
|
||||
println!(" Documents embedded: {}", result.documents_embedded);
|
||||
println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
|
||||
|
||||
// Print per-stage timing breakdown if metrics are available
|
||||
if let Some(metrics) = metrics {
|
||||
let stages = metrics.extract_timings();
|
||||
if !stages.is_empty() {
|
||||
print_timing_summary(&stages);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Print per-stage timing breakdown for interactive users.
|
||||
fn print_timing_summary(stages: &[StageTiming]) {
|
||||
println!();
|
||||
println!("{}", style("Stage timing:").dim());
|
||||
for stage in stages {
|
||||
for sub in &stage.sub_stages {
|
||||
print_stage_line(sub, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Print a single stage timing line with indentation.
|
||||
fn print_stage_line(stage: &StageTiming, depth: usize) {
|
||||
let indent = " ".repeat(depth);
|
||||
let name = if let Some(ref project) = stage.project {
|
||||
format!("{} ({})", stage.name, project)
|
||||
} else {
|
||||
stage.name.clone()
|
||||
};
|
||||
let pad_width = 30_usize.saturating_sub(indent.len() + name.len());
|
||||
let dots = ".".repeat(pad_width.max(2));
|
||||
|
||||
let mut suffix = String::new();
|
||||
if stage.items_processed > 0 {
|
||||
suffix.push_str(&format!("{} items", stage.items_processed));
|
||||
}
|
||||
if stage.errors > 0 {
|
||||
if !suffix.is_empty() {
|
||||
suffix.push_str(", ");
|
||||
}
|
||||
suffix.push_str(&format!("{} errors", stage.errors));
|
||||
}
|
||||
if stage.rate_limit_hits > 0 {
|
||||
if !suffix.is_empty() {
|
||||
suffix.push_str(", ");
|
||||
}
|
||||
suffix.push_str(&format!("{} rate limits", stage.rate_limit_hits));
|
||||
}
|
||||
|
||||
let time_str = format!("{:.1}s", stage.elapsed_ms as f64 / 1000.0);
|
||||
if suffix.is_empty() {
|
||||
println!("{indent}{name} {dots} {time_str}");
|
||||
} else {
|
||||
println!("{indent}{name} {dots} {time_str} ({suffix})");
|
||||
}
|
||||
|
||||
for sub in &stage.sub_stages {
|
||||
print_stage_line(sub, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON output for sync.
|
||||
@@ -216,15 +308,23 @@ struct SyncJsonOutput<'a> {
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SyncMeta {
|
||||
run_id: String,
|
||||
elapsed_ms: u64,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
stages: Vec<StageTiming>,
|
||||
}
|
||||
|
||||
/// Print JSON robot-mode sync output.
|
||||
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) {
|
||||
/// Print JSON robot-mode sync output with optional metrics.
|
||||
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&MetricsLayer>) {
|
||||
let stages = metrics.map_or_else(Vec::new, MetricsLayer::extract_timings);
|
||||
let output = SyncJsonOutput {
|
||||
ok: true,
|
||||
data: result,
|
||||
meta: SyncMeta { elapsed_ms },
|
||||
meta: SyncMeta {
|
||||
run_id: result.run_id.clone(),
|
||||
elapsed_ms,
|
||||
stages,
|
||||
},
|
||||
};
|
||||
println!("{}", serde_json::to_string(&output).unwrap());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user