feat(sync): Instrument pipeline with tracing spans, run_id correlation, and metrics
Add end-to-end observability to the sync and ingest pipelines: Sync command: - Generate UUID-based run_id for each sync invocation, propagated through all child spans for log correlation across stages - Accept MetricsLayer reference to extract hierarchical StageTiming data after pipeline completion for robot-mode performance output - Record sync runs in DB via SyncRunRecorder (start/succeed/fail lifecycle) - Wrap entire sync execution in a root tracing span with run_id field Ingest command: - Wrap run_ingest in an instrumented root span with run_id and resource_type - Add project path prefix to discussion progress bars for multi-project clarity - Reset resource_events_synced_for_updated_at on --full re-sync Sync status: - Expand from single last_run to configurable recent runs list (default 10) - Parse and expose StageTiming metrics from stored metrics_json - Add run_id, total_items_processed, total_errors to SyncRunInfo - Add mr_count to DataSummary for complete entity coverage Orchestrator: - Add #[instrument] with structured fields to issue and MR ingestion functions - Record items_processed, items_skipped, errors on span close for MetricsLayer - Emit granular progress events (IssuesFetchStarted, IssuesFetchComplete) - Pass project_id through to drain_resource_events for scoped job claiming Document regenerator and embedding pipeline: - Add #[instrument] spans with items_processed, items_skipped, errors fields - Record final counts on span close for metrics extraction Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -7,8 +7,11 @@ use serde::Serialize;
|
||||
use crate::Config;
|
||||
use crate::core::db::create_connection;
|
||||
use crate::core::error::Result;
|
||||
use crate::core::metrics::StageTiming;
|
||||
use crate::core::paths::get_db_path;
|
||||
use crate::core::time::ms_to_iso;
|
||||
use crate::core::time::{format_full_datetime, ms_to_iso};
|
||||
|
||||
const RECENT_RUNS_LIMIT: usize = 10;
|
||||
|
||||
/// Sync run information.
|
||||
#[derive(Debug)]
|
||||
@@ -19,6 +22,10 @@ pub struct SyncRunInfo {
|
||||
pub status: String,
|
||||
pub command: String,
|
||||
pub error: Option<String>,
|
||||
pub run_id: Option<String>,
|
||||
pub total_items_processed: i64,
|
||||
pub total_errors: i64,
|
||||
pub stages: Option<Vec<StageTiming>>,
|
||||
}
|
||||
|
||||
/// Cursor position information.
|
||||
@@ -34,6 +41,7 @@ pub struct CursorInfo {
|
||||
#[derive(Debug)]
|
||||
pub struct DataSummary {
|
||||
pub issue_count: i64,
|
||||
pub mr_count: i64,
|
||||
pub discussion_count: i64,
|
||||
pub note_count: i64,
|
||||
pub system_note_count: i64,
|
||||
@@ -42,7 +50,7 @@ pub struct DataSummary {
|
||||
/// Complete sync status result.
|
||||
#[derive(Debug)]
|
||||
pub struct SyncStatusResult {
|
||||
pub last_run: Option<SyncRunInfo>,
|
||||
pub runs: Vec<SyncRunInfo>,
|
||||
pub cursors: Vec<CursorInfo>,
|
||||
pub summary: DataSummary,
|
||||
}
|
||||
@@ -52,42 +60,49 @@ pub fn run_sync_status(config: &Config) -> Result<SyncStatusResult> {
|
||||
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||
let conn = create_connection(&db_path)?;
|
||||
|
||||
let last_run = get_last_sync_run(&conn)?;
|
||||
let runs = get_recent_sync_runs(&conn, RECENT_RUNS_LIMIT)?;
|
||||
let cursors = get_cursor_positions(&conn)?;
|
||||
let summary = get_data_summary(&conn)?;
|
||||
|
||||
Ok(SyncStatusResult {
|
||||
last_run,
|
||||
runs,
|
||||
cursors,
|
||||
summary,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the most recent sync run.
|
||||
fn get_last_sync_run(conn: &Connection) -> Result<Option<SyncRunInfo>> {
|
||||
/// Get the most recent sync runs.
|
||||
fn get_recent_sync_runs(conn: &Connection, limit: usize) -> Result<Vec<SyncRunInfo>> {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT id, started_at, finished_at, status, command, error
|
||||
"SELECT id, started_at, finished_at, status, command, error,
|
||||
run_id, total_items_processed, total_errors, metrics_json
|
||||
FROM sync_runs
|
||||
ORDER BY started_at DESC
|
||||
LIMIT 1",
|
||||
LIMIT ?1",
|
||||
)?;
|
||||
|
||||
let result = stmt.query_row([], |row| {
|
||||
Ok(SyncRunInfo {
|
||||
id: row.get(0)?,
|
||||
started_at: row.get(1)?,
|
||||
finished_at: row.get(2)?,
|
||||
status: row.get(3)?,
|
||||
command: row.get(4)?,
|
||||
error: row.get(5)?,
|
||||
})
|
||||
});
|
||||
let runs: std::result::Result<Vec<_>, _> = stmt
|
||||
.query_map([limit as i64], |row| {
|
||||
let metrics_json: Option<String> = row.get(9)?;
|
||||
let stages: Option<Vec<StageTiming>> =
|
||||
metrics_json.and_then(|json| serde_json::from_str(&json).ok());
|
||||
|
||||
match result {
|
||||
Ok(info) => Ok(Some(info)),
|
||||
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
Ok(SyncRunInfo {
|
||||
id: row.get(0)?,
|
||||
started_at: row.get(1)?,
|
||||
finished_at: row.get(2)?,
|
||||
status: row.get(3)?,
|
||||
command: row.get(4)?,
|
||||
error: row.get(5)?,
|
||||
run_id: row.get(6)?,
|
||||
total_items_processed: row.get::<_, Option<i64>>(7)?.unwrap_or(0),
|
||||
total_errors: row.get::<_, Option<i64>>(8)?.unwrap_or(0),
|
||||
stages,
|
||||
})
|
||||
})?
|
||||
.collect();
|
||||
|
||||
Ok(runs?)
|
||||
}
|
||||
|
||||
/// Get cursor positions for all projects/resource types.
|
||||
@@ -119,6 +134,10 @@ fn get_data_summary(conn: &Connection) -> Result<DataSummary> {
|
||||
.query_row("SELECT COUNT(*) FROM issues", [], |row| row.get(0))
|
||||
.unwrap_or(0);
|
||||
|
||||
let mr_count: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM merge_requests", [], |row| row.get(0))
|
||||
.unwrap_or(0);
|
||||
|
||||
let discussion_count: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM discussions", [], |row| row.get(0))
|
||||
.unwrap_or(0);
|
||||
@@ -133,6 +152,7 @@ fn get_data_summary(conn: &Connection) -> Result<DataSummary> {
|
||||
|
||||
Ok(DataSummary {
|
||||
issue_count,
|
||||
mr_count,
|
||||
discussion_count,
|
||||
note_count,
|
||||
system_note_count,
|
||||
@@ -149,15 +169,17 @@ fn format_duration(ms: i64) -> String {
|
||||
format!("{}h {}m {}s", hours, minutes % 60, seconds % 60)
|
||||
} else if minutes > 0 {
|
||||
format!("{}m {}s", minutes, seconds % 60)
|
||||
} else if ms >= 1000 {
|
||||
format!("{:.1}s", ms as f64 / 1000.0)
|
||||
} else {
|
||||
format!("{}s", seconds)
|
||||
format!("{}ms", ms)
|
||||
}
|
||||
}
|
||||
|
||||
/// Format number with thousands separators.
|
||||
fn format_number(n: i64) -> String {
|
||||
let is_negative = n < 0;
|
||||
let abs_n = n.abs();
|
||||
let abs_n = n.unsigned_abs();
|
||||
let s = abs_n.to_string();
|
||||
let chars: Vec<char> = s.chars().collect();
|
||||
let mut result = String::new();
|
||||
@@ -176,7 +198,10 @@ fn format_number(n: i64) -> String {
|
||||
result
|
||||
}
|
||||
|
||||
/// JSON output structures for robot mode.
|
||||
// ============================================================================
|
||||
// JSON output structures for robot mode
|
||||
// ============================================================================
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SyncStatusJsonOutput {
|
||||
ok: bool,
|
||||
@@ -185,7 +210,7 @@ struct SyncStatusJsonOutput {
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SyncStatusJsonData {
|
||||
last_sync: Option<SyncRunJsonInfo>,
|
||||
runs: Vec<SyncRunJsonInfo>,
|
||||
cursors: Vec<CursorJsonInfo>,
|
||||
summary: SummaryJsonInfo,
|
||||
}
|
||||
@@ -201,7 +226,13 @@ struct SyncRunJsonInfo {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
duration_ms: Option<i64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
run_id: Option<String>,
|
||||
total_items_processed: i64,
|
||||
total_errors: i64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
error: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
stages: Option<Vec<StageTiming>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -217,6 +248,7 @@ struct CursorJsonInfo {
|
||||
#[derive(Serialize)]
|
||||
struct SummaryJsonInfo {
|
||||
issues: i64,
|
||||
merge_requests: i64,
|
||||
discussions: i64,
|
||||
notes: i64,
|
||||
system_notes: i64,
|
||||
@@ -224,18 +256,26 @@ struct SummaryJsonInfo {
|
||||
|
||||
/// Print sync status as JSON (robot mode).
|
||||
pub fn print_sync_status_json(result: &SyncStatusResult) {
|
||||
let last_sync = result.last_run.as_ref().map(|run| {
|
||||
let duration_ms = run.finished_at.map(|f| f - run.started_at);
|
||||
SyncRunJsonInfo {
|
||||
id: run.id,
|
||||
status: run.status.clone(),
|
||||
command: run.command.clone(),
|
||||
started_at: ms_to_iso(run.started_at),
|
||||
completed_at: run.finished_at.map(ms_to_iso),
|
||||
duration_ms,
|
||||
error: run.error.clone(),
|
||||
}
|
||||
});
|
||||
let runs = result
|
||||
.runs
|
||||
.iter()
|
||||
.map(|run| {
|
||||
let duration_ms = run.finished_at.map(|f| f - run.started_at);
|
||||
SyncRunJsonInfo {
|
||||
id: run.id,
|
||||
status: run.status.clone(),
|
||||
command: run.command.clone(),
|
||||
started_at: ms_to_iso(run.started_at),
|
||||
completed_at: run.finished_at.map(ms_to_iso),
|
||||
duration_ms,
|
||||
run_id: run.run_id.clone(),
|
||||
total_items_processed: run.total_items_processed,
|
||||
total_errors: run.total_errors,
|
||||
error: run.error.clone(),
|
||||
stages: run.stages.clone(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let cursors = result
|
||||
.cursors
|
||||
@@ -251,10 +291,11 @@ pub fn print_sync_status_json(result: &SyncStatusResult) {
|
||||
let output = SyncStatusJsonOutput {
|
||||
ok: true,
|
||||
data: SyncStatusJsonData {
|
||||
last_sync,
|
||||
runs,
|
||||
cursors,
|
||||
summary: SummaryJsonInfo {
|
||||
issues: result.summary.issue_count,
|
||||
merge_requests: result.summary.mr_count,
|
||||
discussions: result.summary.discussion_count,
|
||||
notes: result.summary.note_count - result.summary.system_note_count,
|
||||
system_notes: result.summary.system_note_count,
|
||||
@@ -265,41 +306,25 @@ pub fn print_sync_status_json(result: &SyncStatusResult) {
|
||||
println!("{}", serde_json::to_string(&output).unwrap());
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Human-readable output
|
||||
// ============================================================================
|
||||
|
||||
/// Print sync status result.
|
||||
pub fn print_sync_status(result: &SyncStatusResult) {
|
||||
// Last Sync section
|
||||
println!("{}", style("Last Sync").bold().underlined());
|
||||
// Recent Runs section
|
||||
println!("{}", style("Recent Sync Runs").bold().underlined());
|
||||
println!();
|
||||
|
||||
match &result.last_run {
|
||||
Some(run) => {
|
||||
let status_styled = match run.status.as_str() {
|
||||
"succeeded" => style(&run.status).green(),
|
||||
"failed" => style(&run.status).red(),
|
||||
"running" => style(&run.status).yellow(),
|
||||
_ => style(&run.status).dim(),
|
||||
};
|
||||
|
||||
println!(" Status: {}", status_styled);
|
||||
println!(" Command: {}", run.command);
|
||||
println!(" Started: {}", ms_to_iso(run.started_at));
|
||||
|
||||
if let Some(finished) = run.finished_at {
|
||||
println!(" Completed: {}", ms_to_iso(finished));
|
||||
let duration = finished - run.started_at;
|
||||
println!(" Duration: {}", format_duration(duration));
|
||||
}
|
||||
|
||||
if let Some(error) = &run.error {
|
||||
println!(" Error: {}", style(error).red());
|
||||
}
|
||||
}
|
||||
None => {
|
||||
println!(" {}", style("No sync runs recorded yet.").dim());
|
||||
println!(
|
||||
" {}",
|
||||
style("Run 'lore ingest --type=issues' to start.").dim()
|
||||
);
|
||||
if result.runs.is_empty() {
|
||||
println!(" {}", style("No sync runs recorded yet.").dim());
|
||||
println!(
|
||||
" {}",
|
||||
style("Run 'lore sync' or 'lore ingest' to start.").dim()
|
||||
);
|
||||
} else {
|
||||
for run in &result.runs {
|
||||
print_run_line(run);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,6 +369,10 @@ pub fn print_sync_status(result: &SyncStatusResult) {
|
||||
" Issues: {}",
|
||||
style(format_number(result.summary.issue_count)).bold()
|
||||
);
|
||||
println!(
|
||||
" MRs: {}",
|
||||
style(format_number(result.summary.mr_count)).bold()
|
||||
);
|
||||
println!(
|
||||
" Discussions: {}",
|
||||
style(format_number(result.summary.discussion_count)).bold()
|
||||
@@ -361,14 +390,63 @@ pub fn print_sync_status(result: &SyncStatusResult) {
|
||||
);
|
||||
}
|
||||
|
||||
/// Print a single run as a compact one-liner.
|
||||
fn print_run_line(run: &SyncRunInfo) {
|
||||
let status_styled = match run.status.as_str() {
|
||||
"succeeded" => style(&run.status).green(),
|
||||
"failed" => style(&run.status).red(),
|
||||
"running" => style(&run.status).yellow(),
|
||||
_ => style(&run.status).dim(),
|
||||
};
|
||||
|
||||
let run_label = run
|
||||
.run_id
|
||||
.as_deref()
|
||||
.map_or_else(|| format!("#{}", run.id), |id| format!("Run {id}"));
|
||||
|
||||
let duration = run.finished_at.map(|f| format_duration(f - run.started_at));
|
||||
|
||||
let time = format_full_datetime(run.started_at);
|
||||
|
||||
let mut parts = vec![
|
||||
format!("{}", style(run_label).bold()),
|
||||
format!("{status_styled}"),
|
||||
format!("{}", style(&run.command).dim()),
|
||||
time,
|
||||
];
|
||||
|
||||
if let Some(d) = duration {
|
||||
parts.push(d);
|
||||
} else {
|
||||
parts.push("in progress".to_string());
|
||||
}
|
||||
|
||||
if run.total_items_processed > 0 {
|
||||
parts.push(format!("{} items", run.total_items_processed));
|
||||
}
|
||||
|
||||
if run.total_errors > 0 {
|
||||
parts.push(format!(
|
||||
"{}",
|
||||
style(format!("{} errors", run.total_errors)).red()
|
||||
));
|
||||
}
|
||||
|
||||
println!(" {}", parts.join(" | "));
|
||||
|
||||
if let Some(error) = &run.error {
|
||||
println!(" {}", style(error).red());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn format_duration_handles_seconds() {
|
||||
assert_eq!(format_duration(5_000), "5s");
|
||||
assert_eq!(format_duration(59_000), "59s");
|
||||
assert_eq!(format_duration(5_000), "5.0s");
|
||||
assert_eq!(format_duration(59_000), "59.0s");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -385,6 +463,12 @@ mod tests {
|
||||
assert_eq!(format_duration(3_723_000), "1h 2m 3s");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_duration_handles_milliseconds() {
|
||||
assert_eq!(format_duration(500), "500ms");
|
||||
assert_eq!(format_duration(0), "0ms");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_number_adds_thousands_separators() {
|
||||
assert_eq!(format_number(1000), "1,000");
|
||||
|
||||
Reference in New Issue
Block a user