Introduce the foundational observability layer for the sync pipeline: - MetricsLayer: Custom tracing subscriber layer that captures span timing and structured fields, materializing them into a hierarchical Vec<StageTiming> tree for robot-mode performance data output - logging: Dual-layer subscriber infrastructure with configurable stderr verbosity (-v/-vv/-vvv) and always-on JSON file logging with daily rotation and configurable retention (default 30 days) - SyncRunRecorder: Compile-time enforced lifecycle recorder for sync_runs table (start -> succeed|fail), with correlation IDs and aggregate counts - LoggingConfig: New config section for log_dir, retention_days, and file_logging toggle - get_log_dir(): Path helper for log directory resolution - is_permanent_api_error(): Distinguish retryable vs permanent API failures (only 404 is truly permanent; 403/auth errors may be environmental) Database changes: - Migration 013: Add resource_events_synced_for_updated_at watermark columns to issues and merge_requests tables for incremental resource event sync - Migration 014: Enrich sync_runs with run_id correlation ID, aggregate counts (total_items_processed, total_errors), and run_id index - Wrap file-based migrations in savepoints for rollback safety Dependencies: Add uuid (run_id generation), tracing-appender (file logging) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
233 lines
7.3 KiB
Rust
//! Sync run lifecycle recorder.
|
|
//!
|
|
//! Encapsulates the INSERT-on-start, UPDATE-on-finish lifecycle for the
|
|
//! `sync_runs` table, enabling sync history tracking and observability.
|
|
|
|
use rusqlite::Connection;
|
|
|
|
use super::error::Result;
|
|
use super::metrics::StageTiming;
|
|
use super::time::now_ms;
|
|
|
|
/// Records a single sync run's lifecycle in the `sync_runs` table.
///
/// Created via [`start`](Self::start), then finalized with either
/// [`succeed`](Self::succeed) or [`fail`](Self::fail). Both finalizers
/// consume `self` to enforce single-use at compile time.
pub struct SyncRunRecorder {
    // SQLite rowid of the `sync_runs` row inserted by `start`; the
    // finalizers use it to target their UPDATE statements.
    row_id: i64,
}
|
|
|
|
impl SyncRunRecorder {
|
|
/// Insert a new `sync_runs` row with `status='running'`.
|
|
pub fn start(conn: &Connection, command: &str, run_id: &str) -> Result<Self> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"INSERT INTO sync_runs (started_at, heartbeat_at, status, command, run_id)
|
|
VALUES (?1, ?2, 'running', ?3, ?4)",
|
|
rusqlite::params![now, now, command, run_id],
|
|
)?;
|
|
let row_id = conn.last_insert_rowid();
|
|
Ok(Self { row_id })
|
|
}
|
|
|
|
/// Mark run as succeeded with full metrics.
|
|
pub fn succeed(
|
|
self,
|
|
conn: &Connection,
|
|
metrics: &[StageTiming],
|
|
total_items: usize,
|
|
total_errors: usize,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json = serde_json::to_string(metrics).unwrap_or_else(|_| "[]".to_string());
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'succeeded',
|
|
metrics_json = ?2, total_items_processed = ?3, total_errors = ?4
|
|
WHERE id = ?5",
|
|
rusqlite::params![
|
|
now,
|
|
metrics_json,
|
|
total_items as i64,
|
|
total_errors as i64,
|
|
self.row_id
|
|
],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Mark run as failed with error message and optional partial metrics.
|
|
pub fn fail(
|
|
self,
|
|
conn: &Connection,
|
|
error: &str,
|
|
metrics: Option<&[StageTiming]>,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json =
|
|
metrics.map(|m| serde_json::to_string(m).unwrap_or_else(|_| "[]".to_string()));
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'failed', error = ?2,
|
|
metrics_json = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![now, error, metrics_json, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::db::{create_connection, run_migrations};
    use std::path::Path;

    /// Open an in-memory SQLite database with all migrations applied, so the
    /// `sync_runs` table (including run_id / aggregate-count columns) exists.
    fn setup_test_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        conn
    }

    // `start` inserts a 'running' row carrying the command and run_id.
    #[test]
    fn test_sync_run_recorder_start() {
        let conn = setup_test_db();
        let recorder = SyncRunRecorder::start(&conn, "sync", "abc12345").unwrap();
        assert!(recorder.row_id > 0);

        let (status, command, run_id): (String, String, String) = conn
            .query_row(
                "SELECT status, command, run_id FROM sync_runs WHERE id = ?1",
                [recorder.row_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .unwrap();

        assert_eq!(status, "running");
        assert_eq!(command, "sync");
        assert_eq!(run_id, "abc12345");
    }

    // `succeed` stamps finished_at, stores serialized metrics, and records
    // the aggregate item/error counts.
    #[test]
    fn test_sync_run_recorder_succeed() {
        let conn = setup_test_db();
        let recorder = SyncRunRecorder::start(&conn, "sync", "def67890").unwrap();
        let row_id = recorder.row_id;

        let metrics = vec![StageTiming {
            name: "ingest".to_string(),
            project: None,
            elapsed_ms: 1200,
            items_processed: 50,
            items_skipped: 0,
            errors: 2,
            rate_limit_hits: 0,
            retries: 0,
            sub_stages: vec![],
        }];

        recorder.succeed(&conn, &metrics, 50, 2).unwrap();

        let (status, finished_at, metrics_json, total_items, total_errors): (
            String,
            Option<i64>,
            Option<String>,
            i64,
            i64,
        ) = conn
            .query_row(
                "SELECT status, finished_at, metrics_json, total_items_processed, total_errors
                 FROM sync_runs WHERE id = ?1",
                [row_id],
                |row| {
                    Ok((
                        row.get(0)?,
                        row.get(1)?,
                        row.get(2)?,
                        row.get(3)?,
                        row.get(4)?,
                    ))
                },
            )
            .unwrap();

        assert_eq!(status, "succeeded");
        assert!(finished_at.is_some());
        assert!(metrics_json.is_some());
        assert_eq!(total_items, 50);
        assert_eq!(total_errors, 2);

        // Verify metrics_json is parseable
        let parsed: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].name, "ingest");
    }

    // `fail` with no metrics stores the error message and leaves
    // metrics_json NULL.
    #[test]
    fn test_sync_run_recorder_fail() {
        let conn = setup_test_db();
        let recorder = SyncRunRecorder::start(&conn, "ingest issues", "fail0001").unwrap();
        let row_id = recorder.row_id;

        recorder.fail(&conn, "GitLab auth failed", None).unwrap();

        let (status, finished_at, error, metrics_json): (
            String,
            Option<i64>,
            Option<String>,
            Option<String>,
        ) = conn
            .query_row(
                "SELECT status, finished_at, error, metrics_json
                 FROM sync_runs WHERE id = ?1",
                [row_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap();

        assert_eq!(status, "failed");
        assert!(finished_at.is_some());
        assert_eq!(error.as_deref(), Some("GitLab auth failed"));
        assert!(metrics_json.is_none());
    }

    // `fail` with Some(metrics) persists the partial stage timings collected
    // before the failure, round-trippable through serde_json.
    #[test]
    fn test_sync_run_recorder_fail_with_partial_metrics() {
        let conn = setup_test_db();
        let recorder = SyncRunRecorder::start(&conn, "sync", "part0001").unwrap();
        let row_id = recorder.row_id;

        let partial_metrics = vec![StageTiming {
            name: "ingest_issues".to_string(),
            project: Some("group/repo".to_string()),
            elapsed_ms: 800,
            items_processed: 30,
            items_skipped: 0,
            errors: 0,
            rate_limit_hits: 1,
            retries: 0,
            sub_stages: vec![],
        }];

        recorder
            .fail(&conn, "Embedding failed", Some(&partial_metrics))
            .unwrap();

        let (status, metrics_json): (String, Option<String>) = conn
            .query_row(
                "SELECT status, metrics_json FROM sync_runs WHERE id = ?1",
                [row_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap();

        assert_eq!(status, "failed");
        assert!(metrics_json.is_some());

        let parsed: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].name, "ingest_issues");
    }
}
|