feat(observability): Add metrics, logging, and sync-run core modules
Introduce the foundational observability layer for the sync pipeline: - MetricsLayer: Custom tracing subscriber layer that captures span timing and structured fields, materializing them into a hierarchical Vec<StageTiming> tree for robot-mode performance data output - logging: Dual-layer subscriber infrastructure with configurable stderr verbosity (-v/-vv/-vvv) and always-on JSON file logging with daily rotation and configurable retention (default 30 days) - SyncRunRecorder: Compile-time enforced lifecycle recorder for sync_runs table (start -> succeed|fail), with correlation IDs and aggregate counts - LoggingConfig: New config section for log_dir, retention_days, and file_logging toggle - get_log_dir(): Path helper for log directory resolution - is_permanent_api_error(): Distinguish retryable vs permanent API failures (only 404 is truly permanent; 403/auth errors may be environmental) Database changes: - Migration 013: Add resource_events_synced_for_updated_at watermark columns to issues and merge_requests tables for incremental resource event sync - Migration 014: Enrich sync_runs with run_id correlation ID, aggregate counts (total_items_processed, total_errors), and run_id index - Wrap file-based migrations in savepoints for rollback safety Dependencies: Add uuid (run_id generation), tracing-appender (file logging) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
232
src/core/sync_run.rs
Normal file
232
src/core/sync_run.rs
Normal file
@@ -0,0 +1,232 @@
|
||||
//! Sync run lifecycle recorder.
|
||||
//!
|
||||
//! Encapsulates the INSERT-on-start, UPDATE-on-finish lifecycle for the
|
||||
//! `sync_runs` table, enabling sync history tracking and observability.
|
||||
|
||||
use rusqlite::Connection;
|
||||
|
||||
use super::error::Result;
|
||||
use super::metrics::StageTiming;
|
||||
use super::time::now_ms;
|
||||
|
||||
/// Records a single sync run's lifecycle in the `sync_runs` table.
|
||||
///
|
||||
/// Created via [`start`](Self::start), then finalized with either
|
||||
/// [`succeed`](Self::succeed) or [`fail`](Self::fail). Both finalizers
|
||||
/// consume `self` to enforce single-use at compile time.
|
||||
pub struct SyncRunRecorder {
|
||||
row_id: i64,
|
||||
}
|
||||
|
||||
impl SyncRunRecorder {
|
||||
/// Insert a new `sync_runs` row with `status='running'`.
|
||||
pub fn start(conn: &Connection, command: &str, run_id: &str) -> Result<Self> {
|
||||
let now = now_ms();
|
||||
conn.execute(
|
||||
"INSERT INTO sync_runs (started_at, heartbeat_at, status, command, run_id)
|
||||
VALUES (?1, ?2, 'running', ?3, ?4)",
|
||||
rusqlite::params![now, now, command, run_id],
|
||||
)?;
|
||||
let row_id = conn.last_insert_rowid();
|
||||
Ok(Self { row_id })
|
||||
}
|
||||
|
||||
/// Mark run as succeeded with full metrics.
|
||||
pub fn succeed(
|
||||
self,
|
||||
conn: &Connection,
|
||||
metrics: &[StageTiming],
|
||||
total_items: usize,
|
||||
total_errors: usize,
|
||||
) -> Result<()> {
|
||||
let now = now_ms();
|
||||
let metrics_json = serde_json::to_string(metrics).unwrap_or_else(|_| "[]".to_string());
|
||||
conn.execute(
|
||||
"UPDATE sync_runs
|
||||
SET finished_at = ?1, status = 'succeeded',
|
||||
metrics_json = ?2, total_items_processed = ?3, total_errors = ?4
|
||||
WHERE id = ?5",
|
||||
rusqlite::params![
|
||||
now,
|
||||
metrics_json,
|
||||
total_items as i64,
|
||||
total_errors as i64,
|
||||
self.row_id
|
||||
],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mark run as failed with error message and optional partial metrics.
|
||||
pub fn fail(
|
||||
self,
|
||||
conn: &Connection,
|
||||
error: &str,
|
||||
metrics: Option<&[StageTiming]>,
|
||||
) -> Result<()> {
|
||||
let now = now_ms();
|
||||
let metrics_json =
|
||||
metrics.map(|m| serde_json::to_string(m).unwrap_or_else(|_| "[]".to_string()));
|
||||
conn.execute(
|
||||
"UPDATE sync_runs
|
||||
SET finished_at = ?1, status = 'failed', error = ?2,
|
||||
metrics_json = ?3
|
||||
WHERE id = ?4",
|
||||
rusqlite::params![now, error, metrics_json, self.row_id],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
use std::path::Path;
|
||||
|
||||
fn setup_test_db() -> Connection {
|
||||
let conn = create_connection(Path::new(":memory:")).unwrap();
|
||||
run_migrations(&conn).unwrap();
|
||||
conn
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sync_run_recorder_start() {
|
||||
let conn = setup_test_db();
|
||||
let recorder = SyncRunRecorder::start(&conn, "sync", "abc12345").unwrap();
|
||||
assert!(recorder.row_id > 0);
|
||||
|
||||
let (status, command, run_id): (String, String, String) = conn
|
||||
.query_row(
|
||||
"SELECT status, command, run_id FROM sync_runs WHERE id = ?1",
|
||||
[recorder.row_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(status, "running");
|
||||
assert_eq!(command, "sync");
|
||||
assert_eq!(run_id, "abc12345");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sync_run_recorder_succeed() {
|
||||
let conn = setup_test_db();
|
||||
let recorder = SyncRunRecorder::start(&conn, "sync", "def67890").unwrap();
|
||||
let row_id = recorder.row_id;
|
||||
|
||||
let metrics = vec![StageTiming {
|
||||
name: "ingest".to_string(),
|
||||
project: None,
|
||||
elapsed_ms: 1200,
|
||||
items_processed: 50,
|
||||
items_skipped: 0,
|
||||
errors: 2,
|
||||
rate_limit_hits: 0,
|
||||
retries: 0,
|
||||
sub_stages: vec![],
|
||||
}];
|
||||
|
||||
recorder.succeed(&conn, &metrics, 50, 2).unwrap();
|
||||
|
||||
let (status, finished_at, metrics_json, total_items, total_errors): (
|
||||
String,
|
||||
Option<i64>,
|
||||
Option<String>,
|
||||
i64,
|
||||
i64,
|
||||
) = conn
|
||||
.query_row(
|
||||
"SELECT status, finished_at, metrics_json, total_items_processed, total_errors
|
||||
FROM sync_runs WHERE id = ?1",
|
||||
[row_id],
|
||||
|row| {
|
||||
Ok((
|
||||
row.get(0)?,
|
||||
row.get(1)?,
|
||||
row.get(2)?,
|
||||
row.get(3)?,
|
||||
row.get(4)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(status, "succeeded");
|
||||
assert!(finished_at.is_some());
|
||||
assert!(metrics_json.is_some());
|
||||
assert_eq!(total_items, 50);
|
||||
assert_eq!(total_errors, 2);
|
||||
|
||||
// Verify metrics_json is parseable
|
||||
let parsed: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
|
||||
assert_eq!(parsed.len(), 1);
|
||||
assert_eq!(parsed[0].name, "ingest");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sync_run_recorder_fail() {
|
||||
let conn = setup_test_db();
|
||||
let recorder = SyncRunRecorder::start(&conn, "ingest issues", "fail0001").unwrap();
|
||||
let row_id = recorder.row_id;
|
||||
|
||||
recorder.fail(&conn, "GitLab auth failed", None).unwrap();
|
||||
|
||||
let (status, finished_at, error, metrics_json): (
|
||||
String,
|
||||
Option<i64>,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
) = conn
|
||||
.query_row(
|
||||
"SELECT status, finished_at, error, metrics_json
|
||||
FROM sync_runs WHERE id = ?1",
|
||||
[row_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(status, "failed");
|
||||
assert!(finished_at.is_some());
|
||||
assert_eq!(error.as_deref(), Some("GitLab auth failed"));
|
||||
assert!(metrics_json.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sync_run_recorder_fail_with_partial_metrics() {
|
||||
let conn = setup_test_db();
|
||||
let recorder = SyncRunRecorder::start(&conn, "sync", "part0001").unwrap();
|
||||
let row_id = recorder.row_id;
|
||||
|
||||
let partial_metrics = vec![StageTiming {
|
||||
name: "ingest_issues".to_string(),
|
||||
project: Some("group/repo".to_string()),
|
||||
elapsed_ms: 800,
|
||||
items_processed: 30,
|
||||
items_skipped: 0,
|
||||
errors: 0,
|
||||
rate_limit_hits: 1,
|
||||
retries: 0,
|
||||
sub_stages: vec![],
|
||||
}];
|
||||
|
||||
recorder
|
||||
.fail(&conn, "Embedding failed", Some(&partial_metrics))
|
||||
.unwrap();
|
||||
|
||||
let (status, metrics_json): (String, Option<String>) = conn
|
||||
.query_row(
|
||||
"SELECT status, metrics_json FROM sync_runs WHERE id = ?1",
|
||||
[row_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(status, "failed");
|
||||
assert!(metrics_json.is_some());
|
||||
|
||||
let parsed: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
|
||||
assert_eq!(parsed.len(), 1);
|
||||
assert_eq!(parsed[0].name, "ingest_issues");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user