feat(sync): concurrent drains, atomic watermarks, graceful Ctrl+C shutdown

Three fixes to the sync pipeline:

1. Atomic watermarks: wrap complete_job + update_watermark in a single
   SQLite transaction so a crash between them can't leave partial state.

2. Concurrent drain loops: prefetch HTTP requests via join_all (batch
   size = dependent_concurrency), then write serially to the DB. Cuts
   the time for ~9K previously sequential requests from ~19 min to
   ~2.4 min.

3. Graceful shutdown: install a Ctrl+C handler via ShutdownSignal
   (Arc<AtomicBool>), thread it through the orchestrator/CLI, release
   locked jobs on interrupt, and record the sync_run as "failed".

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-06 11:22:04 -05:00
parent 32783080f1
commit 405e5370dc
9 changed files with 536 additions and 92 deletions

View File

@@ -9,6 +9,7 @@ use tracing::{info, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::metrics::{MetricsLayer, StageTiming};
use crate::core::shutdown::ShutdownSignal;
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
@@ -58,6 +59,7 @@ pub async fn run_sync(
config: &Config,
options: SyncOptions,
run_id: Option<&str>,
signal: &ShutdownSignal,
) -> Result<SyncResult> {
let generated_id;
let run_id = match run_id {
@@ -112,6 +114,7 @@ pub async fn run_sync(
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
signal,
)
.await?;
result.issues_updated = issues_result.issues_upserted;
@@ -120,6 +123,11 @@ pub async fn run_sync(
result.resource_events_failed += issues_result.resource_events_failed;
spinner.finish_and_clear();
if signal.is_cancelled() {
info!("Shutdown requested after issues stage, returning partial sync results");
return Ok(result);
}
current_stage += 1;
let spinner = stage_spinner(
current_stage,
@@ -137,6 +145,7 @@ pub async fn run_sync(
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
signal,
)
.await?;
result.mrs_updated = mrs_result.mrs_upserted;
@@ -145,6 +154,11 @@ pub async fn run_sync(
result.resource_events_failed += mrs_result.resource_events_failed;
spinner.finish_and_clear();
if signal.is_cancelled() {
info!("Shutdown requested after MRs stage, returning partial sync results");
return Ok(result);
}
if !options.no_docs {
current_stage += 1;
let spinner = stage_spinner(