feat(sync): concurrent drains, atomic watermarks, graceful Ctrl+C shutdown
Three fixes to the sync pipeline: 1. Atomic watermarks: wrap complete_job + update_watermark in a single SQLite transaction so crash between them can't leave partial state. 2. Concurrent drain loops: prefetch HTTP requests via join_all (batch size = dependent_concurrency), then write serially to DB. Reduces ~9K sequential requests from ~19 min to ~2.4 min. 3. Graceful shutdown: install Ctrl+C handler via ShutdownSignal (Arc<AtomicBool>), thread through orchestrator/CLI, release locked jobs on interrupt, record sync_run as "failed". Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
70
src/main.rs
70
src/main.rs
@@ -30,10 +30,12 @@ use lore::cli::{
|
||||
use lore::core::db::{
|
||||
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
|
||||
};
|
||||
use lore::core::dependent_queue::release_all_locked_jobs;
|
||||
use lore::core::error::{LoreError, RobotErrorOutput};
|
||||
use lore::core::logging;
|
||||
use lore::core::metrics::MetricsLayer;
|
||||
use lore::core::paths::{get_config_path, get_db_path, get_log_dir};
|
||||
use lore::core::shutdown::ShutdownSignal;
|
||||
use lore::core::sync_run::SyncRunRecorder;
|
||||
|
||||
#[tokio::main]
|
||||
@@ -658,6 +660,13 @@ async fn handle_ingest(
|
||||
let run_id_short = &run_id[..8];
|
||||
let recorder = SyncRunRecorder::start(&recorder_conn, &command, run_id_short)?;
|
||||
|
||||
let signal = ShutdownSignal::new();
|
||||
let signal_for_handler = signal.clone();
|
||||
tokio::spawn(async move {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
signal_for_handler.cancel();
|
||||
});
|
||||
|
||||
let ingest_result: std::result::Result<(), Box<dyn std::error::Error>> = async {
|
||||
match args.entity.as_deref() {
|
||||
Some(resource_type) => {
|
||||
@@ -670,6 +679,7 @@ async fn handle_ingest(
|
||||
false,
|
||||
display,
|
||||
None,
|
||||
&signal,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -697,6 +707,7 @@ async fn handle_ingest(
|
||||
false,
|
||||
display,
|
||||
None,
|
||||
&signal,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -709,6 +720,7 @@ async fn handle_ingest(
|
||||
false,
|
||||
display,
|
||||
None,
|
||||
&signal,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -725,6 +737,22 @@ async fn handle_ingest(
|
||||
.await;
|
||||
|
||||
match ingest_result {
|
||||
Ok(()) if signal.is_cancelled() => {
|
||||
let stages = metrics.extract_timings();
|
||||
let _ = release_all_locked_jobs(&recorder_conn);
|
||||
let _ = recorder.fail(
|
||||
&recorder_conn,
|
||||
"Interrupted by user (Ctrl+C)",
|
||||
Some(&stages),
|
||||
);
|
||||
if !robot_mode {
|
||||
eprintln!(
|
||||
"{}",
|
||||
style("Interrupted by Ctrl+C. Partial data has been saved.").yellow()
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Ok(()) => {
|
||||
let stages = metrics.extract_timings();
|
||||
let total_items: usize = stages.iter().map(|s| s.items_processed).sum();
|
||||
@@ -734,6 +762,7 @@ async fn handle_ingest(
|
||||
}
|
||||
Err(e) => {
|
||||
let stages = metrics.extract_timings();
|
||||
let _ = release_all_locked_jobs(&recorder_conn);
|
||||
let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages));
|
||||
Err(e)
|
||||
}
|
||||
@@ -1521,7 +1550,8 @@ async fn handle_sync_cmd(
|
||||
|
||||
// For dry_run, skip recording and just show the preview
|
||||
if dry_run {
|
||||
run_sync(&config, options, None).await?;
|
||||
let signal = ShutdownSignal::new();
|
||||
run_sync(&config, options, None, &signal).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -1531,8 +1561,43 @@ async fn handle_sync_cmd(
|
||||
let run_id_short = &run_id[..8];
|
||||
let recorder = SyncRunRecorder::start(&recorder_conn, "sync", run_id_short)?;
|
||||
|
||||
let signal = ShutdownSignal::new();
|
||||
let signal_for_handler = signal.clone();
|
||||
tokio::spawn(async move {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
signal_for_handler.cancel();
|
||||
});
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
match run_sync(&config, options, Some(run_id_short)).await {
|
||||
match run_sync(&config, options, Some(run_id_short), &signal).await {
|
||||
Ok(result) if signal.is_cancelled() => {
|
||||
let elapsed = start.elapsed();
|
||||
let stages = metrics.extract_timings();
|
||||
let released = release_all_locked_jobs(&recorder_conn).unwrap_or(0);
|
||||
let _ = recorder.fail(
|
||||
&recorder_conn,
|
||||
"Interrupted by user (Ctrl+C)",
|
||||
Some(&stages),
|
||||
);
|
||||
|
||||
if robot_mode {
|
||||
print_sync_json(&result, elapsed.as_millis() as u64, Some(metrics));
|
||||
} else {
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
"{}",
|
||||
console::style("Interrupted by Ctrl+C. Partial results:").yellow()
|
||||
);
|
||||
print_sync(&result, elapsed, Some(metrics));
|
||||
if released > 0 {
|
||||
eprintln!(
|
||||
"{}",
|
||||
console::style(format!("Released {released} locked jobs")).dim()
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Ok(result) => {
|
||||
let elapsed = start.elapsed();
|
||||
let stages = metrics.extract_timings();
|
||||
@@ -1552,6 +1617,7 @@ async fn handle_sync_cmd(
|
||||
}
|
||||
Err(e) => {
|
||||
let stages = metrics.extract_timings();
|
||||
let _ = release_all_locked_jobs(&recorder_conn);
|
||||
let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages));
|
||||
Err(e.into())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user