feat(sync): concurrent drains, atomic watermarks, graceful Ctrl+C shutdown
Three fixes to the sync pipeline: 1. Atomic watermarks: wrap complete_job + update_watermark in a single SQLite transaction so crash between them can't leave partial state. 2. Concurrent drain loops: prefetch HTTP requests via join_all (batch size = dependent_concurrency), then write serially to DB. Reduces ~9K sequential requests from ~19 min to ~2.4 min. 3. Graceful shutdown: install Ctrl+C handler via ShutdownSignal (Arc<AtomicBool>), thread through orchestrator/CLI, release locked jobs on interrupt, record sync_run as "failed". Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -103,6 +103,28 @@ pub fn complete_job(conn: &Connection, job_id: i64) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Same DELETE as `complete_job`, but on an existing transaction so the caller
|
||||
/// can bundle it atomically with a watermark update.
|
||||
pub fn complete_job_tx(tx: &rusqlite::Transaction<'_>, job_id: i64) -> Result<()> {
|
||||
tx.execute(
|
||||
"DELETE FROM pending_dependent_fetches WHERE id = ?1",
|
||||
rusqlite::params![job_id],
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Release all currently locked jobs (set `locked_at = NULL`).
|
||||
/// Used during graceful shutdown so the next sync doesn't wait for stale locks.
|
||||
pub fn release_all_locked_jobs(conn: &Connection) -> Result<usize> {
|
||||
let changes = conn.execute(
|
||||
"UPDATE pending_dependent_fetches SET locked_at = NULL WHERE locked_at IS NOT NULL",
|
||||
[],
|
||||
)?;
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
pub fn fail_job(conn: &Connection, job_id: i64, error: &str) -> Result<()> {
|
||||
let now = now_ms();
|
||||
|
||||
@@ -200,3 +222,109 @@ pub fn count_claimable_jobs(conn: &Connection, project_id: i64) -> Result<HashMa
|
||||
|
||||
Ok(counts)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;
    use crate::core::db::{create_connection, run_migrations};

    /// Builds an in-memory DB seeded with one project and one queued job,
    /// returning the connection plus the job's primary key.
    fn setup_db_with_job() -> (Connection, i64) {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();

        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
            [],
        )
        .unwrap();

        let project_id: i64 = conn
            .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0))
            .unwrap();

        enqueue_job(&conn, project_id, "issue", 42, 100, "resource_events", None).unwrap();

        let job_id: i64 = conn
            .query_row(
                "SELECT id FROM pending_dependent_fetches LIMIT 1",
                [],
                |row| row.get(0),
            )
            .unwrap();

        (conn, job_id)
    }

    /// Count of `pending_dependent_fetches` rows with the given id (0 or 1).
    fn rows_with_id(conn: &Connection, job_id: i64) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    /// Whether the job row currently holds a lock (`locked_at IS NOT NULL`).
    fn is_locked(conn: &Connection, job_id: i64) -> bool {
        conn.query_row(
            "SELECT locked_at IS NOT NULL FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    #[test]
    fn complete_job_tx_commits() {
        let (conn, job_id) = setup_db_with_job();

        let tx = conn.unchecked_transaction().unwrap();
        complete_job_tx(&tx, job_id).unwrap();
        tx.commit().unwrap();

        assert_eq!(
            rows_with_id(&conn, job_id),
            0,
            "job should be deleted after commit"
        );
    }

    #[test]
    fn complete_job_tx_rollback() {
        let (conn, job_id) = setup_db_with_job();

        {
            let tx = conn.unchecked_transaction().unwrap();
            complete_job_tx(&tx, job_id).unwrap();
            // drop tx without commit = rollback
        }

        assert_eq!(
            rows_with_id(&conn, job_id),
            1,
            "job should survive dropped (rolled-back) tx"
        );
    }

    #[test]
    fn release_all_locked_jobs_clears_locks() {
        let (conn, _job_id) = setup_db_with_job();

        let project_id: i64 = conn
            .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0))
            .unwrap();
        let jobs = claim_jobs(&conn, "resource_events", project_id, 10).unwrap();
        assert_eq!(jobs.len(), 1);
        assert!(is_locked(&conn, jobs[0].id), "job should be locked after claim");

        let released = release_all_locked_jobs(&conn).unwrap();
        assert_eq!(released, 1);

        assert!(
            !is_locked(&conn, jobs[0].id),
            "job should be unlocked after release_all"
        );
    }
}
|
||||
|
||||
@@ -12,6 +12,7 @@ pub mod paths;
|
||||
pub mod payloads;
|
||||
pub mod project;
|
||||
pub mod references;
|
||||
pub mod shutdown;
|
||||
pub mod sync_run;
|
||||
pub mod time;
|
||||
pub mod timeline;
|
||||
|
||||
63
src/core/shutdown.rs
Normal file
63
src/core/shutdown.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
/// A cooperative cancellation token for graceful shutdown.
///
/// Clone-able and cheaply checkable from any thread or async task.
/// When `cancel()` is called (typically from a Ctrl+C signal handler),
/// all clones observe the cancellation via `is_cancelled()`.
///
/// Cloning is cheap: every clone shares the same `Arc<AtomicBool>`.
#[derive(Clone, Debug, Default)]
pub struct ShutdownSignal {
    // Shared flag; becomes `true` once cancellation has been requested.
    cancelled: Arc<AtomicBool>,
}

impl ShutdownSignal {
    /// Creates a new signal in the not-cancelled state.
    pub fn new() -> Self {
        // `AtomicBool::default()` is `false`, so the derived Default is exactly
        // the not-cancelled state; `new` delegates to avoid duplicating it.
        Self::default()
    }

    /// Requests shutdown. Idempotent and safe to call from any thread,
    /// including a signal handler.
    ///
    /// `Relaxed` ordering suffices here: the flag is a standalone boolean
    /// used only for polling and does not publish other memory writes.
    pub fn cancel(&self) {
        self.cancelled.store(true, Ordering::Relaxed);
    }

    /// Returns `true` once `cancel()` has been called on this signal or
    /// any of its clones.
    #[must_use]
    pub fn is_cancelled(&self) -> bool {
        self.cancelled.load(Ordering::Relaxed)
    }
}
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created signal must not report cancellation.
    #[test]
    fn signal_starts_uncancelled() {
        assert!(!ShutdownSignal::new().is_cancelled());
    }

    /// `cancel()` flips the flag as observed by the same handle.
    #[test]
    fn cancel_sets_flag() {
        let sig = ShutdownSignal::new();
        sig.cancel();
        assert!(sig.is_cancelled());
    }

    /// Cancelling one handle is visible through every clone.
    #[test]
    fn clone_propagates_cancellation() {
        let original = ShutdownSignal::new();
        let observer = original.clone();
        original.cancel();
        assert!(
            observer.is_cancelled(),
            "clone should see cancellation from original"
        );
    }
}
|
||||
Reference in New Issue
Block a user