feat(sync): concurrent drains, atomic watermarks, graceful Ctrl+C shutdown

Three fixes to the sync pipeline:

1. Atomic watermarks: wrap complete_job + update_watermark in a single
   SQLite transaction so crash between them can't leave partial state.

2. Concurrent drain loops: prefetch HTTP requests via join_all (batch
   size = dependent_concurrency), then write serially to DB. Reduces
   ~9K sequential requests from ~19 min to ~2.4 min.

3. Graceful shutdown: install Ctrl+C handler via ShutdownSignal
   (Arc<AtomicBool>), thread through orchestrator/CLI, release locked
   jobs on interrupt, record sync_run as "failed".

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-06 11:22:04 -05:00
parent 32783080f1
commit 405e5370dc
9 changed files with 536 additions and 92 deletions

10
Cargo.lock generated
View File

@@ -1756,6 +1756,15 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "simd-adler32" name = "simd-adler32"
version = "0.3.8" version = "0.3.8"
@@ -1968,6 +1977,7 @@ dependencies = [
"libc", "libc",
"mio", "mio",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry",
"socket2", "socket2",
"tokio-macros", "tokio-macros",
"windows-sys 0.61.2", "windows-sys 0.61.2",

View File

@@ -30,7 +30,7 @@ open = "5"
# HTTP # HTTP
reqwest = { version = "0.12", features = ["json"] } reqwest = { version = "0.12", features = ["json"] }
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time"] } tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "signal"] }
# Async streaming for pagination # Async streaming for pagination
async-stream = "0.3" async-stream = "0.3"

View File

@@ -14,6 +14,7 @@ use crate::core::error::{LoreError, Result};
use crate::core::lock::{AppLock, LockOptions}; use crate::core::lock::{AppLock, LockOptions};
use crate::core::paths::get_db_path; use crate::core::paths::get_db_path;
use crate::core::project::resolve_project; use crate::core::project::resolve_project;
use crate::core::shutdown::ShutdownSignal;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use crate::ingestion::{ use crate::ingestion::{
IngestMrProjectResult, IngestProjectResult, ProgressEvent, ingest_project_issues_with_progress, IngestMrProjectResult, IngestProjectResult, ProgressEvent, ingest_project_issues_with_progress,
@@ -113,6 +114,7 @@ pub async fn run_ingest(
dry_run: bool, dry_run: bool,
display: IngestDisplay, display: IngestDisplay,
stage_bar: Option<ProgressBar>, stage_bar: Option<ProgressBar>,
signal: &ShutdownSignal,
) -> Result<IngestResult> { ) -> Result<IngestResult> {
let run_id = uuid::Uuid::new_v4().simple().to_string(); let run_id = uuid::Uuid::new_v4().simple().to_string();
let run_id = &run_id[..8]; let run_id = &run_id[..8];
@@ -127,6 +129,7 @@ pub async fn run_ingest(
dry_run, dry_run,
display, display,
stage_bar, stage_bar,
signal,
) )
.instrument(span) .instrument(span)
.await .await
@@ -228,6 +231,7 @@ async fn run_ingest_inner(
dry_run: bool, dry_run: bool,
display: IngestDisplay, display: IngestDisplay,
stage_bar: Option<ProgressBar>, stage_bar: Option<ProgressBar>,
signal: &ShutdownSignal,
) -> Result<IngestResult> { ) -> Result<IngestResult> {
// In dry_run mode, we don't actually ingest - use run_ingest_dry_run instead // In dry_run mode, we don't actually ingest - use run_ingest_dry_run instead
// This flag is passed through for consistency but the actual dry-run logic // This flag is passed through for consistency but the actual dry-run logic
@@ -350,6 +354,7 @@ async fn run_ingest_inner(
let agg_disc_total = Arc::clone(&agg_disc_total); let agg_disc_total = Arc::clone(&agg_disc_total);
let agg_events = Arc::clone(&agg_events); let agg_events = Arc::clone(&agg_events);
let agg_events_total = Arc::clone(&agg_events_total); let agg_events_total = Arc::clone(&agg_events_total);
let signal = signal.clone();
async move { async move {
let proj_conn = create_connection(&db_path)?; let proj_conn = create_connection(&db_path)?;
@@ -506,6 +511,7 @@ async fn run_ingest_inner(
local_project_id, local_project_id,
gitlab_project_id, gitlab_project_id,
Some(progress_callback), Some(progress_callback),
&signal,
) )
.await?; .await?;
@@ -522,6 +528,7 @@ async fn run_ingest_inner(
gitlab_project_id, gitlab_project_id,
full, full,
Some(progress_callback), Some(progress_callback),
&signal,
) )
.await?; .await?;

View File

@@ -9,6 +9,7 @@ use tracing::{info, warn};
use crate::Config; use crate::Config;
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::metrics::{MetricsLayer, StageTiming}; use crate::core::metrics::{MetricsLayer, StageTiming};
use crate::core::shutdown::ShutdownSignal;
use super::embed::run_embed; use super::embed::run_embed;
use super::generate_docs::run_generate_docs; use super::generate_docs::run_generate_docs;
@@ -58,6 +59,7 @@ pub async fn run_sync(
config: &Config, config: &Config,
options: SyncOptions, options: SyncOptions,
run_id: Option<&str>, run_id: Option<&str>,
signal: &ShutdownSignal,
) -> Result<SyncResult> { ) -> Result<SyncResult> {
let generated_id; let generated_id;
let run_id = match run_id { let run_id = match run_id {
@@ -112,6 +114,7 @@ pub async fn run_sync(
false, // dry_run - sync has its own dry_run handling false, // dry_run - sync has its own dry_run handling
ingest_display, ingest_display,
Some(spinner.clone()), Some(spinner.clone()),
signal,
) )
.await?; .await?;
result.issues_updated = issues_result.issues_upserted; result.issues_updated = issues_result.issues_upserted;
@@ -120,6 +123,11 @@ pub async fn run_sync(
result.resource_events_failed += issues_result.resource_events_failed; result.resource_events_failed += issues_result.resource_events_failed;
spinner.finish_and_clear(); spinner.finish_and_clear();
if signal.is_cancelled() {
info!("Shutdown requested after issues stage, returning partial sync results");
return Ok(result);
}
current_stage += 1; current_stage += 1;
let spinner = stage_spinner( let spinner = stage_spinner(
current_stage, current_stage,
@@ -137,6 +145,7 @@ pub async fn run_sync(
false, // dry_run - sync has its own dry_run handling false, // dry_run - sync has its own dry_run handling
ingest_display, ingest_display,
Some(spinner.clone()), Some(spinner.clone()),
signal,
) )
.await?; .await?;
result.mrs_updated = mrs_result.mrs_upserted; result.mrs_updated = mrs_result.mrs_upserted;
@@ -145,6 +154,11 @@ pub async fn run_sync(
result.resource_events_failed += mrs_result.resource_events_failed; result.resource_events_failed += mrs_result.resource_events_failed;
spinner.finish_and_clear(); spinner.finish_and_clear();
if signal.is_cancelled() {
info!("Shutdown requested after MRs stage, returning partial sync results");
return Ok(result);
}
if !options.no_docs { if !options.no_docs {
current_stage += 1; current_stage += 1;
let spinner = stage_spinner( let spinner = stage_spinner(

View File

@@ -103,6 +103,28 @@ pub fn complete_job(conn: &Connection, job_id: i64) -> Result<()> {
Ok(()) Ok(())
} }
/// Transaction-scoped variant of `complete_job`: deletes the queue row for
/// `job_id` on the caller-supplied transaction, so the deletion commits (or
/// rolls back) together with whatever else the caller bundles into `tx` —
/// e.g. a watermark update that must stay atomic with job completion.
pub fn complete_job_tx(tx: &rusqlite::Transaction<'_>, job_id: i64) -> Result<()> {
    let sql = "DELETE FROM pending_dependent_fetches WHERE id = ?1";
    tx.execute(sql, rusqlite::params![job_id])?;
    Ok(())
}
/// Clears `locked_at` on every currently locked job so the next sync run can
/// claim them immediately instead of waiting out stale locks. Intended for
/// graceful-shutdown cleanup.
///
/// Returns the number of jobs whose lock was released.
pub fn release_all_locked_jobs(conn: &Connection) -> Result<usize> {
    let released = conn.execute(
        "UPDATE pending_dependent_fetches SET locked_at = NULL WHERE locked_at IS NOT NULL",
        [],
    )?;
    Ok(released)
}
pub fn fail_job(conn: &Connection, job_id: i64, error: &str) -> Result<()> { pub fn fail_job(conn: &Connection, job_id: i64, error: &str) -> Result<()> {
let now = now_ms(); let now = now_ms();
@@ -200,3 +222,109 @@ pub fn count_claimable_jobs(conn: &Connection, project_id: i64) -> Result<HashMa
Ok(counts) Ok(counts)
} }
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;
    use crate::core::db::{create_connection, run_migrations};

    /// Builds an in-memory DB containing one project and one enqueued
    /// resource_events job; returns the connection plus the job's row id.
    fn setup_db_with_job() -> (Connection, i64) {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();

        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
             VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
            [],
        )
        .unwrap();
        let project_id: i64 = conn
            .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0))
            .unwrap();

        enqueue_job(&conn, project_id, "issue", 42, 100, "resource_events", None).unwrap();
        let job_id: i64 = conn
            .query_row(
                "SELECT id FROM pending_dependent_fetches LIMIT 1",
                [],
                |row| row.get(0),
            )
            .unwrap();

        (conn, job_id)
    }

    /// Counts queue rows with the given id (0 = deleted, 1 = still present).
    fn rows_with_id(conn: &Connection, job_id: i64) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    /// Reports whether the queue row with the given id currently holds a lock.
    fn is_locked(conn: &Connection, job_id: i64) -> bool {
        conn.query_row(
            "SELECT locked_at IS NOT NULL FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    #[test]
    fn complete_job_tx_commits() {
        let (conn, job_id) = setup_db_with_job();

        let tx = conn.unchecked_transaction().unwrap();
        complete_job_tx(&tx, job_id).unwrap();
        tx.commit().unwrap();

        assert_eq!(
            rows_with_id(&conn, job_id),
            0,
            "job should be deleted after commit"
        );
    }

    #[test]
    fn complete_job_tx_rollback() {
        let (conn, job_id) = setup_db_with_job();

        {
            let tx = conn.unchecked_transaction().unwrap();
            complete_job_tx(&tx, job_id).unwrap();
            // Dropping the transaction without commit() rolls it back.
        }

        assert_eq!(
            rows_with_id(&conn, job_id),
            1,
            "job should survive dropped (rolled-back) tx"
        );
    }

    #[test]
    fn release_all_locked_jobs_clears_locks() {
        let (conn, _job_id) = setup_db_with_job();
        let project_id: i64 = conn
            .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0))
            .unwrap();

        let jobs = claim_jobs(&conn, "resource_events", project_id, 10).unwrap();
        assert_eq!(jobs.len(), 1);
        assert!(is_locked(&conn, jobs[0].id), "job should be locked after claim");

        let released = release_all_locked_jobs(&conn).unwrap();
        assert_eq!(released, 1);
        assert!(
            !is_locked(&conn, jobs[0].id),
            "job should be unlocked after release_all"
        );
    }
}

View File

@@ -12,6 +12,7 @@ pub mod paths;
pub mod payloads; pub mod payloads;
pub mod project; pub mod project;
pub mod references; pub mod references;
pub mod shutdown;
pub mod sync_run; pub mod sync_run;
pub mod time; pub mod time;
pub mod timeline; pub mod timeline;

63
src/core/shutdown.rs Normal file
View File

@@ -0,0 +1,63 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
/// A cooperative cancellation token for graceful shutdown.
///
/// Cheap to clone: every clone shares a single `Arc<AtomicBool>`, so a
/// `cancel()` issued anywhere (typically a Ctrl+C signal handler) becomes
/// visible to every holder through `is_cancelled()`.
#[derive(Clone, Default)]
pub struct ShutdownSignal {
    cancelled: Arc<AtomicBool>,
}

impl ShutdownSignal {
    /// Creates a signal in the not-cancelled state.
    pub fn new() -> Self {
        Self::default()
    }

    /// Marks the signal as cancelled; observed by all clones.
    pub fn cancel(&self) {
        self.cancelled.store(true, Ordering::Relaxed);
    }

    /// Returns `true` once `cancel()` has been called on any clone.
    pub fn is_cancelled(&self) -> bool {
        self.cancelled.load(Ordering::Relaxed)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn signal_starts_uncancelled() {
        assert!(!ShutdownSignal::new().is_cancelled());
    }

    #[test]
    fn cancel_sets_flag() {
        let sig = ShutdownSignal::new();
        sig.cancel();
        assert!(sig.is_cancelled());
    }

    #[test]
    fn clone_propagates_cancellation() {
        let original = ShutdownSignal::new();
        let observer = original.clone();
        original.cancel();
        assert!(
            observer.is_cancelled(),
            "clone should see cancellation from original"
        );
    }
}

View File

@@ -4,12 +4,13 @@ use tracing::{debug, info, instrument, warn};
use crate::Config; use crate::Config;
use crate::core::dependent_queue::{ use crate::core::dependent_queue::{
claim_jobs, complete_job, count_claimable_jobs, enqueue_job, fail_job, reclaim_stale_locks, claim_jobs, complete_job_tx, count_claimable_jobs, enqueue_job, fail_job, reclaim_stale_locks,
}; };
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::references::{ use crate::core::references::{
EntityReference, insert_entity_reference, resolve_issue_local_id, resolve_project_path, EntityReference, insert_entity_reference, resolve_issue_local_id, resolve_project_path,
}; };
use crate::core::shutdown::ShutdownSignal;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use super::discussions::ingest_issue_discussions; use super::discussions::ingest_issue_discussions;
@@ -84,12 +85,21 @@ pub async fn ingest_project_issues(
project_id: i64, project_id: i64,
gitlab_project_id: i64, gitlab_project_id: i64,
) -> Result<IngestProjectResult> { ) -> Result<IngestProjectResult> {
ingest_project_issues_with_progress(conn, client, config, project_id, gitlab_project_id, None) let signal = ShutdownSignal::new();
ingest_project_issues_with_progress(
conn,
client,
config,
project_id,
gitlab_project_id,
None,
&signal,
)
.await .await
} }
#[instrument( #[instrument(
skip(conn, client, config, progress), skip(conn, client, config, progress, signal),
fields(project_id, gitlab_project_id, items_processed, items_skipped, errors) fields(project_id, gitlab_project_id, items_processed, items_skipped, errors)
)] )]
pub async fn ingest_project_issues_with_progress( pub async fn ingest_project_issues_with_progress(
@@ -99,6 +109,7 @@ pub async fn ingest_project_issues_with_progress(
project_id: i64, project_id: i64,
gitlab_project_id: i64, gitlab_project_id: i64,
progress: Option<ProgressCallback>, progress: Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<IngestProjectResult> { ) -> Result<IngestProjectResult> {
let mut result = IngestProjectResult::default(); let mut result = IngestProjectResult::default();
let emit = |event: ProgressEvent| { let emit = |event: ProgressEvent| {
@@ -130,6 +141,11 @@ pub async fn ingest_project_issues_with_progress(
let total_issues = total_issues as usize; let total_issues = total_issues as usize;
result.issues_skipped_discussion_sync = total_issues.saturating_sub(issues_needing_sync.len()); result.issues_skipped_discussion_sync = total_issues.saturating_sub(issues_needing_sync.len());
if signal.is_cancelled() {
info!("Shutdown requested, returning partial issue results");
return Ok(result);
}
if issues_needing_sync.is_empty() { if issues_needing_sync.is_empty() {
info!("No issues need discussion sync"); info!("No issues need discussion sync");
} else { } else {
@@ -150,6 +166,7 @@ pub async fn ingest_project_issues_with_progress(
project_id, project_id,
&issues_needing_sync, &issues_needing_sync,
&progress, &progress,
signal,
) )
.await?; .await?;
@@ -163,6 +180,11 @@ pub async fn ingest_project_issues_with_progress(
} }
} }
if signal.is_cancelled() {
info!("Shutdown requested, returning partial issue results");
return Ok(result);
}
if config.sync.fetch_resource_events { if config.sync.fetch_resource_events {
let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "issue")?; let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "issue")?;
if enqueued > 0 { if enqueued > 0 {
@@ -176,6 +198,7 @@ pub async fn ingest_project_issues_with_progress(
project_id, project_id,
gitlab_project_id, gitlab_project_id,
&progress, &progress,
signal,
) )
.await?; .await?;
result.resource_events_fetched = drain_result.fetched; result.resource_events_fetched = drain_result.fetched;
@@ -211,6 +234,7 @@ pub async fn ingest_project_issues_with_progress(
Ok(result) Ok(result)
} }
#[allow(clippy::too_many_arguments)]
async fn sync_discussions_sequential( async fn sync_discussions_sequential(
conn: &Connection, conn: &Connection,
client: &GitLabClient, client: &GitLabClient,
@@ -219,6 +243,7 @@ async fn sync_discussions_sequential(
local_project_id: i64, local_project_id: i64,
issues: &[IssueForDiscussionSync], issues: &[IssueForDiscussionSync],
progress: &Option<ProgressCallback>, progress: &Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<Vec<super::discussions::IngestDiscussionsResult>> { ) -> Result<Vec<super::discussions::IngestDiscussionsResult>> {
let batch_size = config.sync.dependent_concurrency as usize; let batch_size = config.sync.dependent_concurrency as usize;
let total = issues.len(); let total = issues.len();
@@ -226,6 +251,10 @@ async fn sync_discussions_sequential(
let mut results = Vec::with_capacity(issues.len()); let mut results = Vec::with_capacity(issues.len());
for chunk in issues.chunks(batch_size) { for chunk in issues.chunks(batch_size) {
if signal.is_cancelled() {
info!("Shutdown requested during discussion sync, returning partial results");
break;
}
for issue in chunk { for issue in chunk {
let disc_result = ingest_issue_discussions( let disc_result = ingest_issue_discussions(
conn, conn,
@@ -258,6 +287,7 @@ pub async fn ingest_project_merge_requests(
gitlab_project_id: i64, gitlab_project_id: i64,
full_sync: bool, full_sync: bool,
) -> Result<IngestMrProjectResult> { ) -> Result<IngestMrProjectResult> {
let signal = ShutdownSignal::new();
ingest_project_merge_requests_with_progress( ingest_project_merge_requests_with_progress(
conn, conn,
client, client,
@@ -266,14 +296,16 @@ pub async fn ingest_project_merge_requests(
gitlab_project_id, gitlab_project_id,
full_sync, full_sync,
None, None,
&signal,
) )
.await .await
} }
#[instrument( #[instrument(
skip(conn, client, config, progress), skip(conn, client, config, progress, signal),
fields(project_id, gitlab_project_id, items_processed, items_skipped, errors) fields(project_id, gitlab_project_id, items_processed, items_skipped, errors)
)] )]
#[allow(clippy::too_many_arguments)]
pub async fn ingest_project_merge_requests_with_progress( pub async fn ingest_project_merge_requests_with_progress(
conn: &Connection, conn: &Connection,
client: &GitLabClient, client: &GitLabClient,
@@ -282,6 +314,7 @@ pub async fn ingest_project_merge_requests_with_progress(
gitlab_project_id: i64, gitlab_project_id: i64,
full_sync: bool, full_sync: bool,
progress: Option<ProgressCallback>, progress: Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<IngestMrProjectResult> { ) -> Result<IngestMrProjectResult> {
let mut result = IngestMrProjectResult::default(); let mut result = IngestMrProjectResult::default();
let emit = |event: ProgressEvent| { let emit = |event: ProgressEvent| {
@@ -323,6 +356,11 @@ pub async fn ingest_project_merge_requests_with_progress(
let total_mrs = total_mrs as usize; let total_mrs = total_mrs as usize;
result.mrs_skipped_discussion_sync = total_mrs.saturating_sub(mrs_needing_sync.len()); result.mrs_skipped_discussion_sync = total_mrs.saturating_sub(mrs_needing_sync.len());
if signal.is_cancelled() {
info!("Shutdown requested, returning partial MR results");
return Ok(result);
}
if mrs_needing_sync.is_empty() { if mrs_needing_sync.is_empty() {
info!("No MRs need discussion sync"); info!("No MRs need discussion sync");
} else { } else {
@@ -343,6 +381,7 @@ pub async fn ingest_project_merge_requests_with_progress(
project_id, project_id,
&mrs_needing_sync, &mrs_needing_sync,
&progress, &progress,
signal,
) )
.await?; .await?;
@@ -360,6 +399,11 @@ pub async fn ingest_project_merge_requests_with_progress(
} }
} }
if signal.is_cancelled() {
info!("Shutdown requested, returning partial MR results");
return Ok(result);
}
if config.sync.fetch_resource_events { if config.sync.fetch_resource_events {
let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "merge_request")?; let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "merge_request")?;
if enqueued > 0 { if enqueued > 0 {
@@ -373,6 +417,7 @@ pub async fn ingest_project_merge_requests_with_progress(
project_id, project_id,
gitlab_project_id, gitlab_project_id,
&progress, &progress,
signal,
) )
.await?; .await?;
result.resource_events_fetched = drain_result.fetched; result.resource_events_fetched = drain_result.fetched;
@@ -388,6 +433,11 @@ pub async fn ingest_project_merge_requests_with_progress(
} }
} }
if signal.is_cancelled() {
info!("Shutdown requested, returning partial MR results");
return Ok(result);
}
let note_refs = crate::core::note_parser::extract_refs_from_system_notes(conn, project_id)?; let note_refs = crate::core::note_parser::extract_refs_from_system_notes(conn, project_id)?;
if note_refs.inserted > 0 || note_refs.skipped_unresolvable > 0 { if note_refs.inserted > 0 || note_refs.skipped_unresolvable > 0 {
debug!( debug!(
@@ -411,6 +461,7 @@ pub async fn ingest_project_merge_requests_with_progress(
project_id, project_id,
gitlab_project_id, gitlab_project_id,
&progress, &progress,
signal,
) )
.await?; .await?;
result.closes_issues_fetched = closes_result.fetched; result.closes_issues_fetched = closes_result.fetched;
@@ -440,6 +491,7 @@ pub async fn ingest_project_merge_requests_with_progress(
Ok(result) Ok(result)
} }
#[allow(clippy::too_many_arguments)]
async fn sync_mr_discussions_sequential( async fn sync_mr_discussions_sequential(
conn: &Connection, conn: &Connection,
client: &GitLabClient, client: &GitLabClient,
@@ -448,6 +500,7 @@ async fn sync_mr_discussions_sequential(
local_project_id: i64, local_project_id: i64,
mrs: &[MrForDiscussionSync], mrs: &[MrForDiscussionSync],
progress: &Option<ProgressCallback>, progress: &Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<Vec<super::mr_discussions::IngestMrDiscussionsResult>> { ) -> Result<Vec<super::mr_discussions::IngestMrDiscussionsResult>> {
let batch_size = config.sync.dependent_concurrency as usize; let batch_size = config.sync.dependent_concurrency as usize;
let total = mrs.len(); let total = mrs.len();
@@ -456,6 +509,10 @@ async fn sync_mr_discussions_sequential(
let mut processed = 0; let mut processed = 0;
for chunk in mrs.chunks(batch_size) { for chunk in mrs.chunks(batch_size) {
if signal.is_cancelled() {
info!("Shutdown requested during MR discussion sync, returning partial results");
break;
}
let prefetch_futures = chunk.iter().map(|mr| { let prefetch_futures = chunk.iter().map(|mr| {
prefetch_mr_discussions(client, gitlab_project_id, local_project_id, mr.clone()) prefetch_mr_discussions(client, gitlab_project_id, local_project_id, mr.clone())
}); });
@@ -559,8 +616,48 @@ fn enqueue_resource_events_for_entity_type(
Ok(enqueued) Ok(enqueued)
} }
/// Result of a concurrent HTTP prefetch for resource events.
///
/// Carries the job's identity alongside the fetch outcome so the drain loop
/// can complete/fail the correct queue row during the serial DB-write phase.
#[allow(clippy::type_complexity)]
struct PrefetchedResourceEvents {
    // Row id in pending_dependent_fetches; used to complete or fail the job.
    job_id: i64,
    // Local project row id (passed through to store_resource_events).
    project_id: i64,
    // "issue" or "merge_request" — selects which watermark column is updated.
    entity_type: String,
    // GitLab-side iid of the entity; used in failure logging.
    entity_iid: i64,
    // Local row id of the issue/MR whose events were fetched.
    entity_local_id: i64,
    // (state, label, milestone) event lists on success, or the fetch error.
    result: std::result::Result<
        (
            Vec<crate::gitlab::types::GitLabStateEvent>,
            Vec<crate::gitlab::types::GitLabLabelEvent>,
            Vec<crate::gitlab::types::GitLabMilestoneEvent>,
        ),
        crate::core::error::LoreError,
    >,
}
/// Fires the resource-events HTTP fetch for one queued job and packages the
/// outcome (success or error) together with the job's identifying fields, so
/// the caller can run a batch of these concurrently and then write results to
/// the DB serially.
async fn prefetch_resource_events(
    client: &GitLabClient,
    gitlab_project_id: i64,
    job_id: i64,
    project_id: i64,
    entity_type: String,
    entity_iid: i64,
    entity_local_id: i64,
) -> PrefetchedResourceEvents {
    let fetched = client
        .fetch_all_resource_events(gitlab_project_id, &entity_type, entity_iid)
        .await;

    PrefetchedResourceEvents {
        job_id,
        project_id,
        entity_type,
        entity_iid,
        entity_local_id,
        result: fetched,
    }
}
#[instrument( #[instrument(
skip(conn, client, config, progress), skip(conn, client, config, progress, signal),
fields(project_id, gitlab_project_id, items_processed, errors) fields(project_id, gitlab_project_id, items_processed, errors)
)] )]
async fn drain_resource_events( async fn drain_resource_events(
@@ -570,6 +667,7 @@ async fn drain_resource_events(
project_id: i64, project_id: i64,
gitlab_project_id: i64, gitlab_project_id: i64,
progress: &Option<ProgressCallback>, progress: &Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<DrainResult> { ) -> Result<DrainResult> {
let mut result = DrainResult::default(); let mut result = DrainResult::default();
let batch_size = config.sync.dependent_concurrency as usize; let batch_size = config.sync.dependent_concurrency as usize;
@@ -603,33 +701,49 @@ async fn drain_resource_events(
let mut seen_job_ids = std::collections::HashSet::new(); let mut seen_job_ids = std::collections::HashSet::new();
loop { loop {
if signal.is_cancelled() {
info!("Shutdown requested during resource events drain, returning partial results");
break;
}
let jobs = claim_jobs(conn, "resource_events", project_id, batch_size)?; let jobs = claim_jobs(conn, "resource_events", project_id, batch_size)?;
if jobs.is_empty() { if jobs.is_empty() {
break; break;
} }
let mut any_new_in_batch = false; // Phase 1: Concurrent HTTP fetches
let futures: Vec<_> = jobs
.iter()
.filter(|j| seen_job_ids.insert(j.id))
.map(|j| {
prefetch_resource_events(
client,
gitlab_project_id,
j.id,
j.project_id,
j.entity_type.clone(),
j.entity_iid,
j.entity_local_id,
)
})
.collect();
for job in &jobs { if futures.is_empty() {
if !seen_job_ids.insert(job.id) { warn!("All claimed jobs were already processed, breaking drain loop");
warn!( break;
job_id = job.id,
"Skipping already-processed job in same drain run"
);
continue;
} }
any_new_in_batch = true;
match client let prefetched = join_all(futures).await;
.fetch_all_resource_events(gitlab_project_id, &job.entity_type, job.entity_iid)
.await // Phase 2: Serial DB writes
{ for p in prefetched {
match p.result {
Ok((state_events, label_events, milestone_events)) => { Ok((state_events, label_events, milestone_events)) => {
let store_result = store_resource_events( let store_result = store_resource_events(
conn, conn,
job.project_id, p.project_id,
&job.entity_type, &p.entity_type,
job.entity_local_id, p.entity_local_id,
&state_events, &state_events,
&label_events, &label_events,
&milestone_events, &milestone_events,
@@ -637,22 +751,24 @@ async fn drain_resource_events(
match store_result { match store_result {
Ok(()) => { Ok(()) => {
complete_job(conn, job.id)?; let tx = conn.unchecked_transaction()?;
update_resource_event_watermark( complete_job_tx(&tx, p.job_id)?;
conn, update_resource_event_watermark_tx(
&job.entity_type, &tx,
job.entity_local_id, &p.entity_type,
p.entity_local_id,
)?; )?;
tx.commit()?;
result.fetched += 1; result.fetched += 1;
} }
Err(e) => { Err(e) => {
warn!( warn!(
entity_type = %job.entity_type, entity_type = %p.entity_type,
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Failed to store resource events" "Failed to store resource events"
); );
fail_job(conn, job.id, &e.to_string())?; fail_job(conn, p.job_id, &e.to_string())?;
result.failed += 1; result.failed += 1;
} }
} }
@@ -660,26 +776,24 @@ async fn drain_resource_events(
Err(e) => { Err(e) => {
if e.is_permanent_api_error() { if e.is_permanent_api_error() {
debug!( debug!(
entity_type = %job.entity_type, entity_type = %p.entity_type,
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Permanent API error for resource events, marking complete" "Permanent API error for resource events, marking complete"
); );
complete_job(conn, job.id)?; let tx = conn.unchecked_transaction()?;
update_resource_event_watermark( complete_job_tx(&tx, p.job_id)?;
conn, update_resource_event_watermark_tx(&tx, &p.entity_type, p.entity_local_id)?;
&job.entity_type, tx.commit()?;
job.entity_local_id,
)?;
result.skipped_not_found += 1; result.skipped_not_found += 1;
} else { } else {
warn!( warn!(
entity_type = %job.entity_type, entity_type = %p.entity_type,
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Failed to fetch resource events from GitLab" "Failed to fetch resource events from GitLab"
); );
fail_job(conn, job.id, &e.to_string())?; fail_job(conn, p.job_id, &e.to_string())?;
result.failed += 1; result.failed += 1;
} }
} }
@@ -691,11 +805,6 @@ async fn drain_resource_events(
total: total_pending, total: total_pending,
}); });
} }
if !any_new_in_batch {
warn!("All claimed jobs were already processed, breaking drain loop");
break;
}
} }
emit(ProgressEvent::ResourceEventsFetchComplete { emit(ProgressEvent::ResourceEventsFetchComplete {
@@ -762,20 +871,31 @@ fn store_resource_events(
Ok(()) Ok(())
} }
fn update_resource_event_watermark( fn update_closes_issues_watermark_tx(
conn: &Connection, tx: &rusqlite::Transaction<'_>,
mr_local_id: i64,
) -> Result<()> {
tx.execute(
"UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
[mr_local_id],
)?;
Ok(())
}
fn update_resource_event_watermark_tx(
tx: &rusqlite::Transaction<'_>,
entity_type: &str, entity_type: &str,
entity_local_id: i64, entity_local_id: i64,
) -> Result<()> { ) -> Result<()> {
match entity_type { match entity_type {
"issue" => { "issue" => {
conn.execute( tx.execute(
"UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?", "UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
[entity_local_id], [entity_local_id],
)?; )?;
} }
"merge_request" => { "merge_request" => {
conn.execute( tx.execute(
"UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?", "UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
[entity_local_id], [entity_local_id],
)?; )?;
@@ -785,14 +905,6 @@ fn update_resource_event_watermark(
Ok(()) Ok(())
} }
fn update_closes_issues_watermark(conn: &Connection, mr_local_id: i64) -> Result<()> {
conn.execute(
"UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
[mr_local_id],
)?;
Ok(())
}
fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<usize> { fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<usize> {
// Remove stale jobs for MRs that haven't changed since their last closes_issues sync // Remove stale jobs for MRs that haven't changed since their last closes_issues sync
conn.execute( conn.execute(
@@ -833,8 +945,37 @@ fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<u
Ok(enqueued) Ok(enqueued)
} }
/// Result of a concurrent HTTP prefetch for closes-issues references.
///
/// Pairs the fetch outcome with the job's identity so the drain loop can
/// complete or fail the correct queue row during the serial DB-write phase.
struct PrefetchedClosesIssues {
    // Row id in pending_dependent_fetches; used to complete or fail the job.
    job_id: i64,
    // GitLab-side MR iid; used in failure logging.
    entity_iid: i64,
    // Local row id of the MR whose closes-issues refs were fetched.
    entity_local_id: i64,
    // Referenced issues on success, or the fetch error.
    result: std::result::Result<
        Vec<crate::gitlab::types::GitLabIssueRef>,
        crate::core::error::LoreError,
    >,
}
/// Fires the closes-issues HTTP fetch for one queued MR job and bundles the
/// outcome with the job's identity, letting the caller run a concurrent batch
/// of fetches and then apply DB writes serially.
async fn prefetch_closes_issues(
    client: &GitLabClient,
    gitlab_project_id: i64,
    job_id: i64,
    entity_iid: i64,
    entity_local_id: i64,
) -> PrefetchedClosesIssues {
    let fetched = client
        .fetch_mr_closes_issues(gitlab_project_id, entity_iid)
        .await;

    PrefetchedClosesIssues {
        job_id,
        entity_iid,
        entity_local_id,
        result: fetched,
    }
}
#[instrument( #[instrument(
skip(conn, client, config, progress), skip(conn, client, config, progress, signal),
fields(project_id, gitlab_project_id, items_processed, errors) fields(project_id, gitlab_project_id, items_processed, errors)
)] )]
async fn drain_mr_closes_issues( async fn drain_mr_closes_issues(
@@ -844,6 +985,7 @@ async fn drain_mr_closes_issues(
project_id: i64, project_id: i64,
gitlab_project_id: i64, gitlab_project_id: i64,
progress: &Option<ProgressCallback>, progress: &Option<ProgressCallback>,
signal: &ShutdownSignal,
) -> Result<DrainResult> { ) -> Result<DrainResult> {
let mut result = DrainResult::default(); let mut result = DrainResult::default();
let batch_size = config.sync.dependent_concurrency as usize; let batch_size = config.sync.dependent_concurrency as usize;
@@ -877,48 +1019,64 @@ async fn drain_mr_closes_issues(
let mut seen_job_ids = std::collections::HashSet::new(); let mut seen_job_ids = std::collections::HashSet::new();
loop { loop {
if signal.is_cancelled() {
info!("Shutdown requested during closes_issues drain, returning partial results");
break;
}
let jobs = claim_jobs(conn, "mr_closes_issues", project_id, batch_size)?; let jobs = claim_jobs(conn, "mr_closes_issues", project_id, batch_size)?;
if jobs.is_empty() { if jobs.is_empty() {
break; break;
} }
let mut any_new_in_batch = false; // Phase 1: Concurrent HTTP fetches
let futures: Vec<_> = jobs
.iter()
.filter(|j| seen_job_ids.insert(j.id))
.map(|j| {
prefetch_closes_issues(
client,
gitlab_project_id,
j.id,
j.entity_iid,
j.entity_local_id,
)
})
.collect();
for job in &jobs { if futures.is_empty() {
if !seen_job_ids.insert(job.id) { warn!("All claimed mr_closes_issues jobs were already processed, breaking drain loop");
warn!( break;
job_id = job.id,
"Skipping already-processed mr_closes_issues job"
);
continue;
} }
any_new_in_batch = true;
match client let prefetched = join_all(futures).await;
.fetch_mr_closes_issues(gitlab_project_id, job.entity_iid)
.await // Phase 2: Serial DB writes
{ for p in prefetched {
match p.result {
Ok(closes_issues) => { Ok(closes_issues) => {
let store_result = store_closes_issues_refs( let store_result = store_closes_issues_refs(
conn, conn,
project_id, project_id,
job.entity_local_id, p.entity_local_id,
&closes_issues, &closes_issues,
); );
match store_result { match store_result {
Ok(()) => { Ok(()) => {
complete_job(conn, job.id)?; let tx = conn.unchecked_transaction()?;
update_closes_issues_watermark(conn, job.entity_local_id)?; complete_job_tx(&tx, p.job_id)?;
update_closes_issues_watermark_tx(&tx, p.entity_local_id)?;
tx.commit()?;
result.fetched += 1; result.fetched += 1;
} }
Err(e) => { Err(e) => {
warn!( warn!(
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Failed to store closes_issues references" "Failed to store closes_issues references"
); );
fail_job(conn, job.id, &e.to_string())?; fail_job(conn, p.job_id, &e.to_string())?;
result.failed += 1; result.failed += 1;
} }
} }
@@ -926,20 +1084,22 @@ async fn drain_mr_closes_issues(
Err(e) => { Err(e) => {
if e.is_permanent_api_error() { if e.is_permanent_api_error() {
debug!( debug!(
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Permanent API error for closes_issues, marking complete" "Permanent API error for closes_issues, marking complete"
); );
complete_job(conn, job.id)?; let tx = conn.unchecked_transaction()?;
update_closes_issues_watermark(conn, job.entity_local_id)?; complete_job_tx(&tx, p.job_id)?;
update_closes_issues_watermark_tx(&tx, p.entity_local_id)?;
tx.commit()?;
result.skipped_not_found += 1; result.skipped_not_found += 1;
} else { } else {
warn!( warn!(
entity_iid = job.entity_iid, entity_iid = p.entity_iid,
error = %e, error = %e,
"Failed to fetch closes_issues from GitLab" "Failed to fetch closes_issues from GitLab"
); );
fail_job(conn, job.id, &e.to_string())?; fail_job(conn, p.job_id, &e.to_string())?;
result.failed += 1; result.failed += 1;
} }
} }
@@ -951,11 +1111,6 @@ async fn drain_mr_closes_issues(
total: total_pending, total: total_pending,
}); });
} }
if !any_new_in_batch {
warn!("All claimed mr_closes_issues jobs were already processed, breaking drain loop");
break;
}
} }
emit(ProgressEvent::ClosesIssuesFetchComplete { emit(ProgressEvent::ClosesIssuesFetchComplete {

View File

@@ -30,10 +30,12 @@ use lore::cli::{
use lore::core::db::{ use lore::core::db::{
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations, LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
}; };
use lore::core::dependent_queue::release_all_locked_jobs;
use lore::core::error::{LoreError, RobotErrorOutput}; use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::logging; use lore::core::logging;
use lore::core::metrics::MetricsLayer; use lore::core::metrics::MetricsLayer;
use lore::core::paths::{get_config_path, get_db_path, get_log_dir}; use lore::core::paths::{get_config_path, get_db_path, get_log_dir};
use lore::core::shutdown::ShutdownSignal;
use lore::core::sync_run::SyncRunRecorder; use lore::core::sync_run::SyncRunRecorder;
#[tokio::main] #[tokio::main]
@@ -658,6 +660,13 @@ async fn handle_ingest(
let run_id_short = &run_id[..8]; let run_id_short = &run_id[..8];
let recorder = SyncRunRecorder::start(&recorder_conn, &command, run_id_short)?; let recorder = SyncRunRecorder::start(&recorder_conn, &command, run_id_short)?;
let signal = ShutdownSignal::new();
let signal_for_handler = signal.clone();
tokio::spawn(async move {
let _ = tokio::signal::ctrl_c().await;
signal_for_handler.cancel();
});
let ingest_result: std::result::Result<(), Box<dyn std::error::Error>> = async { let ingest_result: std::result::Result<(), Box<dyn std::error::Error>> = async {
match args.entity.as_deref() { match args.entity.as_deref() {
Some(resource_type) => { Some(resource_type) => {
@@ -670,6 +679,7 @@ async fn handle_ingest(
false, false,
display, display,
None, None,
&signal,
) )
.await?; .await?;
@@ -697,6 +707,7 @@ async fn handle_ingest(
false, false,
display, display,
None, None,
&signal,
) )
.await?; .await?;
@@ -709,6 +720,7 @@ async fn handle_ingest(
false, false,
display, display,
None, None,
&signal,
) )
.await?; .await?;
@@ -725,6 +737,22 @@ async fn handle_ingest(
.await; .await;
match ingest_result { match ingest_result {
Ok(()) if signal.is_cancelled() => {
let stages = metrics.extract_timings();
let _ = release_all_locked_jobs(&recorder_conn);
let _ = recorder.fail(
&recorder_conn,
"Interrupted by user (Ctrl+C)",
Some(&stages),
);
if !robot_mode {
eprintln!(
"{}",
style("Interrupted by Ctrl+C. Partial data has been saved.").yellow()
);
}
Ok(())
}
Ok(()) => { Ok(()) => {
let stages = metrics.extract_timings(); let stages = metrics.extract_timings();
let total_items: usize = stages.iter().map(|s| s.items_processed).sum(); let total_items: usize = stages.iter().map(|s| s.items_processed).sum();
@@ -734,6 +762,7 @@ async fn handle_ingest(
} }
Err(e) => { Err(e) => {
let stages = metrics.extract_timings(); let stages = metrics.extract_timings();
let _ = release_all_locked_jobs(&recorder_conn);
let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages)); let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages));
Err(e) Err(e)
} }
@@ -1521,7 +1550,8 @@ async fn handle_sync_cmd(
// For dry_run, skip recording and just show the preview // For dry_run, skip recording and just show the preview
if dry_run { if dry_run {
run_sync(&config, options, None).await?; let signal = ShutdownSignal::new();
run_sync(&config, options, None, &signal).await?;
return Ok(()); return Ok(());
} }
@@ -1531,8 +1561,43 @@ async fn handle_sync_cmd(
let run_id_short = &run_id[..8]; let run_id_short = &run_id[..8];
let recorder = SyncRunRecorder::start(&recorder_conn, "sync", run_id_short)?; let recorder = SyncRunRecorder::start(&recorder_conn, "sync", run_id_short)?;
let signal = ShutdownSignal::new();
let signal_for_handler = signal.clone();
tokio::spawn(async move {
let _ = tokio::signal::ctrl_c().await;
signal_for_handler.cancel();
});
let start = std::time::Instant::now(); let start = std::time::Instant::now();
match run_sync(&config, options, Some(run_id_short)).await { match run_sync(&config, options, Some(run_id_short), &signal).await {
Ok(result) if signal.is_cancelled() => {
let elapsed = start.elapsed();
let stages = metrics.extract_timings();
let released = release_all_locked_jobs(&recorder_conn).unwrap_or(0);
let _ = recorder.fail(
&recorder_conn,
"Interrupted by user (Ctrl+C)",
Some(&stages),
);
if robot_mode {
print_sync_json(&result, elapsed.as_millis() as u64, Some(metrics));
} else {
eprintln!();
eprintln!(
"{}",
console::style("Interrupted by Ctrl+C. Partial results:").yellow()
);
print_sync(&result, elapsed, Some(metrics));
if released > 0 {
eprintln!(
"{}",
console::style(format!("Released {released} locked jobs")).dim()
);
}
}
Ok(())
}
Ok(result) => { Ok(result) => {
let elapsed = start.elapsed(); let elapsed = start.elapsed();
let stages = metrics.extract_timings(); let stages = metrics.extract_timings();
@@ -1552,6 +1617,7 @@ async fn handle_sync_cmd(
} }
Err(e) => { Err(e) => {
let stages = metrics.extract_timings(); let stages = metrics.extract_timings();
let _ = release_all_locked_jobs(&recorder_conn);
let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages)); let _ = recorder.fail(&recorder_conn, &e.to_string(), Some(&stages));
Err(e.into()) Err(e.into())
} }