Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.
Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total)
scoped to a single project via -p/--project
- Preflight phase validates all IIDs exist on GitLab before any DB writes,
with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
(issues_fetched, mrs_ingested, docs_regenerated, etc.)
New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
for scoped document regeneration
Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started
GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods
Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
and reason fields. Shares exit code 6 with GitLabNotFound.
Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
140 lines
4.4 KiB
Rust
use rusqlite::Connection;
|
|
|
|
use super::error::Result;
|
|
use super::metrics::StageTiming;
|
|
use super::time::now_ms;
|
|
|
|
/// Handle to a single row in the `sync_runs` table, created by
/// [`SyncRunRecorder::start`]. All lifecycle updates (phase changes,
/// counters, terminal states) are keyed off the stored row id.
pub struct SyncRunRecorder {
    // Primary key (`id`) of the inserted `sync_runs` row; obtained from
    // `last_insert_rowid()` at start time.
    row_id: i64,
}
|
|
|
|
impl SyncRunRecorder {
|
|
pub fn start(conn: &Connection, command: &str, run_id: &str) -> Result<Self> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"INSERT INTO sync_runs (started_at, heartbeat_at, status, command, run_id)
|
|
VALUES (?1, ?2, 'running', ?3, ?4)",
|
|
rusqlite::params![now, now, command, run_id],
|
|
)?;
|
|
let row_id = conn.last_insert_rowid();
|
|
Ok(Self { row_id })
|
|
}
|
|
|
|
/// Returns the database row ID of this sync run.
|
|
pub fn row_id(&self) -> i64 {
|
|
self.row_id
|
|
}
|
|
|
|
/// Sets surgical-mode metadata on the run (mode, phase, IID manifest).
|
|
pub fn set_surgical_metadata(
|
|
&self,
|
|
conn: &Connection,
|
|
mode: &str,
|
|
phase: &str,
|
|
surgical_iids_json: &str,
|
|
) -> Result<()> {
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET mode = ?1, phase = ?2, surgical_iids_json = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![mode, phase, surgical_iids_json, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Updates the current phase and refreshes the heartbeat timestamp.
|
|
pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3",
|
|
rusqlite::params![phase, now, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Increments a counter column by 1 based on entity type and stage.
|
|
/// Unknown (entity_type, stage) combinations are silently ignored.
|
|
pub fn record_entity_result(
|
|
&self,
|
|
conn: &Connection,
|
|
entity_type: &str,
|
|
stage: &str,
|
|
) -> Result<()> {
|
|
let column = match (entity_type, stage) {
|
|
("issue", "fetched") => "issues_fetched",
|
|
("issue", "ingested") => "issues_ingested",
|
|
("mr", "fetched") => "mrs_fetched",
|
|
("mr", "ingested") => "mrs_ingested",
|
|
("issue" | "mr", "skipped_stale") => "skipped_stale",
|
|
("doc", "regenerated") => "docs_regenerated",
|
|
("doc", "embedded") => "docs_embedded",
|
|
(_, "warning") => "warnings_count",
|
|
_ => return Ok(()),
|
|
};
|
|
// Column name is from a hardcoded match, not user input — safe to interpolate.
|
|
let sql = format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1");
|
|
conn.execute(&sql, rusqlite::params![self.row_id])?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Marks the run as cancelled with a reason. Consumes self (terminal state).
|
|
pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET status = 'cancelled', error = ?1, cancelled_at = ?2, finished_at = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![reason, now, now, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn succeed(
|
|
self,
|
|
conn: &Connection,
|
|
metrics: &[StageTiming],
|
|
total_items: usize,
|
|
total_errors: usize,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json = serde_json::to_string(metrics).unwrap_or_else(|_| "[]".to_string());
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'succeeded',
|
|
metrics_json = ?2, total_items_processed = ?3, total_errors = ?4
|
|
WHERE id = ?5",
|
|
rusqlite::params![
|
|
now,
|
|
metrics_json,
|
|
total_items as i64,
|
|
total_errors as i64,
|
|
self.row_id
|
|
],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn fail(
|
|
self,
|
|
conn: &Connection,
|
|
error: &str,
|
|
metrics: Option<&[StageTiming]>,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json =
|
|
metrics.map(|m| serde_json::to_string(m).unwrap_or_else(|_| "[]".to_string()));
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'failed', error = ?2,
|
|
metrics_json = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![now, error, metrics_json, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
// Unit tests live in a sibling file (`sync_run_tests.rs`) to keep this
// module focused; the `#[path]` attribute redirects the module lookup.
#[cfg(test)]
#[path = "sync_run_tests.rs"]
mod tests;
|