feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation

Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.

Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total)
  scoped to a single project via -p/--project
- Preflight phase validates all IIDs exist on GitLab before any DB writes,
  with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
  (issues_fetched, mrs_ingested, docs_regenerated, etc.)

New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
  with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
  and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
  spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
  for scoped document regeneration

Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
  per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started

GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods

Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
  and reason fields. Shares exit code 6 with GitLabNotFound.

Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
teernisse
2026-02-18 16:27:59 -05:00
parent ea6e45e43f
commit 9ec1344945
25 changed files with 3354 additions and 37 deletions

View File

@@ -16,6 +16,7 @@ use super::ingest::{
DryRunPreview, IngestDisplay, ProjectStatusEnrichment, ProjectSummary, run_ingest,
run_ingest_dry_run,
};
use super::sync_surgical::run_sync_surgical;
#[derive(Debug, Default)]
pub struct SyncOptions {
@@ -26,6 +27,35 @@ pub struct SyncOptions {
pub no_events: bool,
pub robot_mode: bool,
pub dry_run: bool,
pub issue_iids: Vec<u64>,
pub mr_iids: Vec<u64>,
pub project: Option<String>,
pub preflight_only: bool,
}
impl SyncOptions {
pub const MAX_SURGICAL_TARGETS: usize = 100;
pub fn is_surgical(&self) -> bool {
!self.issue_iids.is_empty() || !self.mr_iids.is_empty()
}
}
#[derive(Debug, Default, Serialize)]
pub struct SurgicalIids {
pub issues: Vec<u64>,
pub merge_requests: Vec<u64>,
}
#[derive(Debug, Serialize)]
pub struct EntitySyncResult {
pub entity_type: String,
pub iid: u64,
pub outcome: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub toctou_reason: Option<String>,
}
#[derive(Debug, Default, Serialize)]
@@ -45,19 +75,23 @@ pub struct SyncResult {
pub embedding_failed: usize,
pub status_enrichment_errors: usize,
pub statuses_enriched: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub surgical_mode: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub surgical_iids: Option<SurgicalIids>,
#[serde(skip_serializing_if = "Option::is_none")]
pub entity_results: Option<Vec<EntitySyncResult>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub preflight_only: Option<bool>,
#[serde(skip)]
pub issue_projects: Vec<ProjectSummary>,
#[serde(skip)]
pub mr_projects: Vec<ProjectSummary>,
}
/// Apply semantic color to a stage-completion icon glyph.
/// Alias for [`Theme::color_icon`] to keep call sites concise.
fn color_icon(icon: &str, has_errors: bool) -> String {
if has_errors {
Theme::warning().render(icon)
} else {
Theme::success().render(icon)
}
Theme::color_icon(icon, has_errors)
}
pub async fn run_sync(
@@ -66,6 +100,11 @@ pub async fn run_sync(
run_id: Option<&str>,
signal: &ShutdownSignal,
) -> Result<SyncResult> {
// Surgical dispatch: if any IIDs specified, route to surgical pipeline
if options.is_surgical() {
return run_sync_surgical(config, options, run_id, signal).await;
}
let generated_id;
let run_id = match run_id {
Some(id) => id,
@@ -893,6 +932,22 @@ pub fn print_sync_dry_run_json(result: &SyncDryRunResult) {
mod tests {
use super::*;
fn default_options() -> SyncOptions {
SyncOptions {
full: false,
force: false,
no_embed: false,
no_docs: false,
no_events: false,
robot_mode: false,
dry_run: false,
issue_iids: vec![],
mr_iids: vec![],
project: None,
preflight_only: false,
}
}
#[test]
fn append_failures_skips_zeroes() {
let mut summary = "base".to_string();
@@ -1035,4 +1090,112 @@ mod tests {
assert!(rows[0].contains("0 statuses updated"));
assert!(rows[0].contains("skipped (disabled)"));
}
#[test]
fn is_surgical_with_issues() {
let opts = SyncOptions {
issue_iids: vec![1],
..default_options()
};
assert!(opts.is_surgical());
}
#[test]
fn is_surgical_with_mrs() {
let opts = SyncOptions {
mr_iids: vec![10],
..default_options()
};
assert!(opts.is_surgical());
}
#[test]
fn is_surgical_empty() {
let opts = default_options();
assert!(!opts.is_surgical());
}
#[test]
fn max_surgical_targets_is_100() {
assert_eq!(SyncOptions::MAX_SURGICAL_TARGETS, 100);
}
#[test]
fn sync_result_default_omits_surgical_fields() {
let result = SyncResult::default();
let json = serde_json::to_value(&result).unwrap();
assert!(json.get("surgical_mode").is_none());
assert!(json.get("surgical_iids").is_none());
assert!(json.get("entity_results").is_none());
assert!(json.get("preflight_only").is_none());
}
#[test]
fn sync_result_with_surgical_fields_serializes_correctly() {
let result = SyncResult {
surgical_mode: Some(true),
surgical_iids: Some(SurgicalIids {
issues: vec![7, 42],
merge_requests: vec![10],
}),
entity_results: Some(vec![
EntitySyncResult {
entity_type: "issue".to_string(),
iid: 7,
outcome: "synced".to_string(),
error: None,
toctou_reason: None,
},
EntitySyncResult {
entity_type: "issue".to_string(),
iid: 42,
outcome: "skipped_toctou".to_string(),
error: None,
toctou_reason: Some("updated_at changed".to_string()),
},
]),
preflight_only: Some(false),
..SyncResult::default()
};
let json = serde_json::to_value(&result).unwrap();
assert_eq!(json["surgical_mode"], true);
assert_eq!(json["surgical_iids"]["issues"], serde_json::json!([7, 42]));
assert_eq!(json["entity_results"].as_array().unwrap().len(), 2);
assert_eq!(json["entity_results"][1]["outcome"], "skipped_toctou");
assert_eq!(json["preflight_only"], false);
}
#[test]
fn entity_sync_result_omits_none_fields() {
let entity = EntitySyncResult {
entity_type: "merge_request".to_string(),
iid: 10,
outcome: "synced".to_string(),
error: None,
toctou_reason: None,
};
let json = serde_json::to_value(&entity).unwrap();
assert!(json.get("error").is_none());
assert!(json.get("toctou_reason").is_none());
assert!(json.get("entity_type").is_some());
}
#[test]
fn is_surgical_with_both_issues_and_mrs() {
let opts = SyncOptions {
issue_iids: vec![1, 2],
mr_iids: vec![10],
..default_options()
};
assert!(opts.is_surgical());
}
#[test]
fn is_not_surgical_with_only_project() {
let opts = SyncOptions {
project: Some("group/repo".to_string()),
..default_options()
};
assert!(!opts.is_surgical());
}
}