feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation
Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.
Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total),
scoped to a single project via -p/--project
- Preflight phase validates all IIDs exist on GitLab before any DB writes,
with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
(issues_fetched, mrs_ingested, docs_regenerated, etc.)
New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
for scoped document regeneration
Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started
GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods
Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
and reason fields. Shares exit code 6 with GitLabNotFound.
Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
@@ -518,3 +518,88 @@ fn test_note_regeneration_cache_invalidates_across_parents() {
|
||||
assert!(beta_content.contains("parent_iid: 99"));
|
||||
assert!(beta_content.contains("parent_title: Issue Beta"));
|
||||
}
|
||||
|
||||
/// Scoped regeneration must only touch the sources it is handed: with two
/// dirty issues and only issue 1 requested, issue 1 ends up with a document
/// while issue 2 keeps its dirty marker.
#[test]
fn test_scoped_regen_only_processes_specified_sources() {
    let conn = setup_db();

    // Seed two issues in the same project; they differ only in id, gitlab_id,
    // iid and title.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'First Issue', 'opened', 1000, 2000, 3000)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (2, 20, 1, 43, 'Second Issue', 'opened', 1000, 2000, 3000)",
    ];
    for sql in seed_statements {
        conn.execute(sql, []).unwrap();
    }

    // Flag both issues as dirty so either one could be picked up.
    for issue_id in [1, 2] {
        mark_dirty(&conn, SourceType::Issue, issue_id).unwrap();
    }

    // Request regeneration for issue 1 alone.
    let outcome = regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap();
    assert_eq!(outcome.regenerated, 1);
    assert_eq!(outcome.errored, 0);

    // Small helper: run a `SELECT COUNT(*)` query and return the single value.
    let count_rows = |sql: &str| -> i64 { conn.query_row(sql, [], |row| row.get(0)).unwrap() };

    // The requested issue now has exactly one document.
    let docs_for_first = count_rows(
        "SELECT COUNT(*) FROM documents WHERE source_type = 'issue' AND source_id = 1",
    );
    assert_eq!(docs_for_first, 1);

    // The issue that was not requested is still queued as dirty.
    let dirty_for_second = count_rows(
        "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'issue' AND source_id = 2",
    );
    assert_eq!(dirty_for_second, 1);
}
|
||||
|
||||
/// The scoped regeneration result must report the ID of the document it
/// produced, and that ID must match the row stored in `documents`.
#[test]
fn test_scoped_regen_returns_document_ids() {
    let conn = setup_db();

    // One issue, marked dirty, is the only candidate for regeneration.
    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)";
    conn.execute(insert_issue, []).unwrap();
    mark_dirty(&conn, SourceType::Issue, 1).unwrap();

    let outcome = regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap();

    // Exactly one document ID comes back for the single requested source.
    let reported_ids = &outcome.document_ids;
    assert_eq!(reported_ids.len(), 1);

    // Cross-check the reported ID against what is actually in the table.
    let lookup_doc = "SELECT id FROM documents WHERE source_type = 'issue' AND source_id = 1";
    let stored_id: i64 = conn.query_row(lookup_doc, [], |row| row.get(0)).unwrap();
    assert_eq!(reported_ids[0], stored_id);
}
|
||||
|
||||
/// A dirty marker that points at a non-existent source must not be treated as
/// an error: the call succeeds, counts the entry as regenerated, and reports
/// no document IDs.
#[test]
fn test_scoped_regen_handles_missing_source() {
    let conn = setup_db();

    // No issue rows are inserted, so this ID refers to nothing. It is still
    // marked dirty so the scoped regeneration has something to process.
    let missing_id = 999;
    mark_dirty(&conn, SourceType::Issue, missing_id).unwrap();

    let outcome =
        regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, missing_id)]).unwrap();

    // The missing source is counted as handled rather than failed, and no
    // document ID is collected for it.
    assert_eq!(outcome.regenerated, 1);
    assert_eq!(outcome.errored, 0);
    assert!(outcome.document_ids.is_empty());
}
|
||||
|
||||
Reference in New Issue
Block a user