feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation

Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.

Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total),
  scoped to a single project via -p/--project
- Preflight phase validates that all IIDs exist on GitLab before any DB writes,
  with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
  (issues_fetched, mrs_ingested, docs_regenerated, etc.)

New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
  with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
  and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
  spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
  for scoped document regeneration

Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
  per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started

GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods

Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
  and reason fields. Shares exit code 6 with GitLabNotFound.

Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
teernisse
2026-02-18 16:27:59 -05:00
parent ea6e45e43f
commit 9ec1344945
25 changed files with 3354 additions and 37 deletions

View File

@@ -112,6 +112,18 @@ impl GitLabClient {
self.request("/api/v4/version").await
}
/// Fetches a single issue, addressed by project ID and issue IID.
///
/// Builds the path `/api/v4/projects/{project_id}/issues/{iid}` and hands it to
/// `request`, returning whatever that call yields (the decoded `GitLabIssue`
/// or the error it produced).
pub async fn get_issue_by_iid(&self, project_id: i64, iid: i64) -> Result<GitLabIssue> {
    let endpoint = format!("/api/v4/projects/{project_id}/issues/{iid}");
    self.request(&endpoint).await
}
/// Fetches a single merge request, addressed by project ID and merge request IID.
///
/// Builds the path `/api/v4/projects/{project_id}/merge_requests/{iid}` and hands
/// it to `request`, returning whatever that call yields (the decoded
/// `GitLabMergeRequest` or the error it produced).
pub async fn get_mr_by_iid(&self, project_id: i64, iid: i64) -> Result<GitLabMergeRequest> {
    let endpoint = format!("/api/v4/projects/{project_id}/merge_requests/{iid}");
    self.request(&endpoint).await
}
/// Retry budget constant (value 3).
/// NOTE(review): presumably the maximum number of retry attempts made by
/// `request` — its use is not visible in this hunk; confirm against the body.
const MAX_RETRIES: u32 = 3;
async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
@@ -763,6 +775,10 @@ fn ms_to_iso8601(ms: i64) -> Option<String> {
.map(|dt| dt.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string())
}
// Test-only module: compiled only under `cfg(test)`, with its source loaded
// from the file `client_tests.rs` via the explicit `#[path]` attribute.
#[cfg(test)]
#[path = "client_tests.rs"]
mod client_tests;
#[cfg(test)]
mod tests {
use super::*;

113
src/gitlab/client_tests.rs Normal file
View File

@@ -0,0 +1,113 @@
use super::*;
use crate::core::error::LoreError;
use wiremock::matchers::{header, method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
/// Checks that `get_issue_by_iid(5, 42)` issues a GET to
/// `/api/v4/projects/5/issues/42` carrying the `PRIVATE-TOKEN` header and that
/// the JSON payload is decoded into an issue with the expected IID and title.
#[tokio::test]
async fn get_issue_by_iid_success() {
    let mock_gitlab = MockServer::start().await;

    // Canned 200 response holding the issue payload the client should decode.
    let ok_response = ResponseTemplate::new(200).set_body_json(serde_json::json!({
        "id": 1001,
        "iid": 42,
        "project_id": 5,
        "title": "Fix login bug",
        "state": "opened",
        "created_at": "2026-01-15T10:00:00Z",
        "updated_at": "2026-02-01T14:30:00Z",
        "author": { "id": 1, "username": "dev1", "name": "Developer One" },
        "web_url": "https://gitlab.example.com/group/repo/-/issues/42",
        "labels": [],
        "milestone": null,
        "assignees": [],
        "closed_at": null,
        "description": "Login fails on mobile"
    }));

    // Only a GET on this exact path with the expected token header is answered.
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/5/issues/42"))
        .and(header("PRIVATE-TOKEN", "test-token"))
        .respond_with(ok_response)
        .mount(&mock_gitlab)
        .await;

    let base_url = mock_gitlab.uri();
    let client = GitLabClient::new(&base_url, "test-token", Some(100.0));
    let fetched = client.get_issue_by_iid(5, 42).await.unwrap();

    assert_eq!(fetched.iid, 42);
    assert_eq!(fetched.title, "Fix login bug");
}
/// Checks that a 404 reply to an issue lookup is reported as
/// `LoreError::GitLabNotFound`.
#[tokio::test]
async fn get_issue_by_iid_not_found() {
    let mock_gitlab = MockServer::start().await;

    // Canned 404 response with a GitLab-style error body.
    let missing_body = serde_json::json!({"message": "404 Not Found"});
    let missing_response = ResponseTemplate::new(404).set_body_json(missing_body);
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/5/issues/999"))
        .respond_with(missing_response)
        .mount(&mock_gitlab)
        .await;

    let base_url = mock_gitlab.uri();
    let client = GitLabClient::new(&base_url, "test-token", Some(100.0));
    let lookup = client.get_issue_by_iid(5, 999).await;
    let err = lookup.unwrap_err();
    assert!(matches!(err, LoreError::GitLabNotFound { .. }));
}
/// Checks that `get_mr_by_iid(5, 101)` issues a GET to
/// `/api/v4/projects/5/merge_requests/101` carrying the `PRIVATE-TOKEN` header
/// and that the JSON payload is decoded into a merge request with the expected
/// IID, title, and source branch.
#[tokio::test]
async fn get_mr_by_iid_success() {
    let mock_gitlab = MockServer::start().await;

    // Canned 200 response holding the merge request payload the client should decode.
    let ok_response = ResponseTemplate::new(200).set_body_json(serde_json::json!({
        "id": 2001,
        "iid": 101,
        "project_id": 5,
        "title": "Add caching layer",
        "state": "merged",
        "created_at": "2026-01-20T09:00:00Z",
        "updated_at": "2026-02-10T16:00:00Z",
        "author": { "id": 2, "username": "dev2", "name": "Developer Two" },
        "web_url": "https://gitlab.example.com/group/repo/-/merge_requests/101",
        "source_branch": "feature/caching",
        "target_branch": "main",
        "draft": false,
        "labels": [],
        "milestone": null,
        "assignees": [],
        "reviewers": [],
        "merged_by": null,
        "merged_at": null,
        "closed_at": null,
        "description": "Adds Redis caching"
    }));

    // Only a GET on this exact path with the expected token header is answered.
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/5/merge_requests/101"))
        .and(header("PRIVATE-TOKEN", "test-token"))
        .respond_with(ok_response)
        .mount(&mock_gitlab)
        .await;

    let base_url = mock_gitlab.uri();
    let client = GitLabClient::new(&base_url, "test-token", Some(100.0));
    let fetched = client.get_mr_by_iid(5, 101).await.unwrap();

    assert_eq!(fetched.iid, 101);
    assert_eq!(fetched.title, "Add caching layer");
    assert_eq!(fetched.source_branch, "feature/caching");
}
/// Checks that a 404 reply to a merge request lookup is reported as
/// `LoreError::GitLabNotFound`.
#[tokio::test]
async fn get_mr_by_iid_not_found() {
    let mock_gitlab = MockServer::start().await;

    // Canned 404 response with a GitLab-style error body.
    let missing_body = serde_json::json!({"message": "404 Not Found"});
    let missing_response = ResponseTemplate::new(404).set_body_json(missing_body);
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/5/merge_requests/999"))
        .respond_with(missing_response)
        .mount(&mock_gitlab)
        .await;

    let base_url = mock_gitlab.uri();
    let client = GitLabClient::new(&base_url, "test-token", Some(100.0));
    let lookup = client.get_mr_by_iid(5, 999).await;
    let err = lookup.unwrap_err();
    assert!(matches!(err, LoreError::GitLabNotFound { .. }));
}