feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation
Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.
Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total)
scoped to a single project via -p/--project
- Preflight phase validates all IIDs exist on GitLab before any DB writes,
with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
(issues_fetched, mrs_ingested, docs_regenerated, etc.)
New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
for scoped document regeneration
Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started
GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods
Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
and reason fields. Shares exit code 6 with GitLabNotFound.
Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
@@ -89,6 +89,10 @@ const MIGRATIONS: &[(&str, &str)] = &[
|
||||
"026",
|
||||
include_str!("../../migrations/026_scoring_indexes.sql"),
|
||||
),
|
||||
(
|
||||
"027",
|
||||
include_str!("../../migrations/027_surgical_sync_runs.sql"),
|
||||
),
|
||||
];
|
||||
|
||||
pub fn create_connection(db_path: &Path) -> Result<Connection> {
|
||||
|
||||
@@ -21,6 +21,7 @@ pub enum ErrorCode {
|
||||
EmbeddingFailed,
|
||||
NotFound,
|
||||
Ambiguous,
|
||||
SurgicalPreflightFailed,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ErrorCode {
|
||||
@@ -44,6 +45,7 @@ impl std::fmt::Display for ErrorCode {
|
||||
Self::EmbeddingFailed => "EMBEDDING_FAILED",
|
||||
Self::NotFound => "NOT_FOUND",
|
||||
Self::Ambiguous => "AMBIGUOUS",
|
||||
Self::SurgicalPreflightFailed => "SURGICAL_PREFLIGHT_FAILED",
|
||||
};
|
||||
write!(f, "{code}")
|
||||
}
|
||||
@@ -70,6 +72,9 @@ impl ErrorCode {
|
||||
Self::EmbeddingFailed => 16,
|
||||
Self::NotFound => 17,
|
||||
Self::Ambiguous => 18,
|
||||
// Shares exit code 6 with GitLabNotFound — same semantic category (resource not found).
|
||||
// Robot consumers distinguish via ErrorCode string, not exit code.
|
||||
Self::SurgicalPreflightFailed => 6,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -111,7 +116,7 @@ pub enum LoreError {
|
||||
source: Option<rusqlite::Error>,
|
||||
},
|
||||
|
||||
#[error("GitLab token not set. Export {env_var} environment variable.")]
|
||||
#[error("GitLab token not set. Run 'lore token set' or export {env_var}.")]
|
||||
TokenNotSet { env_var: String },
|
||||
|
||||
#[error("Database error: {0}")]
|
||||
@@ -153,6 +158,14 @@ pub enum LoreError {
|
||||
|
||||
#[error("No embeddings found. Run: lore embed")]
|
||||
EmbeddingsNotBuilt,
|
||||
|
||||
#[error("Surgical preflight failed for {entity_type} !{iid} in {project}: {reason}")]
|
||||
SurgicalPreflightFailed {
|
||||
entity_type: String,
|
||||
iid: u64,
|
||||
project: String,
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl LoreError {
|
||||
@@ -179,6 +192,7 @@ impl LoreError {
|
||||
Self::OllamaModelNotFound { .. } => ErrorCode::OllamaModelNotFound,
|
||||
Self::EmbeddingFailed { .. } => ErrorCode::EmbeddingFailed,
|
||||
Self::EmbeddingsNotBuilt => ErrorCode::EmbeddingFailed,
|
||||
Self::SurgicalPreflightFailed { .. } => ErrorCode::SurgicalPreflightFailed,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,7 +221,7 @@ impl LoreError {
|
||||
"Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore migrate\n lore reset --yes",
|
||||
),
|
||||
Self::TokenNotSet { .. } => Some(
|
||||
"Export the token to your shell:\n\n export GITLAB_TOKEN=glpat-xxxxxxxxxxxx\n\n Your token needs the read_api scope.",
|
||||
"Set your token:\n\n lore token set\n\n Or export to your shell:\n\n export GITLAB_TOKEN=glpat-xxxxxxxxxxxx\n\n Your token needs the read_api scope.",
|
||||
),
|
||||
Self::Database(_) => Some(
|
||||
"Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore doctor\n lore reset --yes",
|
||||
@@ -227,6 +241,9 @@ impl LoreError {
|
||||
Some("Check Ollama logs or retry with 'lore embed --retry-failed'")
|
||||
}
|
||||
Self::EmbeddingsNotBuilt => Some("Generate embeddings first: lore embed"),
|
||||
Self::SurgicalPreflightFailed { .. } => Some(
|
||||
"Verify the IID exists in the project and you have access.\n\n Example:\n lore issues -p <project>\n lore mrs -p <project>",
|
||||
),
|
||||
Self::Json(_) | Self::Io(_) | Self::Transform(_) | Self::Other(_) => None,
|
||||
}
|
||||
}
|
||||
@@ -246,7 +263,7 @@ impl LoreError {
|
||||
Self::GitLabAuthFailed => {
|
||||
vec!["export GITLAB_TOKEN=glpat-xxx", "lore auth"]
|
||||
}
|
||||
Self::TokenNotSet { .. } => vec!["export GITLAB_TOKEN=glpat-xxx"],
|
||||
Self::TokenNotSet { .. } => vec!["lore token set", "export GITLAB_TOKEN=glpat-xxx"],
|
||||
Self::OllamaUnavailable { .. } => vec!["ollama serve"],
|
||||
Self::OllamaModelNotFound { .. } => vec!["ollama pull nomic-embed-text"],
|
||||
Self::DatabaseLocked { .. } => vec!["lore ingest --force"],
|
||||
@@ -254,6 +271,9 @@ impl LoreError {
|
||||
Self::EmbeddingFailed { .. } => vec!["lore embed --retry-failed"],
|
||||
Self::MigrationFailed { .. } => vec!["lore migrate"],
|
||||
Self::GitLabNetworkError { .. } => vec!["lore doctor"],
|
||||
Self::SurgicalPreflightFailed { .. } => {
|
||||
vec!["lore issues -p <project>", "lore mrs -p <project>"]
|
||||
}
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
@@ -293,3 +313,40 @@ impl From<&LoreError> for RobotErrorOutput {
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, LoreError>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Display must surface every field of the preflight error so users can
    // identify exactly which entity failed and why.
    #[test]
    fn surgical_preflight_failed_display() {
        let err = LoreError::SurgicalPreflightFailed {
            entity_type: String::from("issue"),
            iid: 42,
            project: String::from("group/repo"),
            reason: String::from("not found on GitLab"),
        };
        let msg = format!("{err}");
        assert!(msg.contains("issue"), "missing entity_type: {msg}");
        assert!(msg.contains("42"), "missing iid: {msg}");
        assert!(msg.contains("group/repo"), "missing project: {msg}");
        assert!(msg.contains("not found on GitLab"), "missing reason: {msg}");
    }

    // Exit code 6 is shared with GitLabNotFound (same "not found" category).
    #[test]
    fn surgical_preflight_failed_error_code() {
        assert_eq!(ErrorCode::SurgicalPreflightFailed.exit_code(), 6);
    }

    // The error variant must map to its dedicated ErrorCode.
    #[test]
    fn surgical_preflight_failed_code_mapping() {
        let err = LoreError::SurgicalPreflightFailed {
            entity_type: String::from("merge_request"),
            iid: 99,
            project: String::from("ns/proj"),
            reason: String::from("404"),
        };
        assert_eq!(err.code(), ErrorCode::SurgicalPreflightFailed);
    }
}
|
||||
|
||||
@@ -20,6 +20,75 @@ impl SyncRunRecorder {
|
||||
Ok(Self { row_id })
|
||||
}
|
||||
|
||||
/// Returns the database row ID of this sync run.
///
/// This is the `sync_runs` primary key that the recorder's UPDATE
/// statements target via `WHERE id = ?`.
pub fn row_id(&self) -> i64 {
    self.row_id
}
|
||||
|
||||
/// Sets surgical-mode metadata on the run (mode, phase, IID manifest).
|
||||
pub fn set_surgical_metadata(
|
||||
&self,
|
||||
conn: &Connection,
|
||||
mode: &str,
|
||||
phase: &str,
|
||||
surgical_iids_json: &str,
|
||||
) -> Result<()> {
|
||||
conn.execute(
|
||||
"UPDATE sync_runs
|
||||
SET mode = ?1, phase = ?2, surgical_iids_json = ?3
|
||||
WHERE id = ?4",
|
||||
rusqlite::params![mode, phase, surgical_iids_json, self.row_id],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Updates the current phase and refreshes the heartbeat timestamp.
|
||||
pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> {
|
||||
let now = now_ms();
|
||||
conn.execute(
|
||||
"UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3",
|
||||
rusqlite::params![phase, now, self.row_id],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Increments a counter column by 1 based on entity type and stage.
|
||||
/// Unknown (entity_type, stage) combinations are silently ignored.
|
||||
pub fn record_entity_result(
|
||||
&self,
|
||||
conn: &Connection,
|
||||
entity_type: &str,
|
||||
stage: &str,
|
||||
) -> Result<()> {
|
||||
let column = match (entity_type, stage) {
|
||||
("issue", "fetched") => "issues_fetched",
|
||||
("issue", "ingested") => "issues_ingested",
|
||||
("mr", "fetched") => "mrs_fetched",
|
||||
("mr", "ingested") => "mrs_ingested",
|
||||
("issue" | "mr", "skipped_stale") => "skipped_stale",
|
||||
("doc", "regenerated") => "docs_regenerated",
|
||||
("doc", "embedded") => "docs_embedded",
|
||||
(_, "warning") => "warnings_count",
|
||||
_ => return Ok(()),
|
||||
};
|
||||
// Column name is from a hardcoded match, not user input — safe to interpolate.
|
||||
let sql = format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1");
|
||||
conn.execute(&sql, rusqlite::params![self.row_id])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Marks the run as cancelled with a reason. Consumes self (terminal state).
|
||||
pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> {
|
||||
let now = now_ms();
|
||||
conn.execute(
|
||||
"UPDATE sync_runs
|
||||
SET status = 'cancelled', error = ?1, cancelled_at = ?2, finished_at = ?3
|
||||
WHERE id = ?4",
|
||||
rusqlite::params![reason, now, now, self.row_id],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn succeed(
|
||||
self,
|
||||
conn: &Connection,
|
||||
|
||||
@@ -146,3 +146,239 @@ fn test_sync_run_recorder_fail_with_partial_metrics() {
|
||||
assert_eq!(parsed.len(), 1);
|
||||
assert_eq!(parsed[0].name, "ingest_issues");
|
||||
}
|
||||
|
||||
// Migration 027 columns (mode, phase, surgical_iids_json) must accept and
// round-trip surgical-run metadata.
#[test]
fn sync_run_surgical_columns_exist() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, phase, surgical_iids_json)
         VALUES (1000, 1000, 'running', 'sync', 'surgical', 'preflight', '{\"issues\":[7],\"mrs\":[]}')",
        [],
    )
    .unwrap();
    let row: (String, String, String) = conn
        .query_row(
            "SELECT mode, phase, surgical_iids_json FROM sync_runs WHERE mode = 'surgical'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();
    assert_eq!(row.0, "surgical");
    assert_eq!(row.1, "preflight");
    assert!(row.2.contains("7"));
}
|
||||
|
||||
// Per-entity counters must default to 0 for rows inserted without them.
#[test]
fn sync_run_counter_defaults_are_zero() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command)
         VALUES (2000, 2000, 'running', 'sync')",
        [],
    )
    .unwrap();
    let id = conn.last_insert_rowid();
    let counters: (i64, i64, i64, i64) = conn
        .query_row(
            "SELECT issues_fetched, mrs_fetched, docs_regenerated, warnings_count FROM sync_runs WHERE id = ?1",
            [id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
        )
        .unwrap();
    assert_eq!(counters, (0, 0, 0, 0));
}
|
||||
|
||||
// mode/phase/cancelled_at are nullable and must default to NULL for
// non-surgical runs.
#[test]
fn sync_run_nullable_columns_default_to_null() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command)
         VALUES (3000, 3000, 'running', 'sync')",
        [],
    )
    .unwrap();
    let id = conn.last_insert_rowid();
    let (mode, phase, cancelled_at): (Option<String>, Option<String>, Option<i64>) = conn
        .query_row(
            "SELECT mode, phase, cancelled_at FROM sync_runs WHERE id = ?1",
            [id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();
    assert_eq!((mode, phase, cancelled_at), (None, None, None));
}
|
||||
|
||||
// Counter values written at insert time must read back unchanged.
#[test]
fn sync_run_counter_round_trip() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, issues_fetched, mrs_ingested, docs_embedded)
         VALUES (4000, 4000, 'succeeded', 'sync', 'surgical', 3, 2, 5)",
        [],
    )
    .unwrap();
    let id = conn.last_insert_rowid();
    let stored: (i64, i64, i64) = conn
        .query_row(
            "SELECT issues_fetched, mrs_ingested, docs_embedded FROM sync_runs WHERE id = ?1",
            [id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();
    assert_eq!(stored, (3, 2, 5));
}
|
||||
|
||||
// Full happy-path lifecycle: start -> surgical metadata -> phase change ->
// per-entity counters -> succeed. Verifies every column in one read-back.
#[test]
fn surgical_lifecycle_start_metadata_succeed() {
    let conn = setup_test_db();
    let rec = SyncRunRecorder::start(&conn, "sync", "surg001").unwrap();
    let row_id = rec.row_id();

    rec.set_surgical_metadata(
        &conn,
        "surgical",
        "preflight",
        r#"{"issues":[7,8],"mrs":[101]}"#,
    )
    .unwrap();
    rec.update_phase(&conn, "ingest").unwrap();

    // Two issue fetches, one issue ingest, one MR fetch, one MR ingest.
    for (entity, stage) in [
        ("issue", "fetched"),
        ("issue", "fetched"),
        ("issue", "ingested"),
        ("mr", "fetched"),
        ("mr", "ingested"),
    ] {
        rec.record_entity_result(&conn, entity, stage).unwrap();
    }

    rec.succeed(&conn, &[], 3, 0).unwrap();

    #[allow(clippy::type_complexity)]
    let row: (String, String, String, i64, i64, i64, i64, String) = conn
        .query_row(
            "SELECT mode, phase, surgical_iids_json, issues_fetched, mrs_fetched, \
             issues_ingested, mrs_ingested, status \
             FROM sync_runs WHERE id = ?1",
            [row_id],
            |r| {
                Ok((
                    r.get(0)?,
                    r.get(1)?,
                    r.get(2)?,
                    r.get(3)?,
                    r.get(4)?,
                    r.get(5)?,
                    r.get(6)?,
                    r.get(7)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(row.0, "surgical");
    assert_eq!(row.1, "ingest");
    assert!(row.2.contains("101"));
    assert_eq!((row.3, row.4, row.5, row.6), (2, 1, 1, 1));
    assert_eq!(row.7, "succeeded");
}
|
||||
|
||||
// Cancellation is a terminal state: status, error reason, and both
// timestamps must be populated.
#[test]
fn surgical_lifecycle_cancel() {
    let conn = setup_test_db();
    let rec = SyncRunRecorder::start(&conn, "sync", "cancel01").unwrap();
    let row_id = rec.row_id();

    rec.set_surgical_metadata(&conn, "surgical", "preflight", "{}")
        .unwrap();
    rec.cancel(&conn, "User requested cancellation").unwrap();

    let (status, error, cancelled_at, finished_at): (
        String,
        Option<String>,
        Option<i64>,
        Option<i64>,
    ) = conn
        .query_row(
            "SELECT status, error, cancelled_at, finished_at FROM sync_runs WHERE id = ?1",
            [row_id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
        )
        .unwrap();

    assert_eq!(status, "cancelled");
    assert_eq!(error.as_deref(), Some("User requested cancellation"));
    assert!(cancelled_at.is_some());
    assert!(finished_at.is_some());
}
|
||||
|
||||
// Unknown (entity_type, stage) combinations must be a silent no-op,
// never an error.
#[test]
fn record_entity_result_ignores_unknown() {
    let conn = setup_test_db();
    let rec = SyncRunRecorder::start(&conn, "sync", "unk001").unwrap();
    rec.record_entity_result(&conn, "widget", "exploded")
        .unwrap();
}
|
||||
|
||||
// Doc-stage and skipped_stale counters must accumulate independently.
#[test]
fn record_entity_result_doc_counters() {
    let conn = setup_test_db();
    let rec = SyncRunRecorder::start(&conn, "sync", "cnt001").unwrap();
    let row_id = rec.row_id();

    for (entity, stage) in [
        ("doc", "regenerated"),
        ("doc", "regenerated"),
        ("doc", "embedded"),
        ("issue", "skipped_stale"),
    ] {
        rec.record_entity_result(&conn, entity, stage).unwrap();
    }

    let counts: (i64, i64, i64) = conn
        .query_row(
            "SELECT docs_regenerated, docs_embedded, skipped_stale FROM sync_runs WHERE id = ?1",
            [row_id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();

    assert_eq!(counts, (2, 1, 1));
}
|
||||
|
||||
Reference in New Issue
Block a user