feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation
Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.
Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 targets in total),
scoped to a single project via -p/--project (or defaultProject in config)
- Preflight phase validates that all IIDs exist on GitLab before any DB writes,
with TOCTOU-aware (time-of-check to time-of-use) soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
(issues_fetched, mrs_ingested, docs_regenerated, etc.)
New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
for scoped document regeneration
Database changes:
- Migration 027: Extends sync_runs with mode, phase, and surgical_iids_json columns,
per-entity counters, and a cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started
GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods
Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
and reason fields. Shares exit code 6 with GitLabNotFound.
Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
252
src/main.rs
252
src/main.rs
@@ -26,14 +26,14 @@ use lore::cli::commands::{
|
||||
run_cron_status, run_cron_uninstall, run_doctor, run_drift, run_embed, run_file_history,
|
||||
run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs,
|
||||
run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, run_timeline,
|
||||
run_who,
|
||||
run_token_set, run_token_show, run_who,
|
||||
};
|
||||
use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme};
|
||||
use lore::cli::robot::{RobotMeta, strip_schemas};
|
||||
use lore::cli::{
|
||||
Cli, Commands, CountArgs, CronAction, CronArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs,
|
||||
IngestArgs, IssuesArgs, MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs,
|
||||
TraceArgs, WhoArgs,
|
||||
TokenAction, TokenArgs, TraceArgs, WhoArgs,
|
||||
};
|
||||
use lore::core::db::{
|
||||
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
|
||||
@@ -207,6 +207,7 @@ async fn main() {
|
||||
}
|
||||
Some(Commands::Trace(args)) => handle_trace(cli.config.as_deref(), args, robot_mode),
|
||||
Some(Commands::Cron(args)) => handle_cron(cli.config.as_deref(), args, robot_mode),
|
||||
Some(Commands::Token(args)) => handle_token(cli.config.as_deref(), args, robot_mode).await,
|
||||
Some(Commands::Drift {
|
||||
entity_type,
|
||||
iid,
|
||||
@@ -2154,6 +2155,14 @@ async fn handle_sync_cmd(
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let dry_run = args.dry_run && !args.no_dry_run;
|
||||
|
||||
// Dedup and sort IIDs
|
||||
let mut issue_iids = args.issue;
|
||||
let mut mr_iids = args.mr;
|
||||
issue_iids.sort_unstable();
|
||||
issue_iids.dedup();
|
||||
mr_iids.sort_unstable();
|
||||
mr_iids.dedup();
|
||||
|
||||
let mut config = Config::load(config_override)?;
|
||||
if args.no_events {
|
||||
config.sync.fetch_resource_events = false;
|
||||
@@ -2172,10 +2181,56 @@ async fn handle_sync_cmd(
|
||||
no_events: args.no_events,
|
||||
robot_mode,
|
||||
dry_run,
|
||||
issue_iids,
|
||||
mr_iids,
|
||||
project: args.project,
|
||||
preflight_only: args.preflight_only,
|
||||
};
|
||||
|
||||
// For dry run, skip recording and just show the preview
|
||||
if dry_run {
|
||||
// Validation: preflight_only requires surgical mode
|
||||
if options.preflight_only && !options.is_surgical() {
|
||||
return Err("--preflight-only requires --issue or --mr".into());
|
||||
}
|
||||
|
||||
// Validation: full + surgical are incompatible
|
||||
if options.full && options.is_surgical() {
|
||||
return Err("--full and --issue/--mr are incompatible".into());
|
||||
}
|
||||
|
||||
// Validation: surgical mode requires a project (via -p or config defaultProject)
|
||||
if options.is_surgical()
|
||||
&& config
|
||||
.effective_project(options.project.as_deref())
|
||||
.is_none()
|
||||
{
|
||||
return Err("--issue/--mr requires -p/--project (or set defaultProject in config)".into());
|
||||
}
|
||||
|
||||
// Validation: hard cap on total surgical targets
|
||||
let total_targets = options.issue_iids.len() + options.mr_iids.len();
|
||||
if total_targets > SyncOptions::MAX_SURGICAL_TARGETS {
|
||||
return Err(format!(
|
||||
"Too many surgical targets ({total_targets}); maximum is {}",
|
||||
SyncOptions::MAX_SURGICAL_TARGETS
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
// Surgical + dry-run → treat as preflight-only
|
||||
let mut options = options;
|
||||
if dry_run && options.is_surgical() {
|
||||
options.preflight_only = true;
|
||||
}
|
||||
|
||||
// Resolve effective project for surgical mode: when -p is not passed but
|
||||
// defaultProject is set in config, populate options.project so the surgical
|
||||
// orchestrator receives the resolved project path.
|
||||
if options.is_surgical() && options.project.is_none() {
|
||||
options.project = config.default_project.clone();
|
||||
}
|
||||
|
||||
// For non-surgical dry run, skip recording and just show the preview
|
||||
if dry_run && !options.is_surgical() {
|
||||
let signal = ShutdownSignal::new();
|
||||
run_sync(&config, options, None, &signal).await?;
|
||||
return Ok(());
|
||||
@@ -2199,6 +2254,34 @@ async fn handle_sync_cmd(
|
||||
None
|
||||
};
|
||||
|
||||
// Surgical mode: run_sync_surgical manages its own recorder, signal, and recording.
|
||||
// Skip the normal recorder setup and let the dispatch handle everything.
|
||||
if options.is_surgical() {
|
||||
let signal = ShutdownSignal::new();
|
||||
let signal_for_handler = signal.clone();
|
||||
tokio::spawn(async move {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
eprintln!("\nInterrupted, finishing current batch... (Ctrl+C again to force quit)");
|
||||
signal_for_handler.cancel();
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
std::process::exit(130);
|
||||
});
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
match run_sync(&config, options, None, &signal).await {
|
||||
Ok(result) => {
|
||||
let elapsed = start.elapsed();
|
||||
if robot_mode {
|
||||
print_sync_json(&result, elapsed.as_millis() as u64, Some(metrics));
|
||||
} else {
|
||||
print_sync(&result, elapsed, Some(metrics), args.timings);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||
let recorder_conn = create_connection(&db_path)?;
|
||||
let run_id = uuid::Uuid::new_v4().simple().to_string();
|
||||
@@ -2287,6 +2370,29 @@ fn handle_cron(
|
||||
} else {
|
||||
print_cron_install(&result);
|
||||
}
|
||||
// Warn if no stored token — cron runs in a minimal shell with no env vars
|
||||
if let Ok(config) = Config::load(config_override)
|
||||
&& config
|
||||
.gitlab
|
||||
.token
|
||||
.as_ref()
|
||||
.is_none_or(|t| t.trim().is_empty())
|
||||
{
|
||||
if robot_mode {
|
||||
eprintln!(
|
||||
"{{\"warning\":\"No stored token found. Cron sync requires a stored token. Run: lore token set\"}}"
|
||||
);
|
||||
} else {
|
||||
eprintln!();
|
||||
eprintln!(
|
||||
" {} No stored token found. Cron sync requires a stored token.",
|
||||
lore::cli::render::Theme::warning()
|
||||
.render(lore::cli::render::Icons::warning()),
|
||||
);
|
||||
eprintln!(" Run: lore token set");
|
||||
eprintln!();
|
||||
}
|
||||
}
|
||||
}
|
||||
CronAction::Uninstall => {
|
||||
let result = run_cron_uninstall()?;
|
||||
@@ -2312,6 +2418,74 @@ fn handle_cron(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_token(
|
||||
config_override: Option<&str>,
|
||||
args: TokenArgs,
|
||||
robot_mode: bool,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
match args.action {
|
||||
TokenAction::Set { token } => {
|
||||
let result = run_token_set(config_override, token).await?;
|
||||
let elapsed_ms = start.elapsed().as_millis() as u64;
|
||||
if robot_mode {
|
||||
let output = serde_json::json!({
|
||||
"ok": true,
|
||||
"data": {
|
||||
"action": "set",
|
||||
"username": result.username,
|
||||
"config_path": result.config_path,
|
||||
},
|
||||
"meta": { "elapsed_ms": elapsed_ms },
|
||||
});
|
||||
println!("{}", serde_json::to_string(&output)?);
|
||||
} else {
|
||||
println!(
|
||||
" {} Token stored and validated (authenticated as @{})",
|
||||
lore::cli::render::Theme::success().render(lore::cli::render::Icons::success()),
|
||||
result.username
|
||||
);
|
||||
println!(
|
||||
" {} {}",
|
||||
lore::cli::render::Theme::dim().render("config:"),
|
||||
result.config_path
|
||||
);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
TokenAction::Show { unmask } => {
|
||||
let result = run_token_show(config_override, unmask)?;
|
||||
let elapsed_ms = start.elapsed().as_millis() as u64;
|
||||
if robot_mode {
|
||||
let output = serde_json::json!({
|
||||
"ok": true,
|
||||
"data": {
|
||||
"token": result.token,
|
||||
"source": result.source,
|
||||
},
|
||||
"meta": { "elapsed_ms": elapsed_ms },
|
||||
});
|
||||
println!("{}", serde_json::to_string(&output)?);
|
||||
} else {
|
||||
println!(
|
||||
" {} {}",
|
||||
lore::cli::render::Theme::dim().render("token:"),
|
||||
result.token
|
||||
);
|
||||
println!(
|
||||
" {} {}",
|
||||
lore::cli::render::Theme::dim().render("source:"),
|
||||
result.source
|
||||
);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct HealthOutput {
|
||||
ok: bool,
|
||||
@@ -2513,13 +2687,31 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
}
|
||||
},
|
||||
"sync": {
|
||||
"description": "Full sync pipeline: ingest -> generate-docs -> embed",
|
||||
"flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run"],
|
||||
"description": "Full sync pipeline: ingest -> generate-docs -> embed. Supports surgical per-IID mode.",
|
||||
"flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run", "-t/--timings", "--lock", "--issue <IID>", "--mr <IID>", "-p/--project <path>", "--preflight-only"],
|
||||
"example": "lore --robot sync",
|
||||
"surgical_mode": {
|
||||
"description": "Sync specific issues or MRs by IID. Runs a scoped pipeline: preflight -> TOCTOU check -> ingest -> dependents -> docs -> embed.",
|
||||
"flags": ["--issue <IID> (repeatable)", "--mr <IID> (repeatable)", "-p/--project <path> (required)", "--preflight-only"],
|
||||
"examples": [
|
||||
"lore --robot sync --issue 7 -p group/project",
|
||||
"lore --robot sync --issue 7 --issue 42 --mr 10 -p group/project",
|
||||
"lore --robot sync --issue 7 -p group/project --preflight-only"
|
||||
],
|
||||
"constraints": ["--issue/--mr requires -p/--project (or defaultProject in config)", "--full and --issue/--mr are incompatible", "--preflight-only requires --issue or --mr", "Max 100 total targets"],
|
||||
"entity_result_outcomes": ["synced", "skipped_stale", "not_found", "preflight_failed", "error"]
|
||||
},
|
||||
"response_schema": {
|
||||
"ok": "bool",
|
||||
"data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"},
|
||||
"meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"}
|
||||
"normal": {
|
||||
"ok": "bool",
|
||||
"data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"},
|
||||
"meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"}
|
||||
},
|
||||
"surgical": {
|
||||
"ok": "bool",
|
||||
"data": {"surgical_mode": "true", "surgical_iids": "{issues:[int], merge_requests:[int]}", "entity_results": "[{entity_type:string, iid:int, outcome:string, error?:string, toctou_reason?:string}]", "preflight_only?": "bool", "issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "discussions_fetched": "int"},
|
||||
"meta": {"elapsed_ms": "int"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"issues": {
|
||||
@@ -2668,7 +2860,7 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
},
|
||||
"who": {
|
||||
"description": "People intelligence: experts, workload, active discussions, overlap, review patterns",
|
||||
"flags": ["<target>", "--path <path>", "--active", "--overlap <path>", "--reviews", "--since <duration>", "-p/--project", "-n/--limit", "--fields <list>", "--detail", "--no-detail", "--as-of <date>", "--explain-score", "--include-bots", "--all-history"],
|
||||
"flags": ["<target>", "--path <path>", "--active", "--overlap <path>", "--reviews", "--since <duration>", "-p/--project", "-n/--limit", "--fields <list>", "--detail", "--no-detail", "--as-of <date>", "--explain-score", "--include-bots", "--include-closed", "--all-history"],
|
||||
"modes": {
|
||||
"expert": "lore who <file-path> -- Who knows about this area? (also: --path for root files)",
|
||||
"workload": "lore who <username> -- What is someone working on?",
|
||||
@@ -2726,7 +2918,7 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
},
|
||||
"notes": {
|
||||
"description": "List notes from discussions with rich filtering",
|
||||
"flags": ["--limit/-n <N>", "--author/-a <username>", "--note-type <type>", "--contains <text>", "--for-issue <iid>", "--for-mr <iid>", "-p/--project <path>", "--since <period>", "--until <period>", "--path <filepath>", "--resolution <any|unresolved|resolved>", "--sort <created|updated>", "--asc", "--include-system", "--note-id <id>", "--gitlab-note-id <id>", "--discussion-id <id>", "--format <table|json|jsonl|csv>", "--fields <list|minimal>", "--open"],
|
||||
"flags": ["--limit/-n <N>", "--author/-a <username>", "--note-type <type>", "--contains <text>", "--for-issue <iid>", "--for-mr <iid>", "-p/--project <path>", "--since <period>", "--until <period>", "--path <filepath>", "--resolution <any|unresolved|resolved>", "--sort <created|updated>", "--asc", "--include-system", "--note-id <id>", "--gitlab-note-id <id>", "--discussion-id <id>", "--fields <list|minimal>", "--open"],
|
||||
"robot_flags": ["--format json", "--fields minimal"],
|
||||
"example": "lore --robot notes --author jdefting --since 1y --format json --fields minimal",
|
||||
"response_schema": {
|
||||
@@ -2735,6 +2927,33 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
"meta": {"elapsed_ms": "int"}
|
||||
}
|
||||
},
|
||||
"cron": {
|
||||
"description": "Manage cron-based automatic syncing (Unix only)",
|
||||
"subcommands": {
|
||||
"install": {"flags": ["--interval <minutes>"], "default_interval": 8},
|
||||
"uninstall": {"flags": []},
|
||||
"status": {"flags": []}
|
||||
},
|
||||
"example": "lore --robot cron status",
|
||||
"response_schema": {
|
||||
"ok": "bool",
|
||||
"data": {"action": "string (install|uninstall|status)", "installed?": "bool", "interval_minutes?": "int", "entry?": "string", "log_path?": "string", "replaced?": "bool", "was_installed?": "bool", "last_run_iso?": "string"},
|
||||
"meta": {"elapsed_ms": "int"}
|
||||
}
|
||||
},
|
||||
"token": {
|
||||
"description": "Manage stored GitLab token",
|
||||
"subcommands": {
|
||||
"set": {"flags": ["--token <value>"], "note": "Reads from stdin if --token omitted in non-interactive mode"},
|
||||
"show": {"flags": ["--unmask"]}
|
||||
},
|
||||
"example": "lore --robot token show",
|
||||
"response_schema": {
|
||||
"ok": "bool",
|
||||
"data": {"action": "string (set|show)", "token_masked?": "string", "token?": "string", "valid?": "bool", "username?": "string"},
|
||||
"meta": {"elapsed_ms": "int"}
|
||||
}
|
||||
},
|
||||
"robot-docs": {
|
||||
"description": "This command (agent self-discovery manifest)",
|
||||
"flags": ["--brief"],
|
||||
@@ -2756,10 +2975,14 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
"search: FTS5 + vector hybrid search across all entities",
|
||||
"who: Expert/workload/reviews analysis per file path or person",
|
||||
"timeline: Chronological event reconstruction across entities",
|
||||
"trace: Code provenance chains (file -> MR -> issue -> discussion)",
|
||||
"file-history: MR history per file with rename resolution",
|
||||
"notes: Rich note listing with author, type, resolution, path, and discussion filters",
|
||||
"stats: Database statistics with document/note/discussion counts",
|
||||
"count: Entity counts with state breakdowns",
|
||||
"embed: Generate vector embeddings for semantic search via Ollama"
|
||||
"embed: Generate vector embeddings for semantic search via Ollama",
|
||||
"cron: Automated sync scheduling (Unix)",
|
||||
"token: Secure token management with masked display"
|
||||
],
|
||||
"read_write_split": "lore = ALL reads (issues, MRs, search, who, timeline, intelligence). glab = ALL writes (create, update, approve, merge, CI/CD)."
|
||||
});
|
||||
@@ -2821,6 +3044,11 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
|
||||
"lore --robot who --active --since 7d",
|
||||
"lore --robot who --overlap src/path/",
|
||||
"lore --robot who --path README.md"
|
||||
],
|
||||
"surgical_sync": [
|
||||
"lore --robot sync --issue 7 -p group/project",
|
||||
"lore --robot sync --issue 7 --mr 10 -p group/project",
|
||||
"lore --robot sync --issue 7 -p group/project --preflight-only"
|
||||
]
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user