feat(surgical-sync): add per-IID surgical sync pipeline with preflight validation

Add the ability to sync specific issues or merge requests by IID without
running a full incremental sync. This enables fast, targeted data refresh
for individual entities — useful for agent workflows, debugging, and
real-time investigation of specific issues or MRs.

Architecture:
- New CLI flags: --issue <IID> and --mr <IID> (repeatable, up to 100 total)
  scoped to a single project via -p/--project
- Preflight phase validates all IIDs exist on GitLab before any DB writes,
  with TOCTOU-aware soft verification at ingest time
- 6-stage pipeline: preflight -> fetch -> ingest -> dependents -> docs -> embed
- Each stage is cancellation-aware via ShutdownSignal
- Dedicated SyncRunRecorder extensions track surgical-specific counters
  (issues_fetched, mrs_ingested, docs_regenerated, etc.)

New modules:
- src/ingestion/surgical.rs: Core surgical fetch/ingest/dependent logic
  with preflight_fetch(), ingest_issue_by_iid(), ingest_mr_by_iid(),
  and fetch_dependents_for_{issue,mr}()
- src/cli/commands/sync_surgical.rs: Full CLI orchestrator with progress
  spinners, human/robot output, and cancellation handling
- src/embedding/pipeline.rs: embed_documents_by_ids() for scoped embedding
- src/documents/regenerator.rs: regenerate_dirty_documents_for_sources()
  for scoped document regeneration

Database changes:
- Migration 027: Extends sync_runs with mode, phase, surgical_iids_json,
  per-entity counters, and cancelled_at column
- New indexes: idx_sync_runs_mode_started, idx_sync_runs_status_phase_started

GitLab client:
- get_issue_by_iid() and get_mr_by_iid() single-entity fetch methods

Error handling:
- New SurgicalPreflightFailed error variant with entity_type, iid, project,
  and reason fields. Shares exit code 6 with GitLabNotFound.

Includes comprehensive test coverage:
- 645 lines of surgical ingestion tests (wiremock-based)
- 184 lines of scoped embedding tests
- 85 lines of scoped regeneration tests
- 113 lines of GitLab client single-entity tests
- 236 lines of sync_run surgical column/counter tests
- Unit tests for SyncOptions, error codes, and CLI validation
This commit is contained in:
teernisse
2026-02-18 16:27:59 -05:00
parent ea6e45e43f
commit 9ec1344945
25 changed files with 3354 additions and 37 deletions

View File

@@ -140,7 +140,7 @@ fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCurs
true
}
fn process_single_issue(
pub(crate) fn process_single_issue(
conn: &Connection,
config: &Config,
project_id: i64,

View File

@@ -135,13 +135,13 @@ pub async fn ingest_merge_requests(
Ok(result)
}
struct ProcessMrResult {
labels_created: usize,
assignees_linked: usize,
reviewers_linked: usize,
pub(crate) struct ProcessMrResult {
pub(crate) labels_created: usize,
pub(crate) assignees_linked: usize,
pub(crate) reviewers_linked: usize,
}
fn process_single_mr(
pub(crate) fn process_single_mr(
conn: &Connection,
config: &Config,
project_id: i64,

View File

@@ -6,6 +6,7 @@ pub mod merge_requests;
pub mod mr_diffs;
pub mod mr_discussions;
pub mod orchestrator;
pub(crate) mod surgical;
pub use discussions::{IngestDiscussionsResult, ingest_issue_discussions};
pub use issues::{IngestIssuesResult, IssueForDiscussionSync, ingest_issues};

View File

@@ -1097,7 +1097,7 @@ async fn drain_resource_events(
}
/// Store resource events using the provided connection (caller manages the transaction).
fn store_resource_events(
pub(crate) fn store_resource_events(
conn: &Connection,
project_id: i64,
entity_type: &str,
@@ -1406,7 +1406,7 @@ async fn drain_mr_closes_issues(
Ok(result)
}
fn store_closes_issues_refs(
pub(crate) fn store_closes_issues_refs(
conn: &Connection,
project_id: i64,
mr_local_id: i64,

469
src/ingestion/surgical.rs Normal file
View File

@@ -0,0 +1,469 @@
use futures::stream::StreamExt;
use rusqlite::Connection;
use rusqlite::OptionalExtension;
use tracing::{debug, warn};
use crate::Config;
use crate::core::error::{LoreError, Result};
use crate::documents::SourceType;
use crate::gitlab::GitLabClient;
use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest};
use crate::ingestion::dirty_tracker;
use crate::ingestion::discussions::ingest_issue_discussions;
use crate::ingestion::issues::{IssueForDiscussionSync, process_single_issue};
use crate::ingestion::merge_requests::{MrForDiscussionSync, process_single_mr};
use crate::ingestion::mr_diffs::upsert_mr_file_changes;
use crate::ingestion::mr_discussions::ingest_mr_discussions;
use crate::ingestion::orchestrator::{store_closes_issues_refs, store_resource_events};
// ---------------------------------------------------------------------------
// Result types
// ---------------------------------------------------------------------------
/// Outcome of ingesting a single issue in surgical mode.
#[derive(Debug)]
pub(crate) struct IngestIssueResult {
    // True when the payload was dropped because the DB already holds an
    // equal-or-newer row (TOCTOU guard).
    pub skipped_stale: bool,
    // (source type, local row id) pairs marked dirty for later doc regeneration.
    pub dirty_source_keys: Vec<(SourceType, i64)>,
}
/// Outcome of ingesting a single merge request in surgical mode.
#[derive(Debug)]
pub(crate) struct IngestMrResult {
    // True when the payload was dropped because the DB already holds an
    // equal-or-newer row (TOCTOU guard).
    pub skipped_stale: bool,
    // (source type, local row id) pairs marked dirty for later doc regeneration.
    pub dirty_source_keys: Vec<(SourceType, i64)>,
}
/// Aggregated outcome of a preflight fetch: successfully fetched entities
/// plus per-IID failures (preflight never aborts on an individual miss).
#[derive(Debug)]
pub(crate) struct PreflightResult {
    // Issues fetched successfully from GitLab.
    pub issues: Vec<GitLabIssue>,
    // Merge requests fetched successfully from GitLab.
    pub merge_requests: Vec<GitLabMergeRequest>,
    // One entry per IID that could not be fetched (404, network, unknown type).
    pub failures: Vec<PreflightFailure>,
}
/// A single preflight fetch failure, keyed by entity type and IID so the
/// caller can report exactly which targets were unavailable.
#[derive(Debug)]
pub(crate) struct PreflightFailure {
    // "issue" or "merge_request" (or the unrecognized type string as given).
    pub entity_type: String,
    pub iid: i64,
    pub error: LoreError,
}
// ---------------------------------------------------------------------------
// TOCTOU guard
// ---------------------------------------------------------------------------
/// Returns `true` if the payload is stale (same age or older than what the DB
/// already has). Returns `false` when the entity is new (no DB row) or when
/// the payload is strictly newer.
///
/// # Errors
/// Returns an error when `payload_updated_at` is not valid RFC 3339.
pub(crate) fn is_stale(payload_updated_at: &str, db_updated_at_ms: Option<i64>) -> Result<bool> {
    match db_updated_at_ms {
        // No stored row: nothing to be stale relative to.
        None => Ok(false),
        Some(db_ms) => match chrono::DateTime::parse_from_rfc3339(payload_updated_at) {
            // `<=` (not `<`): an equal timestamp is treated as stale too.
            Ok(dt) => Ok(dt.timestamp_millis() <= db_ms),
            Err(e) => Err(LoreError::Other(format!(
                "Failed to parse timestamp '{}': {}",
                payload_updated_at, e
            ))),
        },
    }
}
// ---------------------------------------------------------------------------
// Ingestion wrappers
// ---------------------------------------------------------------------------
/// Ingest a single issue by IID with TOCTOU guard and dirty marking.
///
/// Skips the write entirely (returning `skipped_stale: true`) when the DB
/// already holds an equal-or-newer row for this IID; otherwise upserts via
/// the shared incremental-sync path and marks the row dirty.
pub(crate) fn ingest_issue_by_iid(
    conn: &Connection,
    config: &Config,
    project_id: i64,
    issue: &GitLabIssue,
) -> Result<IngestIssueResult> {
    // Soft TOCTOU guard: the payload was fetched earlier (preflight) and may
    // be older than what an overlapping sync already wrote.
    let stored_ts = get_db_updated_at(conn, "issues", issue.iid, project_id)?;
    if is_stale(&issue.updated_at, stored_ts)? {
        debug!(iid = issue.iid, "Skipping stale issue (TOCTOU guard)");
        return Ok(IngestIssueResult {
            skipped_stale: true,
            dirty_source_keys: Vec::new(),
        });
    }
    // Reuse the incremental-sync upsert logic for this one entity.
    process_single_issue(conn, config, project_id, issue)?;
    // Resolve the local row id so downstream stages can target this issue.
    let issue_row_id: i64 = conn.query_row(
        "SELECT id FROM issues WHERE project_id = ? AND iid = ?",
        (project_id, issue.iid),
        |row| row.get(0),
    )?;
    dirty_tracker::mark_dirty(conn, SourceType::Issue, issue_row_id)?;
    Ok(IngestIssueResult {
        skipped_stale: false,
        dirty_source_keys: vec![(SourceType::Issue, issue_row_id)],
    })
}
/// Ingest a single merge request by IID with TOCTOU guard and dirty marking.
///
/// Mirrors [`ingest_issue_by_iid`]: skip when the DB row is equal-or-newer,
/// otherwise upsert through the shared sync path and mark the MR dirty.
pub(crate) fn ingest_mr_by_iid(
    conn: &Connection,
    config: &Config,
    project_id: i64,
    mr: &GitLabMergeRequest,
) -> Result<IngestMrResult> {
    // Soft TOCTOU guard against an overlapping sync having written newer data.
    let stored_ts = get_db_updated_at(conn, "merge_requests", mr.iid, project_id)?;
    if is_stale(&mr.updated_at, stored_ts)? {
        debug!(iid = mr.iid, "Skipping stale MR (TOCTOU guard)");
        return Ok(IngestMrResult {
            skipped_stale: true,
            dirty_source_keys: Vec::new(),
        });
    }
    // Reuse the incremental-sync upsert logic for this one entity.
    process_single_mr(conn, config, project_id, mr)?;
    // Resolve the local row id so downstream stages can target this MR.
    let mr_row_id: i64 = conn.query_row(
        "SELECT id FROM merge_requests WHERE project_id = ? AND iid = ?",
        (project_id, mr.iid),
        |row| row.get(0),
    )?;
    dirty_tracker::mark_dirty(conn, SourceType::MergeRequest, mr_row_id)?;
    Ok(IngestMrResult {
        skipped_stale: false,
        dirty_source_keys: vec![(SourceType::MergeRequest, mr_row_id)],
    })
}
// ---------------------------------------------------------------------------
// Preflight fetch
// ---------------------------------------------------------------------------
/// Fetch specific issues and MRs by IID from GitLab. Collects successes and
/// failures without aborting on individual 404s.
///
/// Requests are dispatched concurrently (up to 10 in-flight at once) to avoid
/// sequential round-trip latency when syncing many IIDs.
pub(crate) async fn preflight_fetch(
client: &GitLabClient,
gitlab_project_id: i64,
targets: &[(String, i64)],
) -> PreflightResult {
/// Max concurrent HTTP requests during preflight.
const PREFLIGHT_CONCURRENCY: usize = 10;
#[allow(clippy::large_enum_variant)]
enum FetchOutcome {
Issue(std::result::Result<GitLabIssue, (String, i64, LoreError)>),
MergeRequest(std::result::Result<GitLabMergeRequest, (String, i64, LoreError)>),
UnknownType(String, i64),
}
let mut result = PreflightResult {
issues: Vec::new(),
merge_requests: Vec::new(),
failures: Vec::new(),
};
let mut stream = futures::stream::iter(targets.iter().map(|(entity_type, iid)| {
let entity_type = entity_type.clone();
let iid = *iid;
async move {
match entity_type.as_str() {
"issue" => FetchOutcome::Issue(
client
.get_issue_by_iid(gitlab_project_id, iid)
.await
.map_err(|e| (entity_type, iid, e)),
),
"merge_request" => FetchOutcome::MergeRequest(
client
.get_mr_by_iid(gitlab_project_id, iid)
.await
.map_err(|e| (entity_type, iid, e)),
),
_ => FetchOutcome::UnknownType(entity_type, iid),
}
}
}))
.buffer_unordered(PREFLIGHT_CONCURRENCY);
while let Some(outcome) = stream.next().await {
match outcome {
FetchOutcome::Issue(Ok(issue)) => result.issues.push(issue),
FetchOutcome::Issue(Err((et, iid, e))) => {
result.failures.push(PreflightFailure {
entity_type: et,
iid,
error: e,
});
}
FetchOutcome::MergeRequest(Ok(mr)) => result.merge_requests.push(mr),
FetchOutcome::MergeRequest(Err((et, iid, e))) => {
result.failures.push(PreflightFailure {
entity_type: et,
iid,
error: e,
});
}
FetchOutcome::UnknownType(et, iid) => {
result.failures.push(PreflightFailure {
entity_type: et.clone(),
iid,
error: LoreError::Other(format!("Unknown entity type: {et}")),
});
}
}
}
result
}
// ---------------------------------------------------------------------------
// Dependent fetch helpers (surgical mode)
// ---------------------------------------------------------------------------
/// Counts returned from fetching dependents for a single entity.
/// All counters default to zero; a category that fails to fetch is simply
/// left at zero (see the best-effort handling in the fetchers below).
#[derive(Debug, Default)]
pub(crate) struct DependentFetchResult {
    // Total state + label + milestone events stored.
    pub resource_events_fetched: usize,
    pub discussions_fetched: usize,
    // MR-only: closes-issues references stored.
    pub closes_issues_stored: usize,
    // MR-only: file-change (diff) rows stored.
    pub file_changes_stored: usize,
}
/// Fetch and store all dependents for a single issue:
/// resource events (state, label, milestone) and discussions.
///
/// Each category is best-effort: a fetch failure is logged with `warn!` and
/// that category is skipped, so the remaining categories still run. Only DB
/// errors (transaction/commit failures) propagate as `Err`.
pub(crate) async fn fetch_dependents_for_issue(
    client: &GitLabClient,
    conn: &Connection,
    project_id: i64,
    gitlab_project_id: i64,
    iid: i64,
    local_id: i64,
    config: &Config,
) -> Result<DependentFetchResult> {
    let mut result = DependentFetchResult::default();
    // --- Resource events ---
    match client
        .fetch_all_resource_events(gitlab_project_id, "issue", iid)
        .await
    {
        Ok((state_events, label_events, milestone_events)) => {
            let count = state_events.len() + label_events.len() + milestone_events.len();
            // Store the events and advance the per-row sync watermark in one
            // transaction so a crash cannot leave the watermark ahead of the
            // stored events.
            let tx = conn.unchecked_transaction()?;
            store_resource_events(
                &tx,
                project_id,
                "issue",
                local_id,
                &state_events,
                &label_events,
                &milestone_events,
            )?;
            tx.execute(
                "UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
                [local_id],
            )?;
            tx.commit()?;
            result.resource_events_fetched = count;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to fetch resource events for issue, continuing"
            );
        }
    }
    // --- Discussions ---
    // Reuse the batch discussion ingester with a single-element slice.
    let sync_item = IssueForDiscussionSync {
        local_issue_id: local_id,
        iid,
        updated_at: 0, // not used for filtering in surgical mode
    };
    match ingest_issue_discussions(
        conn,
        client,
        config,
        gitlab_project_id,
        project_id,
        &[sync_item],
    )
    .await
    {
        Ok(disc_result) => {
            result.discussions_fetched = disc_result.discussions_fetched;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to ingest discussions for issue, continuing"
            );
        }
    }
    Ok(result)
}
/// Fetch and store all dependents for a single merge request:
/// resource events, discussions, closes-issues references, and file changes (diffs).
///
/// Mirrors `fetch_dependents_for_issue`: every category is best-effort
/// (fetch failures are logged and skipped), and each successful category is
/// committed together with its sync watermark in one transaction. Only DB
/// errors propagate as `Err`.
pub(crate) async fn fetch_dependents_for_mr(
    client: &GitLabClient,
    conn: &Connection,
    project_id: i64,
    gitlab_project_id: i64,
    iid: i64,
    local_id: i64,
    config: &Config,
) -> Result<DependentFetchResult> {
    let mut result = DependentFetchResult::default();
    // --- Resource events ---
    match client
        .fetch_all_resource_events(gitlab_project_id, "merge_request", iid)
        .await
    {
        Ok((state_events, label_events, milestone_events)) => {
            let count = state_events.len() + label_events.len() + milestone_events.len();
            // Events + watermark update are atomic.
            let tx = conn.unchecked_transaction()?;
            store_resource_events(
                &tx,
                project_id,
                "merge_request",
                local_id,
                &state_events,
                &label_events,
                &milestone_events,
            )?;
            tx.execute(
                "UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
                [local_id],
            )?;
            tx.commit()?;
            result.resource_events_fetched = count;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to fetch resource events for MR, continuing"
            );
        }
    }
    // --- Discussions ---
    // Reuse the batch discussion ingester with a single-element slice.
    let sync_item = MrForDiscussionSync {
        local_mr_id: local_id,
        iid,
        updated_at: 0,
    };
    match ingest_mr_discussions(
        conn,
        client,
        config,
        gitlab_project_id,
        project_id,
        &[sync_item],
    )
    .await
    {
        Ok(disc_result) => {
            result.discussions_fetched = disc_result.discussions_fetched;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to ingest discussions for MR, continuing"
            );
        }
    }
    // --- Closes issues ---
    match client.fetch_mr_closes_issues(gitlab_project_id, iid).await {
        Ok(closes_issues) => {
            let count = closes_issues.len();
            let tx = conn.unchecked_transaction()?;
            store_closes_issues_refs(&tx, project_id, local_id, &closes_issues)?;
            tx.execute(
                "UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
                [local_id],
            )?;
            tx.commit()?;
            result.closes_issues_stored = count;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to fetch closes_issues for MR, continuing"
            );
        }
    }
    // --- File changes (diffs) ---
    match client.fetch_mr_diffs(gitlab_project_id, iid).await {
        Ok(diffs) => {
            let tx = conn.unchecked_transaction()?;
            // The upsert reports how many rows it actually stored.
            let stored = upsert_mr_file_changes(&tx, local_id, project_id, &diffs)?;
            tx.execute(
                "UPDATE merge_requests SET diffs_synced_for_updated_at = updated_at WHERE id = ?",
                [local_id],
            )?;
            tx.commit()?;
            result.file_changes_stored = stored;
        }
        Err(e) => {
            warn!(
                iid,
                error = %e,
                "Failed to fetch diffs for MR, continuing"
            );
        }
    }
    Ok(result)
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Look up the stored `updated_at` (epoch ms) for an entity by IID, or
/// `None` when no row exists. Errors on an unrecognized table name.
fn get_db_updated_at(
    conn: &Connection,
    table: &str,
    iid: i64,
    project_id: i64,
) -> Result<Option<i64>> {
    // A whitelist of table names keeps the interpolated identifier safe
    // (SQL placeholders cannot bind table names).
    let sql = if table == "issues" {
        "SELECT updated_at FROM issues WHERE project_id = ?1 AND iid = ?2"
    } else if table == "merge_requests" {
        "SELECT updated_at FROM merge_requests WHERE project_id = ?1 AND iid = ?2"
    } else {
        return Err(LoreError::Other(format!(
            "Unknown table for updated_at lookup: {table}"
        )));
    };
    // `.optional()` maps the no-row case to Ok(None) instead of an error.
    let stored: Option<i64> = conn
        .query_row(sql, (project_id, iid), |row| row.get(0))
        .optional()?;
    Ok(stored)
}
#[cfg(test)]
#[path = "surgical_tests.rs"]
mod tests;

View File

@@ -0,0 +1,645 @@
use std::path::Path;
use super::*;
use crate::core::config::{
Config, EmbeddingConfig, GitLabConfig, LoggingConfig, ProjectConfig, ScoringConfig,
StorageConfig, SyncConfig,
};
use crate::core::db::{create_connection, run_migrations};
use crate::gitlab::types::{GitLabAuthor, GitLabMergeRequest};
// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------
/// Build an in-memory DB with migrations applied and a single project row
/// (gitlab_project_id = 100; its local `projects.id` is 1).
fn setup_db() -> rusqlite::Connection {
    let conn = create_connection(Path::new(":memory:")).expect("in-memory DB");
    run_migrations(&conn).expect("migrations");
    let insert = conn.execute(
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)
VALUES (100, 'group/repo', 'https://example.com/group/repo')",
        [],
    );
    insert.expect("insert project");
    conn
}
/// Minimal config pointing at a fake GitLab instance; all subsystem configs
/// use their defaults.
fn test_config() -> Config {
    Config {
        gitlab: GitLabConfig {
            base_url: "https://gitlab.example.com".to_string(),
            token_env_var: "GITLAB_TOKEN".to_string(),
            token: None,
        },
        // Must match the path inserted by `setup_db`.
        projects: vec![ProjectConfig {
            path: "group/repo".to_string(),
        }],
        default_project: None,
        sync: SyncConfig::default(),
        storage: StorageConfig::default(),
        embedding: EmbeddingConfig::default(),
        logging: LoggingConfig::default(),
        scoring: ScoringConfig::default(),
    }
}
/// Build a minimal GitLab issue payload for project 100 with a caller-chosen
/// `updated_at` (RFC 3339) so tests can exercise the TOCTOU guard.
fn make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue {
    GitLabIssue {
        id: iid * 1000, // unique gitlab_id
        iid,
        project_id: 100,
        title: format!("Test issue {iid}"),
        description: Some("Description".to_string()),
        state: "opened".to_string(),
        created_at: "2026-01-01T00:00:00.000+00:00".to_string(),
        updated_at: updated_at.to_string(),
        closed_at: None,
        author: GitLabAuthor {
            id: 1,
            username: "testuser".to_string(),
            name: "Test User".to_string(),
        },
        assignees: vec![],
        labels: vec![],
        milestone: None,
        due_date: None,
        web_url: format!("https://example.com/group/repo/-/issues/{iid}"),
    }
}
/// Build a minimal GitLab merge-request payload for project 100 with a
/// caller-chosen `updated_at` (RFC 3339) for TOCTOU-guard tests.
fn make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest {
    GitLabMergeRequest {
        id: iid * 1000, // unique gitlab_id
        iid,
        project_id: 100,
        title: format!("Test MR {iid}"),
        description: Some("MR description".to_string()),
        state: "opened".to_string(),
        draft: false,
        work_in_progress: false,
        source_branch: "feature".to_string(),
        target_branch: "main".to_string(),
        sha: Some("abc123".to_string()),
        references: None,
        detailed_merge_status: None,
        merge_status_legacy: None,
        created_at: "2026-01-01T00:00:00.000+00:00".to_string(),
        updated_at: updated_at.to_string(),
        merged_at: None,
        closed_at: None,
        author: GitLabAuthor {
            id: 1,
            username: "testuser".to_string(),
            name: "Test User".to_string(),
        },
        merge_user: None,
        merged_by: None,
        labels: vec![],
        assignees: vec![],
        reviewers: vec![],
        web_url: format!("https://example.com/group/repo/-/merge_requests/{iid}"),
        merge_commit_sha: None,
        squash_commit_sha: None,
    }
}
/// Read the stored `updated_at` (epoch ms) for an IID under local project 1,
/// or `None` when the row is missing or the table name is unknown.
fn get_db_updated_at_helper(conn: &rusqlite::Connection, table: &str, iid: i64) -> Option<i64> {
    let sql = if table == "issues" {
        "SELECT updated_at FROM issues WHERE project_id = 1 AND iid = ?1"
    } else if table == "merge_requests" {
        "SELECT updated_at FROM merge_requests WHERE project_id = 1 AND iid = ?1"
    } else {
        return None;
    };
    conn.query_row(sql, [iid], |row| row.get(0)).ok()
}
/// Snapshot of `dirty_sources` as sorted (source_type, source_id) pairs.
fn get_dirty_keys(conn: &rusqlite::Connection) -> Vec<(String, i64)> {
    let mut stmt = conn
        .prepare("SELECT source_type, source_id FROM dirty_sources ORDER BY source_type, source_id")
        .expect("prepare dirty_sources query");
    let rows = stmt
        .query_map([], |row| {
            let source_type: String = row.get(0)?;
            let source_id: i64 = row.get(1)?;
            Ok((source_type, source_id))
        })
        .expect("query dirty_sources");
    rows.collect::<std::result::Result<Vec<_>, _>>()
        .expect("collect dirty_sources")
}
// ---------------------------------------------------------------------------
// is_stale unit tests
// ---------------------------------------------------------------------------
#[test]
fn test_is_stale_parses_iso8601() {
    // 2026-02-17T12:00:00.000+00:00 -> 1_771_329_600_000 ms, i.e. the same
    // instant as the DB value passed below.
    let result = is_stale("2026-02-17T12:00:00.000+00:00", Some(1_771_329_600_000));
    assert!(result.is_ok());
    // Same timestamp => stale (is_stale uses <=, not <)
    assert!(result.unwrap());
}
#[test]
fn test_is_stale_handles_none_db_value() {
    // Absent DB row: the entity is new, so any valid payload is fresh.
    let outcome = is_stale("2026-02-17T12:00:00.000+00:00", None);
    assert!(outcome.is_ok());
    assert!(!outcome.unwrap(), "no DB row means not stale");
}
#[test]
fn test_is_stale_with_z_suffix() {
    // "Z" is the RFC 3339 shorthand for +00:00; same instant as the DB value.
    let outcome = is_stale("2026-02-17T12:00:00Z", Some(1_771_329_600_000));
    assert!(outcome.is_ok());
    assert!(outcome.unwrap(), "Z suffix should parse same as +00:00");
}
// ---------------------------------------------------------------------------
// Issue ingestion tests
// ---------------------------------------------------------------------------
#[test]
fn test_ingest_issue_by_iid_upserts_and_marks_dirty() {
    // A fresh issue should be upserted, reported as non-stale, and marked
    // dirty for document regeneration.
    let conn = setup_db();
    let config = test_config();
    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    // (was duplicated twice by copy-paste; one assertion suffices)
    assert!(!result.skipped_stale);
    assert!(!result.dirty_source_keys.is_empty());
    // Verify DB row exists
    let db_ts = get_db_updated_at_helper(&conn, "issues", 42);
    assert!(db_ts.is_some(), "issue should exist in DB");
    // Verify dirty marking
    let dirty = get_dirty_keys(&conn);
    assert!(
        dirty.iter().any(|(t, _)| t == "issue"),
        "dirty_sources should contain an issue entry"
    );
}
#[test]
fn test_toctou_skips_stale_issue() {
    // Re-ingesting a payload with an identical updated_at must be skipped by
    // the TOCTOU guard and must not create new dirty marks.
    let conn = setup_db();
    let config = test_config();
    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    // First ingest succeeds
    let r1 = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    assert!(!r1.skipped_stale);
    // Clear dirty to check second ingest doesn't re-mark
    conn.execute("DELETE FROM dirty_sources", []).unwrap();
    // Second ingest with same timestamp should be skipped
    // (duplicate copy-paste assertion removed)
    let r2 = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    assert!(r2.skipped_stale);
    assert!(r2.dirty_source_keys.is_empty());
    // No new dirty mark
    let dirty = get_dirty_keys(&conn);
    assert!(dirty.is_empty(), "stale skip should not create dirty marks");
}
#[test]
fn test_toctou_allows_newer_issue() {
    // A strictly newer updated_at must pass the TOCTOU guard.
    let conn = setup_db();
    let config = test_config();
    // Ingest at T1
    let issue_t1 = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    ingest_issue_by_iid(&conn, &config, 1, &issue_t1).unwrap();
    conn.execute("DELETE FROM dirty_sources", []).unwrap();
    // Ingest at T2 (newer) — should succeed
    // (duplicate copy-paste assertion removed)
    let issue_t2 = make_test_issue(42, "2026-02-17T13:00:00.000+00:00");
    let result = ingest_issue_by_iid(&conn, &config, 1, &issue_t2).unwrap();
    assert!(!result.skipped_stale);
}
#[test]
fn test_ingest_issue_returns_dirty_source_keys() {
    // A fresh ingest reports exactly one dirty key: the issue itself.
    let conn = setup_db();
    let config = test_config();
    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    let outcome = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    let keys = &outcome.dirty_source_keys;
    assert_eq!(keys.len(), 1);
    let (kind, row_id) = &keys[0];
    assert_eq!(kind.as_str(), "issue");
    assert!(*row_id > 0, "local_id should be positive");
}
#[test]
fn test_ingest_issue_updates_existing() {
    // Re-ingesting the same IID with a newer payload must update the row.
    let conn = setup_db();
    let config = test_config();
    let older = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    ingest_issue_by_iid(&conn, &config, 1, &older).unwrap();
    let before = get_db_updated_at_helper(&conn, "issues", 42).unwrap();
    // Newer version
    let newer = make_test_issue(42, "2026-02-17T14:00:00.000+00:00");
    let outcome = ingest_issue_by_iid(&conn, &config, 1, &newer).unwrap();
    assert!(!outcome.skipped_stale);
    let after = get_db_updated_at_helper(&conn, "issues", 42).unwrap();
    assert!(after > before, "DB timestamp should increase after update");
}
// ---------------------------------------------------------------------------
// MR ingestion tests
// ---------------------------------------------------------------------------
#[test]
fn test_ingest_mr_by_iid_upserts_and_marks_dirty() {
    // A fresh MR should be upserted, reported as non-stale, and marked dirty.
    let conn = setup_db();
    let config = test_config();
    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    let result = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    // (was duplicated twice by copy-paste; one assertion suffices)
    assert!(!result.skipped_stale);
    assert!(!result.dirty_source_keys.is_empty());
    let db_ts = get_db_updated_at_helper(&conn, "merge_requests", 101);
    assert!(db_ts.is_some(), "MR should exist in DB");
    let dirty = get_dirty_keys(&conn);
    assert!(
        dirty.iter().any(|(t, _)| t == "merge_request"),
        "dirty_sources should contain a merge_request entry"
    );
}
#[test]
fn test_toctou_skips_stale_mr() {
    // Re-ingesting an identical-timestamp MR is skipped by the TOCTOU guard.
    let conn = setup_db();
    let config = test_config();
    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    let r1 = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    assert!(!r1.skipped_stale);
    conn.execute("DELETE FROM dirty_sources", []).unwrap();
    // (duplicate copy-paste assertion removed)
    let r2 = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    assert!(r2.skipped_stale);
    assert!(r2.dirty_source_keys.is_empty());
}
#[test]
fn test_toctou_allows_newer_mr() {
    // A strictly newer updated_at must pass the TOCTOU guard.
    let conn = setup_db();
    let config = test_config();
    let mr_t1 = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    ingest_mr_by_iid(&conn, &config, 1, &mr_t1).unwrap();
    conn.execute("DELETE FROM dirty_sources", []).unwrap();
    // (duplicate copy-paste assertion removed)
    let mr_t2 = make_test_mr(101, "2026-02-17T13:00:00.000+00:00");
    let result = ingest_mr_by_iid(&conn, &config, 1, &mr_t2).unwrap();
    assert!(!result.skipped_stale);
}
#[test]
fn test_ingest_mr_returns_dirty_source_keys() {
    // A fresh ingest reports exactly one dirty key: the MR itself.
    let conn = setup_db();
    let config = test_config();
    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    let outcome = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    let keys = &outcome.dirty_source_keys;
    assert_eq!(keys.len(), 1);
    let (kind, row_id) = &keys[0];
    assert_eq!(kind.as_str(), "merge_request");
    assert!(*row_id > 0);
}
#[test]
fn test_ingest_mr_updates_existing() {
    // Re-ingesting the same IID with a newer payload must update the row.
    let conn = setup_db();
    let config = test_config();
    let older = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    ingest_mr_by_iid(&conn, &config, 1, &older).unwrap();
    let before = get_db_updated_at_helper(&conn, "merge_requests", 101).unwrap();
    let newer = make_test_mr(101, "2026-02-17T14:00:00.000+00:00");
    let outcome = ingest_mr_by_iid(&conn, &config, 1, &newer).unwrap();
    assert!(!outcome.skipped_stale);
    let after = get_db_updated_at_helper(&conn, "merge_requests", 101).unwrap();
    assert!(after > before, "DB timestamp should increase after update");
}
// ---------------------------------------------------------------------------
// Preflight fetch test (wiremock)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_preflight_fetch_returns_issues_and_mrs() {
    // One issue and one MR, both mocked as 200s: preflight should return
    // both entities and record no failures.
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};
    let mock_server = MockServer::start().await;
    // Issue fixture
    let issue_json = serde_json::json!({
        "id": 42000,
        "iid": 42,
        "project_id": 100,
        "title": "Test issue 42",
        "description": "desc",
        "state": "opened",
        "created_at": "2026-01-01T00:00:00.000+00:00",
        "updated_at": "2026-02-17T12:00:00.000+00:00",
        "author": {"id": 1, "username": "testuser", "name": "Test User"},
        "assignees": [],
        "labels": [],
        "web_url": "https://example.com/group/repo/-/issues/42"
    });
    // MR fixture
    let mr_json = serde_json::json!({
        "id": 101000,
        "iid": 101,
        "project_id": 100,
        "title": "Test MR 101",
        "description": "mr desc",
        "state": "opened",
        "draft": false,
        "work_in_progress": false,
        "source_branch": "feature",
        "target_branch": "main",
        "sha": "abc123",
        "created_at": "2026-01-01T00:00:00.000+00:00",
        "updated_at": "2026-02-17T12:00:00.000+00:00",
        "author": {"id": 1, "username": "testuser", "name": "Test User"},
        "labels": [],
        "assignees": [],
        "reviewers": [],
        "web_url": "https://example.com/group/repo/-/merge_requests/101"
    });
    // Single-entity GET endpoints used by get_issue_by_iid / get_mr_by_iid.
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/issues/42"))
        .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json))
        .mount(&mock_server)
        .await;
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/merge_requests/101"))
        .respond_with(ResponseTemplate::new(200).set_body_json(&mr_json))
        .mount(&mock_server)
        .await;
    let client = GitLabClient::new(&mock_server.uri(), "test-token", None);
    let targets = vec![
        ("issue".to_string(), 42i64),
        ("merge_request".to_string(), 101i64),
    ];
    let result = preflight_fetch(&client, 100, &targets).await;
    assert_eq!(result.issues.len(), 1);
    assert_eq!(result.issues[0].iid, 42);
    assert_eq!(result.merge_requests.len(), 1);
    assert_eq!(result.merge_requests[0].iid, 101);
    assert!(result.failures.is_empty());
}
// ---------------------------------------------------------------------------
// Dependent helper tests (bd-kanh)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_fetch_dependents_for_issue_empty_events() {
    // All dependent endpoints return empty arrays: the fetch must succeed
    // with zero counts rather than erroring.
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};
    let mock_server = MockServer::start().await;
    let conn = setup_db();
    let config = test_config();
    // Insert an issue so we have a local_id
    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    let local_id: i64 = conn
        .query_row(
            "SELECT id FROM issues WHERE project_id = 1 AND iid = 42",
            [],
            |row| row.get(0),
        )
        .unwrap();
    // Mock empty resource event endpoints (state, label, milestone)
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/issues/42/resource_state_events"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/issues/42/resource_label_events"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/issues/42/resource_milestone_events",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    // Mock empty discussions endpoint
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/issues/42/discussions"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    let client = GitLabClient::new(&mock_server.uri(), "test-token", None);
    let result = fetch_dependents_for_issue(&client, &conn, 1, 100, 42, local_id, &config)
        .await
        .unwrap();
    assert_eq!(result.resource_events_fetched, 0);
    assert_eq!(result.discussions_fetched, 0);
}
#[tokio::test]
async fn test_fetch_dependents_for_mr_empty_events() {
    // All four MR dependent categories (events, discussions, closes_issues,
    // diffs) return empty: the fetch must succeed with zero counts.
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};
    let mock_server = MockServer::start().await;
    let conn = setup_db();
    let config = test_config();
    // Insert an MR so we have a local_id
    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    let local_id: i64 = conn
        .query_row(
            "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 101",
            [],
            |row| row.get(0),
        )
        .unwrap();
    // Mock empty resource event endpoints for MR
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/merge_requests/101/resource_state_events",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/merge_requests/101/resource_label_events",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/merge_requests/101/resource_milestone_events",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    // Mock empty discussions endpoint for MR
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/merge_requests/101/discussions"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    // Mock empty closes_issues endpoint
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/merge_requests/101/closes_issues",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    // Mock empty diffs endpoint
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/merge_requests/101/diffs"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    let client = GitLabClient::new(&mock_server.uri(), "test-token", None);
    let result = fetch_dependents_for_mr(&client, &conn, 1, 100, 101, local_id, &config)
        .await
        .unwrap();
    assert_eq!(result.resource_events_fetched, 0);
    assert_eq!(result.discussions_fetched, 0);
    assert_eq!(result.closes_issues_stored, 0);
    assert_eq!(result.file_changes_stored, 0);
}
#[tokio::test]
async fn test_fetch_dependents_for_mr_with_closes_issues() {
    // closes_issues returns one reference to an issue that already exists
    // locally; it should be stored and counted.
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};
    let mock_server = MockServer::start().await;
    let conn = setup_db();
    let config = test_config();
    // Insert issue and MR so references can resolve
    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
    ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
    ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
    let mr_local_id: i64 = conn
        .query_row(
            "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 101",
            [],
            |row| row.get(0),
        )
        .unwrap();
    // Mock empty resource events (all three categories share a loop here)
    for endpoint in [
        "resource_state_events",
        "resource_label_events",
        "resource_milestone_events",
    ] {
        Mock::given(method("GET"))
            .and(path(format!(
                "/api/v4/projects/100/merge_requests/101/{endpoint}"
            )))
            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
            .mount(&mock_server)
            .await;
    }
    // Mock empty discussions
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/merge_requests/101/discussions"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    // Mock closes_issues with one reference
    Mock::given(method("GET"))
        .and(path(
            "/api/v4/projects/100/merge_requests/101/closes_issues",
        ))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([
            {
                "id": 42000,
                "iid": 42,
                "project_id": 100,
                "title": "Test issue 42",
                "state": "opened",
                "web_url": "https://example.com/group/repo/-/issues/42"
            }
        ])))
        .mount(&mock_server)
        .await;
    // Mock empty diffs
    Mock::given(method("GET"))
        .and(path("/api/v4/projects/100/merge_requests/101/diffs"))
        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
        .mount(&mock_server)
        .await;
    let client = GitLabClient::new(&mock_server.uri(), "test-token", None);
    let result = fetch_dependents_for_mr(&client, &conn, 1, 100, 101, mr_local_id, &config)
        .await
        .unwrap();
    assert_eq!(result.closes_issues_stored, 1);
}