feat: Add commit SHAs, closes_issues watermark, and PRD alignment

Migration 015 adds merge_commit_sha/squash_commit_sha to merge_requests
(Gate 4/5 prerequisites), closes_issues_synced_for_updated_at watermark
for incremental sync, and the missing idx_label_events_label index.

The MR transformer and ingestion pipeline now populate commit SHAs during
sync. The orchestrator uses watermark-based filtering for closes_issues
jobs instead of re-enqueuing all MRs every sync.

The Phase B PRD is updated to match the actual codebase: corrected
migration numbering (011-015), documented nullable label/milestone
fields (migration 012), watermark patterns (013), observability
infrastructure (014), simplified source_method values, and updated
entity_references schema to match implementation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 15:29:51 -05:00
parent ddcfff1026
commit 233eb546af
7 changed files with 189 additions and 63 deletions

View File

@@ -24,6 +24,8 @@ pub struct NormalizedMergeRequest {
pub closed_at: Option<i64>,
pub last_seen_at: i64,
pub web_url: String,
pub merge_commit_sha: Option<String>,
pub squash_commit_sha: Option<String>,
}
#[derive(Debug, Clone)]
@@ -100,6 +102,8 @@ pub fn transform_merge_request(
closed_at,
last_seen_at: now_ms(),
web_url: gitlab_mr.web_url.clone(),
merge_commit_sha: gitlab_mr.merge_commit_sha.clone(),
squash_commit_sha: gitlab_mr.squash_commit_sha.clone(),
},
label_names: gitlab_mr.labels.clone(),
assignee_usernames,

View File

@@ -247,4 +247,6 @@ pub struct GitLabMergeRequest {
#[serde(default)]
pub reviewers: Vec<GitLabReviewer>,
pub web_url: String,
pub merge_commit_sha: Option<String>,
pub squash_commit_sha: Option<String>,
}

View File

@@ -180,8 +180,9 @@ fn process_mr_in_transaction(
author_username, source_branch, target_branch, head_sha,
references_short, references_full, detailed_merge_status,
merge_user_username, created_at, updated_at, merged_at, closed_at,
last_seen_at, web_url, raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22)
last_seen_at, web_url, raw_payload_id,
merge_commit_sha, squash_commit_sha
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24)
ON CONFLICT(gitlab_id) DO UPDATE SET
title = excluded.title,
description = excluded.description,
@@ -200,7 +201,9 @@ fn process_mr_in_transaction(
closed_at = excluded.closed_at,
last_seen_at = excluded.last_seen_at,
web_url = excluded.web_url,
raw_payload_id = excluded.raw_payload_id",
raw_payload_id = excluded.raw_payload_id,
merge_commit_sha = excluded.merge_commit_sha,
squash_commit_sha = excluded.squash_commit_sha",
params![
mr_row.gitlab_id,
project_id,
@@ -224,6 +227,8 @@ fn process_mr_in_transaction(
now,
&mr_row.web_url,
payload_id,
&mr_row.merge_commit_sha,
&mr_row.squash_commit_sha,
],
)?;
@@ -368,7 +373,8 @@ fn reset_discussion_watermarks(conn: &Connection, project_id: i64) -> Result<()>
SET discussions_synced_for_updated_at = NULL,
discussions_sync_attempts = 0,
discussions_sync_last_error = NULL,
resource_events_synced_for_updated_at = NULL
resource_events_synced_for_updated_at = NULL,
closes_issues_synced_for_updated_at = NULL
WHERE project_id = ?",
[project_id],
)?;

View File

@@ -785,9 +785,32 @@ fn update_resource_event_watermark(
Ok(())
}
fn update_closes_issues_watermark(conn: &Connection, mr_local_id: i64) -> Result<()> {
conn.execute(
"UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
[mr_local_id],
)?;
Ok(())
}
fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<usize> {
let mut stmt =
conn.prepare_cached("SELECT id, iid FROM merge_requests WHERE project_id = ?1")?;
// Remove stale jobs for MRs that haven't changed since their last closes_issues sync
conn.execute(
"DELETE FROM pending_dependent_fetches \
WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'mr_closes_issues' \
AND entity_local_id IN ( \
SELECT id FROM merge_requests \
WHERE project_id = ?1 \
AND updated_at <= COALESCE(closes_issues_synced_for_updated_at, 0) \
)",
[project_id],
)?;
let mut stmt = conn.prepare_cached(
"SELECT id, iid FROM merge_requests \
WHERE project_id = ?1 \
AND updated_at > COALESCE(closes_issues_synced_for_updated_at, 0)",
)?;
let entities: Vec<(i64, i64)> = stmt
.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
.collect::<std::result::Result<Vec<_>, _>>()?;
@@ -886,6 +909,7 @@ async fn drain_mr_closes_issues(
match store_result {
Ok(()) => {
complete_job(conn, job.id)?;
update_closes_issues_watermark(conn, job.entity_local_id)?;
result.fetched += 1;
}
Err(e) => {
@@ -907,6 +931,7 @@ async fn drain_mr_closes_issues(
"Permanent API error for closes_issues, marking complete"
);
complete_job(conn, job.id)?;
update_closes_issues_watermark(conn, job.entity_local_id)?;
result.skipped_not_found += 1;
} else {
warn!(