feat: Add commit SHAs, closes_issues watermark, and PRD alignment

Migration 015 adds merge_commit_sha/squash_commit_sha to merge_requests (Gate 4/5 prerequisites), closes_issues_synced_for_updated_at watermark for incremental sync, and the missing idx_label_events_label index. The MR transformer and ingestion pipeline now populate commit SHAs during sync. The orchestrator uses watermark-based filtering for closes_issues jobs instead of re-enqueuing all MRs every sync. The Phase B PRD is updated to match the actual codebase: corrected migration numbering (011-015), documented nullable label/milestone fields (migration 012), watermark patterns (013), observability infrastructure (014), simplified source_method values, and updated entity_references schema to match implementation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 15:29:51 -05:00
parent ddcfff1026
commit 233eb546af
7 changed files with 189 additions and 63 deletions
--- a/docs/phase-b-temporal-intelligence.md
+++ b/docs/phase-b-temporal-intelligence.md
@@ -39,7 +39,7 @@ Five gates, each independently verifiable and shippable:
 - **Opt-in event ingestion.** New config flag `sync.fetchResourceEvents` (default `true`) controls whether the sync pipeline fetches event data. Users who don't need temporal features skip the additional API calls.
 - **Application-level graph traversal.** Cross-reference expansion uses BFS in Rust, not recursive SQL CTEs. Capped at configurable depth (default 1) for predictable performance.
 - **Evolutionary library extraction.** New commands are built with typed return structs from day one. Old commands are not retrofitted until a concrete consumer (MCP server, web UI) requires it.
- **Phase A fields cherry-picked as needed.** `merge_commit_sha` and `squash_commit_sha` are added in this phase's migration. Remaining Phase A fields are handled in their own migration later.
+- **Phase A fields cherry-picked as needed.** `merge_commit_sha` and `squash_commit_sha` are added in migration 015 and populated during MR ingestion. Remaining Phase A fields are handled in their own migration later.
 ### Scope Boundaries
@@ -71,9 +71,9 @@ The original approach was to parse system note body text with regex to extract s
 System note parsing is still used for events without structured APIs (see Gate 2), but with the explicit understanding that it's best-effort and fragile for non-English instances.
-### 1.2 Schema (Migration 010)
+### 1.2 Schema (Migration 011)
-**File:** `migrations/010_resource_events.sql`
+**File:** `migrations/011_resource_events.sql`
 ```sql
 -- State change events (opened, closed, reopened, merged, locked)
@@ -89,16 +89,16 @@ CREATE TABLE resource_state_events (
  actor_gitlab_id INTEGER,            -- GitLab user ID (stable; usernames can change)
  actor_username TEXT,                -- display/search convenience
  created_at INTEGER NOT NULL,        -- ms epoch UTC
  -- "closed by MR" link: structured by GitLab, not parsed from text
  source_merge_request_id INTEGER,    -- GitLab's MR iid that caused this state change
  source_commit TEXT,                 -- commit SHA that caused this state change
-  UNIQUE(gitlab_id, project_id),
+  source_merge_request_iid INTEGER,   -- iid from source_merge_request ref
  CHECK (
    (issue_id IS NOT NULL AND merge_request_id IS NULL)
    OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
  )
 );
 CREATE UNIQUE INDEX uq_state_events_gitlab ON resource_state_events(gitlab_id, project_id);
 CREATE INDEX idx_state_events_issue ON resource_state_events(issue_id)
  WHERE issue_id IS NOT NULL;
 CREATE INDEX idx_state_events_mr ON resource_state_events(merge_request_id)
@@ -114,24 +114,25 @@ CREATE TABLE resource_label_events (
  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
  merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,
  label_name TEXT NOT NULL,
  action TEXT NOT NULL CHECK (action IN ('add', 'remove')),
-  actor_gitlab_id INTEGER,            -- GitLab user ID (stable; usernames can change)
+  label_name TEXT,                    -- nullable: GitLab returns null for deleted labels (see §1.2.1)
-  actor_username TEXT,                -- display/search convenience
+  actor_gitlab_id INTEGER,
  actor_username TEXT,
  created_at INTEGER NOT NULL,        -- ms epoch UTC
  UNIQUE(gitlab_id, project_id),
  CHECK (
    (issue_id IS NOT NULL AND merge_request_id IS NULL)
    OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
  )
 );
 CREATE UNIQUE INDEX uq_label_events_gitlab ON resource_label_events(gitlab_id, project_id);
 CREATE INDEX idx_label_events_issue ON resource_label_events(issue_id)
  WHERE issue_id IS NOT NULL;
 CREATE INDEX idx_label_events_mr ON resource_label_events(merge_request_id)
  WHERE merge_request_id IS NOT NULL;
 CREATE INDEX idx_label_events_created ON resource_label_events(created_at);
-CREATE INDEX idx_label_events_label ON resource_label_events(label_name);
+-- Note: idx_label_events_label was added in migration 015 (not in the original 011)
 -- Milestone change events (add, remove)
 -- Source: GET /projects/:id/issues/:iid/resource_milestone_events
@@ -142,19 +143,20 @@ CREATE TABLE resource_milestone_events (
  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
  merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,
  milestone_title TEXT NOT NULL,
  milestone_id INTEGER,
  action TEXT NOT NULL CHECK (action IN ('add', 'remove')),
-  actor_gitlab_id INTEGER,            -- GitLab user ID (stable; usernames can change)
+  milestone_title TEXT,               -- nullable: GitLab returns null for deleted milestones (see §1.2.1)
-  actor_username TEXT,                -- display/search convenience
+  milestone_id INTEGER,
  actor_gitlab_id INTEGER,
  actor_username TEXT,
  created_at INTEGER NOT NULL,        -- ms epoch UTC
  UNIQUE(gitlab_id, project_id),
  CHECK (
    (issue_id IS NOT NULL AND merge_request_id IS NULL)
    OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
  )
 );
 CREATE UNIQUE INDEX uq_milestone_events_gitlab ON resource_milestone_events(gitlab_id, project_id);
 CREATE INDEX idx_milestone_events_issue ON resource_milestone_events(issue_id)
  WHERE issue_id IS NOT NULL;
 CREATE INDEX idx_milestone_events_mr ON resource_milestone_events(merge_request_id)
@@ -162,6 +164,27 @@ CREATE INDEX idx_milestone_events_mr ON resource_milestone_events(merge_request_
 CREATE INDEX idx_milestone_events_created ON resource_milestone_events(created_at);
 ```
 #### 1.2.1 Nullable Label and Milestone Fields (Migration 012)
 GitLab returns `null` for `label` and `milestone` in Resource Events when the referenced label or milestone has been deleted from the project. This was discovered in production after the initial schema deployed with `NOT NULL` constraints.
 **Migration 012** recreates `resource_label_events` and `resource_milestone_events` with nullable `label_name` and `milestone_title` columns. The table-swap approach (create new → copy → drop old → rename) is required because SQLite doesn't support `ALTER COLUMN`.
 Timeline queries that encounter null labels/milestones display `"[deleted label]"` or `"[deleted milestone]"` in human output and omit the name field in robot JSON.
 #### 1.2.2 Resource Event Watermarks (Migration 013)
 To avoid re-fetching resource events for every entity on every sync, a watermark column tracks the `updated_at` value at the time of last successful event fetch:
 ```sql
 ALTER TABLE issues ADD COLUMN resource_events_synced_for_updated_at INTEGER;
 ALTER TABLE merge_requests ADD COLUMN resource_events_synced_for_updated_at INTEGER;
 ```
 **Incremental behavior:** During sync, only entities where `updated_at > COALESCE(resource_events_synced_for_updated_at, 0)` are enqueued for resource event fetching. On `--full` sync, these watermarks are reset to `NULL`, causing all entities to be re-enqueued.
 This mirrors the existing `discussions_synced_for_updated_at` pattern and works in conjunction with the dependent fetch queue.
 ### 1.3 Config Extension
 **File:** `src/core/config.rs`
@@ -223,7 +246,7 @@ pub struct GitLabLabelEvent {
    pub created_at: String,
    pub resource_type: String,
    pub resource_id: i64,
-    pub label: GitLabLabelRef,
+    pub label: Option<GitLabLabelRef>,  // nullable: deleted labels return null
    pub action: String,              // "add" | "remove"
 }
@@ -234,7 +257,7 @@ pub struct GitLabMilestoneEvent {
    pub created_at: String,
    pub resource_type: String,
    pub resource_id: i64,
-    pub milestone: GitLabMilestoneRef,
+    pub milestone: Option<GitLabMilestoneRef>,  // nullable: deleted milestones return null
    pub action: String,              // "add" | "remove"
 }
 ```
@@ -243,7 +266,7 @@ pub struct GitLabMilestoneEvent {
 **Architecture:** Generic dependent-fetch queue, generalizing the `pending_discussion_fetches` pattern. A single queue table serves all dependent resource types across Gates 1, 2, and 4, avoiding schema churn as new fetch types are added.
-**New queue table (in migration 010):**
+**New queue table (in migration 011):**
 ```sql
 -- Generic queue for all dependent resource fetches (events, closes_issues, diffs)
@@ -302,16 +325,32 @@ Acceptable for initial sync. Incremental sync adds negligible overhead.
 ### 1.7 Acceptance Criteria
- [ ] Migration 010 creates all three event tables + generic dependent fetch queue
+- [x] Migration 011 creates all three event tables + generic dependent fetch queue
- [ ] `lore sync` fetches resource events for changed entities when `fetchResourceEvents` is true
+- [x] `lore sync` fetches resource events for changed entities when `fetchResourceEvents` is true
- [ ] `lore sync --no-events` skips event fetching
+- [x] `lore sync --no-events` skips event fetching
- [ ] Event fetch failures are queued for retry with exponential backoff
+- [x] Event fetch failures are queued for retry with exponential backoff
- [ ] Stale locks (crashed sync) automatically reclaimed on next run
+- [x] Stale locks (crashed sync) automatically reclaimed on next run
- [ ] `lore count events` shows event counts by type
+- [x] `lore count events` shows event counts by type
 - [ ] `lore stats --check` validates event table referential integrity
 - [ ] `lore stats --check` validates dependent job queue health (no stuck locks, retryable jobs visible)
 - [ ] Robot mode JSON for all new commands
 ### 1.8 Observability Infrastructure (Migration 014)
 The sync pipeline includes lightweight observability via `sync_runs` enrichment. Migration 014 adds:
 ```sql
 ALTER TABLE sync_runs ADD COLUMN run_id TEXT;                  -- correlation ID for log tracing
 ALTER TABLE sync_runs ADD COLUMN total_items_processed INTEGER DEFAULT 0;
 ALTER TABLE sync_runs ADD COLUMN total_errors INTEGER DEFAULT 0;
 CREATE INDEX IF NOT EXISTS idx_sync_runs_run_id ON sync_runs(run_id);
 ```
 **Purpose:** The `run_id` column correlates log entries (via `tracing`) with sync run records. `total_items_processed` and `total_errors` provide aggregate counts for `lore sync-status` and robot mode health checks without requiring log parsing.
 This is separate from the event tables but supports the same operational workflow — answering "did the last sync succeed?" and "how many entities were processed?" programmatically.
 ---
 ## Gate 2: Cross-Reference Extraction
@@ -320,10 +359,10 @@ Acceptable for initial sync. Incremental sync adds negligible overhead.
 Temporal queries need to follow links between entities: "MR !567 closed issue #234", "issue #234 mentioned in MR !567", "#299 was opened as a follow-up to !567". These relationships are captured in two places:
-1. **Structured API:** `GET /projects/:id/merge_requests/:iid/closes_issues` returns issues that close when the MR merges. Also, `resource_state_events` includes `source_merge_request_id` for "closed by MR" events.
+1. **Structured API:** `GET /projects/:id/merge_requests/:iid/closes_issues` returns issues that close when the MR merges. Also, `resource_state_events` includes `source_merge_request_iid` for "closed by MR" events.
 2. **System notes:** Cross-references like "mentioned in !456" and "closed by !789" appear in system note body text.
-### 2.2 Schema (in Migration 010)
+### 2.2 Schema (in Migration 011)
 ```sql
 -- Cross-references between entities
@@ -340,33 +379,49 @@ Temporal queries need to follow links between entities: "MR !567 closed issue #2
 -- silently dropping them. Timeline output marks these as "[external]".
 CREATE TABLE entity_references (
  id INTEGER PRIMARY KEY,
  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  source_entity_type TEXT NOT NULL CHECK (source_entity_type IN ('issue', 'merge_request')),
  source_entity_id INTEGER NOT NULL,   -- local DB id
  target_entity_type TEXT NOT NULL CHECK (target_entity_type IN ('issue', 'merge_request')),
  target_entity_id INTEGER,            -- local DB id (NULL when target is unresolved/external)
  target_project_path TEXT,            -- e.g. "group/other-repo" (populated for cross-project refs)
  target_entity_iid INTEGER,           -- GitLab iid (populated when target_entity_id is NULL)
-  reference_type TEXT NOT NULL,        -- 'closes' | 'mentioned' | 'related'
+  reference_type TEXT NOT NULL CHECK (reference_type IN ('closes', 'mentioned', 'related')),
-  source_method TEXT NOT NULL,         -- 'api_closes_issues' | 'api_state_event' | 'system_note_parse'
+  source_method TEXT NOT NULL CHECK (source_method IN ('api', 'note_parse', 'description_parse')),
-  created_at INTEGER,                  -- when the reference was created (if known)
+  created_at INTEGER NOT NULL          -- ms epoch UTC
  UNIQUE(source_entity_type, source_entity_id, target_entity_type,
         COALESCE(target_entity_id, -1), COALESCE(target_project_path, ''),
         COALESCE(target_entity_iid, -1), reference_type)
 );
-CREATE INDEX idx_refs_source ON entity_references(source_entity_type, source_entity_id);
+-- Unique constraint includes source_method: the same relationship can be discovered by
-CREATE INDEX idx_refs_target ON entity_references(target_entity_type, target_entity_id)
+-- multiple methods (e.g., closes_issues API and a state event), and we store both for provenance.
 CREATE UNIQUE INDEX uq_entity_refs ON entity_references(
  project_id, source_entity_type, source_entity_id, target_entity_type,
  COALESCE(target_entity_id, -1), COALESCE(target_project_path, ''),
  COALESCE(target_entity_iid, -1), reference_type, source_method
 );
 CREATE INDEX idx_entity_refs_source ON entity_references(source_entity_type, source_entity_id);
 CREATE INDEX idx_entity_refs_target ON entity_references(target_entity_id)
  WHERE target_entity_id IS NOT NULL;
-CREATE INDEX idx_refs_unresolved ON entity_references(target_project_path, target_entity_iid)
+CREATE INDEX idx_entity_refs_unresolved ON entity_references(target_project_path, target_entity_iid)
  WHERE target_entity_id IS NULL;
 ```
 **`source_method` values:**
 | Value | Meaning |
 |-------|---------|
 | `'api'` | Populated from structured GitLab APIs (`closes_issues`, `resource_state_events`) |
 | `'note_parse'` | Extracted from system note body text (best-effort, English only) |
 | `'description_parse'` | Extracted from issue/MR description body text (future) |
 The original design used more granular values (`'api_closes_issues'`, `'api_state_event'`, `'system_note_parse'`). In practice, the API-sourced references don't need sub-method distinction — the `reference_type` already captures the semantic relationship — so the implementation simplified to three values.
 ### 2.3 Population Strategy
 **Tier 1 — Structured APIs (reliable):**
-1. **`closes_issues` endpoint:** After MR ingestion, fetch `GET /projects/:id/merge_requests/:iid/closes_issues`. Insert `reference_type = 'closes'`, `source_method = 'api_closes_issues'`. Source = MR, target = issue.
+1. **`closes_issues` endpoint:** After MR ingestion, fetch `GET /projects/:id/merge_requests/:iid/closes_issues`. Insert `reference_type = 'closes'`, `source_method = 'api'`. Source = MR, target = issue.
-2. **State events:** When `resource_state_events` contains `source_merge_request_id`, insert `reference_type = 'closes'`, `source_method = 'api_state_event'`. Source = MR (referenced by iid), target = issue (that received the state change).
+2. **State events:** When `resource_state_events` contains `source_merge_request_iid`, insert `reference_type = 'closes'`, `source_method = 'api'`. Source = MR (referenced by iid), target = issue (that received the state change).
 **Tier 2 — System note parsing (best-effort):**
@@ -385,14 +440,14 @@ closed by #{iid}
 **Cross-project references:** When a system note references `{group}/{project}#{iid}` and the target project is not synced locally, store with `target_entity_id = NULL`, `target_project_path = '{group}/{project}'`, `target_entity_iid = {iid}`. These unresolved references are still valuable for timeline narratives — they indicate external dependencies and decision context even when we can't traverse further.
-Insert with `source_method = 'system_note_parse'`. Accept that:
+Insert with `source_method = 'note_parse'`. Accept that:
 - This breaks on non-English GitLab instances
 - Format may vary across GitLab versions
 - Log parse failures at `debug` level for monitoring
-**Tier 3 — Description/body parsing (deferred):**
+**Tier 3 — Description/body parsing (`source_method = 'description_parse'`, deferred):**
-Issue and MR descriptions often contain `#123` or `!456` references. Parsing these is lower confidence (mentions != relationships) and is deferred to a future iteration.
+Issue and MR descriptions often contain `#123` or `!456` references. Parsing these is lower confidence (mentions != relationships) and is deferred to a future iteration. The `source_method` value `'description_parse'` is reserved in the CHECK constraint for this future work.
 ### 2.4 Ingestion Flow
@@ -401,6 +456,8 @@ The `closes_issues` fetch uses the generic dependent fetch queue (`job_type = 'm
 - One additional API call per MR: `GET /projects/:id/merge_requests/:iid/closes_issues`
 - Cross-reference parsing from system notes runs as a local post-processing step (no API calls) after all dependent fetches complete
 **Watermark pattern (migration 015):** A `closes_issues_synced_for_updated_at` column on `merge_requests` tracks the last `updated_at` value at which closes_issues data was fetched. Only MRs where `updated_at > COALESCE(closes_issues_synced_for_updated_at, 0)` are enqueued for re-fetching. The watermark is updated after successful fetch or after a permanent API error (e.g., 404 for external MRs). On `--full` sync, the watermark is reset to `NULL`.
 ### 2.5 Acceptance Criteria
 - [ ] `entity_references` table populated from `closes_issues` API for all synced MRs
@@ -562,7 +619,7 @@ Evidence notes (`NOTE` events) show the first ~200 characters of FTS5-matched no
        "via": {
          "from": { "type": "merge_request", "iid": 567, "project": "group/repo" },
          "reference_type": "closes",
-          "source_method": "api_closes_issues"
+          "source_method": "api"
        }
      }
    ],
@@ -639,9 +696,13 @@ Evidence notes (`NOTE` events) show the first ~200 characters of FTS5-matched no
 ## Gate 4: File Decision History (`lore file-history`)
-### 4.1 Schema (Migration 011)
+### 4.1 Schema
-**File:** `migrations/011_file_changes.sql`
+**Commit SHAs (Migration 015 — already applied):**
 `merge_commit_sha` and `squash_commit_sha` were added to `merge_requests` in migration 015. These are now populated during MR ingestion and available for Gate 4/5 queries.
 **File changes table (future migration — not yet created):**
 ```sql
 -- Files changed by each merge request
@@ -660,11 +721,6 @@ CREATE INDEX idx_mr_files_new_path ON mr_file_changes(new_path);
 CREATE INDEX idx_mr_files_old_path ON mr_file_changes(old_path)
  WHERE old_path IS NOT NULL;
 CREATE INDEX idx_mr_files_mr ON mr_file_changes(merge_request_id);
 -- Add commit SHAs to merge_requests (cherry-picked from Phase A)
 -- These link MRs to actual git history
 ALTER TABLE merge_requests ADD COLUMN merge_commit_sha TEXT;
 ALTER TABLE merge_requests ADD COLUMN squash_commit_sha TEXT;
 ```
 ### 4.2 Config Extension
@@ -881,14 +937,16 @@ When git integration is added:
 ### Migration Numbering
-Phase B uses migration numbers starting at 010:
+Phase B uses migration numbers 011–015. The original plan assumed migration 010 was available, but chunk config (`010_chunk_config.sql`) was implemented first, shifting everything by +1.
-| Migration | Content | Gate |
+| Migration | File | Content | Gate |
-|-----------|---------|------|
+|-----------|------|---------|------|
-| 010 | Resource event tables, generic dependent fetch queue, entity_references | Gates 1, 2 |
+| 011 | `011_resource_events.sql` | Resource event tables (state, label, milestone), entity_references, generic dependent fetch queue | Gates 1, 2 |
-| 011 | mr_file_changes, merge_commit_sha, squash_commit_sha | Gate 4 |
+| 012 | `012_nullable_label_milestone.sql` | Make `label_name` and `milestone_title` nullable for deleted labels/milestones | Gate 1 (fix) |
-
+| 013 | `013_resource_event_watermarks.sql` | Add `resource_events_synced_for_updated_at` to issues and merge_requests | Gate 1 (optimization) |
-Phase A's complete field capture migration should use 012+ when implemented, skipping fields already added by 011 (`merge_commit_sha`, `squash_commit_sha`).
+| 014 | `014_sync_runs_enrichment.sql` | Observability: `run_id`, `total_items_processed`, `total_errors` on sync_runs | Observability |
 | 015 | `015_commit_shas_and_closes_watermark.sql` | `merge_commit_sha`, `squash_commit_sha`, `closes_issues_synced_for_updated_at` on merge_requests; `idx_label_events_label` index | Gates 2, 4 |
 | TBD | — | `mr_file_changes` table for MR diff data | Gate 4 |
 ### Backward Compatibility
@@ -909,7 +967,7 @@ Phase A's complete field capture migration should use 012+ when implemented, ski
 | GitLab diffs API returns large payloads | Low | Extract file metadata only, discard diff content |
 | Cross-reference graph traversal unbounded | Medium | BFS depth capped at configurable limit (default 1); `mentioned` edges excluded by default |
 | Cross-project references lost when target not synced | Medium | Unresolved references stored with `target_entity_id = NULL`; still appear in timeline output |
-| Phase A migration numbering conflict | Low | Phase B uses 010-011; Phase A uses 012+ |
+| Phase A migration numbering conflict | Low | Resolved: chunk config took 010; Phase B shifted to 011-015 |
 | Timeline output lacks "why" evidence | Medium | Evidence-bearing notes from FTS5 included as first-class timeline events |
 | Squash commits break blame-to-MR mapping | Medium | Tier 2 (git integration) deferred; Tier 1 uses file-level MR matching |
--- a/migrations/015_commit_shas_and_closes_watermark.sql
+++ b/migrations/015_commit_shas_and_closes_watermark.sql
@@ -0,0 +1,17 @@
 -- Migration 015: Add commit SHAs to merge_requests, closes_issues watermark,
 -- and missing label_name index on resource_label_events.
 -- Commit SHAs link MRs to actual git history (needed for Gate 4: file-history, Gate 5: trace)
 ALTER TABLE merge_requests ADD COLUMN merge_commit_sha TEXT;
 ALTER TABLE merge_requests ADD COLUMN squash_commit_sha TEXT;
 -- Watermark for closes_issues sync (same pattern as resource_events_synced_for_updated_at)
 -- Prevents re-fetching closes_issues for MRs that haven't changed since last sync
 ALTER TABLE merge_requests ADD COLUMN closes_issues_synced_for_updated_at INTEGER;
 -- Missing index from original spec: enables efficient label-name filtering in timeline queries
 CREATE INDEX IF NOT EXISTS idx_label_events_label ON resource_label_events(label_name);
 -- Update schema version
 INSERT INTO schema_version (version, applied_at, description)
 VALUES (15, strftime('%s', 'now') * 1000, 'Add commit SHAs, closes_issues watermark, and label event index');
--- a/src/gitlab/transformers/merge_request.rs
+++ b/src/gitlab/transformers/merge_request.rs
@@ -24,6 +24,8 @@ pub struct NormalizedMergeRequest {
    pub closed_at: Option<i64>,
    pub last_seen_at: i64,
    pub web_url: String,
    pub merge_commit_sha: Option<String>,
    pub squash_commit_sha: Option<String>,
 }
 #[derive(Debug, Clone)]
@@ -100,6 +102,8 @@ pub fn transform_merge_request(
            closed_at,
            last_seen_at: now_ms(),
            web_url: gitlab_mr.web_url.clone(),
            merge_commit_sha: gitlab_mr.merge_commit_sha.clone(),
            squash_commit_sha: gitlab_mr.squash_commit_sha.clone(),
        },
        label_names: gitlab_mr.labels.clone(),
        assignee_usernames,
--- a/src/gitlab/types.rs
+++ b/src/gitlab/types.rs
@@ -247,4 +247,6 @@ pub struct GitLabMergeRequest {
    #[serde(default)]
    pub reviewers: Vec<GitLabReviewer>,
    pub web_url: String,
    pub merge_commit_sha: Option<String>,
    pub squash_commit_sha: Option<String>,
 }
--- a/src/ingestion/merge_requests.rs
+++ b/src/ingestion/merge_requests.rs
@@ -180,8 +180,9 @@ fn process_mr_in_transaction(
            author_username, source_branch, target_branch, head_sha,
            references_short, references_full, detailed_merge_status,
            merge_user_username, created_at, updated_at, merged_at, closed_at,
-            last_seen_at, web_url, raw_payload_id
+            last_seen_at, web_url, raw_payload_id,
-        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22)
+            merge_commit_sha, squash_commit_sha
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24)
        ON CONFLICT(gitlab_id) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
@@ -200,7 +201,9 @@ fn process_mr_in_transaction(
            closed_at = excluded.closed_at,
            last_seen_at = excluded.last_seen_at,
            web_url = excluded.web_url,
-            raw_payload_id = excluded.raw_payload_id",
+            raw_payload_id = excluded.raw_payload_id,
            merge_commit_sha = excluded.merge_commit_sha,
            squash_commit_sha = excluded.squash_commit_sha",
        params![
            mr_row.gitlab_id,
            project_id,
@@ -224,6 +227,8 @@ fn process_mr_in_transaction(
            now,
            &mr_row.web_url,
            payload_id,
            &mr_row.merge_commit_sha,
            &mr_row.squash_commit_sha,
        ],
    )?;
@@ -368,7 +373,8 @@ fn reset_discussion_watermarks(conn: &Connection, project_id: i64) -> Result<()>
         SET discussions_synced_for_updated_at = NULL,
             discussions_sync_attempts = 0,
             discussions_sync_last_error = NULL,
-             resource_events_synced_for_updated_at = NULL
+             resource_events_synced_for_updated_at = NULL,
             closes_issues_synced_for_updated_at = NULL
         WHERE project_id = ?",
        [project_id],
    )?;
--- a/src/ingestion/orchestrator.rs
+++ b/src/ingestion/orchestrator.rs
@@ -785,9 +785,32 @@ fn update_resource_event_watermark(
    Ok(())
 }
 fn update_closes_issues_watermark(conn: &Connection, mr_local_id: i64) -> Result<()> {
    conn.execute(
        "UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
        [mr_local_id],
    )?;
    Ok(())
 }
 fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<usize> {
-    let mut stmt =
+    // Remove stale jobs for MRs that haven't changed since their last closes_issues sync
-        conn.prepare_cached("SELECT id, iid FROM merge_requests WHERE project_id = ?1")?;
+    conn.execute(
        "DELETE FROM pending_dependent_fetches \
         WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'mr_closes_issues' \
           AND entity_local_id IN ( \
               SELECT id FROM merge_requests \
               WHERE project_id = ?1 \
                 AND updated_at <= COALESCE(closes_issues_synced_for_updated_at, 0) \
           )",
        [project_id],
    )?;
    let mut stmt = conn.prepare_cached(
        "SELECT id, iid FROM merge_requests \
         WHERE project_id = ?1 \
           AND updated_at > COALESCE(closes_issues_synced_for_updated_at, 0)",
    )?;
    let entities: Vec<(i64, i64)> = stmt
        .query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
@@ -886,6 +909,7 @@ async fn drain_mr_closes_issues(
                    match store_result {
                        Ok(()) => {
                            complete_job(conn, job.id)?;
                            update_closes_issues_watermark(conn, job.entity_local_id)?;
                            result.fetched += 1;
                        }
                        Err(e) => {
@@ -907,6 +931,7 @@ async fn drain_mr_closes_issues(
                            "Permanent API error for closes_issues, marking complete"
                        );
                        complete_job(conn, job.id)?;
                        update_closes_issues_watermark(conn, job.entity_local_id)?;
                        result.skipped_not_found += 1;
                    } else {
                        warn!(
--- a/tests/mr_transformer_tests.rs
+++ b/tests/mr_transformer_tests.rs
@@ -51,6 +51,8 @@ fn make_test_mr() -> GitLabMergeRequest {
            name: "Alice Wong".to_string(),
        }],
        web_url: "https://gitlab.example.com/group/project/-/merge_requests/42".to_string(),
        merge_commit_sha: Some("merge1234567890".to_string()),
        squash_commit_sha: None,
    }
 }
@@ -98,6 +100,18 @@ fn transforms_mr_with_all_fields() {
    );
 }
 #[test]
 fn preserves_commit_shas() {
    let mr = make_test_mr();
    let result = transform_merge_request(&mr, 200).unwrap();
    assert_eq!(
        result.merge_request.merge_commit_sha,
        Some("merge1234567890".to_string())
    );
    assert!(result.merge_request.squash_commit_sha.is_none());
 }
 #[test]
 fn parses_timestamps_to_ms_epoch() {
    let mr = make_test_mr();