2 Commits

Author SHA1 Message Date
Taylor Eernisse
a7d5d1c99f chore(beads): Update issue tracker metadata
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 15:30:01 -05:00
Taylor Eernisse
233eb546af feat: Add commit SHAs, closes_issues watermark, and PRD alignment
Migration 015 adds merge_commit_sha/squash_commit_sha to merge_requests
(Gate 4/5 prerequisites), closes_issues_synced_for_updated_at watermark
for incremental sync, and the missing idx_label_events_label index.

The MR transformer and ingestion pipeline now populate commit SHAs during
sync. The orchestrator uses watermark-based filtering for closes_issues
jobs instead of re-enqueuing all MRs every sync.

The Phase B PRD is updated to match the actual codebase: corrected
migration numbering (011-015), documented nullable label/milestone
fields (migration 012), watermark patterns (013), observability
infrastructure (014), simplified source_method values, and updated
entity_references schema to match implementation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 15:29:51 -05:00
9 changed files with 196 additions and 70 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1 +1 @@
bd-dty
bd-2fc

View File

@@ -39,7 +39,7 @@ Five gates, each independently verifiable and shippable:
- **Opt-in event ingestion.** New config flag `sync.fetchResourceEvents` (default `true`) controls whether the sync pipeline fetches event data. Users who don't need temporal features skip the additional API calls.
- **Application-level graph traversal.** Cross-reference expansion uses BFS in Rust, not recursive SQL CTEs. Capped at configurable depth (default 1) for predictable performance.
- **Evolutionary library extraction.** New commands are built with typed return structs from day one. Old commands are not retrofitted until a concrete consumer (MCP server, web UI) requires it.
- **Phase A fields cherry-picked as needed.** `merge_commit_sha` and `squash_commit_sha` are added in this phase's migration. Remaining Phase A fields are handled in their own migration later.
- **Phase A fields cherry-picked as needed.** `merge_commit_sha` and `squash_commit_sha` are added in migration 015 and populated during MR ingestion. Remaining Phase A fields are handled in their own migration later.
### Scope Boundaries
@@ -71,9 +71,9 @@ The original approach was to parse system note body text with regex to extract s
System note parsing is still used for events without structured APIs (see Gate 2), but with the explicit understanding that it's best-effort and fragile for non-English instances.
### 1.2 Schema (Migration 010)
### 1.2 Schema (Migration 011)
**File:** `migrations/010_resource_events.sql`
**File:** `migrations/011_resource_events.sql`
```sql
-- State change events (opened, closed, reopened, merged, locked)
@@ -89,16 +89,16 @@ CREATE TABLE resource_state_events (
actor_gitlab_id INTEGER, -- GitLab user ID (stable; usernames can change)
actor_username TEXT, -- display/search convenience
created_at INTEGER NOT NULL, -- ms epoch UTC
-- "closed by MR" link: structured by GitLab, not parsed from text
source_merge_request_id INTEGER, -- GitLab's MR iid that caused this state change
source_commit TEXT, -- commit SHA that caused this state change
UNIQUE(gitlab_id, project_id),
source_merge_request_iid INTEGER, -- iid from source_merge_request ref
CHECK (
(issue_id IS NOT NULL AND merge_request_id IS NULL)
OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
)
);
CREATE UNIQUE INDEX uq_state_events_gitlab ON resource_state_events(gitlab_id, project_id);
CREATE INDEX idx_state_events_issue ON resource_state_events(issue_id)
WHERE issue_id IS NOT NULL;
CREATE INDEX idx_state_events_mr ON resource_state_events(merge_request_id)
@@ -114,24 +114,25 @@ CREATE TABLE resource_label_events (
project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,
label_name TEXT NOT NULL,
action TEXT NOT NULL CHECK (action IN ('add', 'remove')),
actor_gitlab_id INTEGER, -- GitLab user ID (stable; usernames can change)
actor_username TEXT, -- display/search convenience
label_name TEXT, -- nullable: GitLab returns null for deleted labels (see §1.2.1)
actor_gitlab_id INTEGER,
actor_username TEXT,
created_at INTEGER NOT NULL, -- ms epoch UTC
UNIQUE(gitlab_id, project_id),
CHECK (
(issue_id IS NOT NULL AND merge_request_id IS NULL)
OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
)
);
CREATE UNIQUE INDEX uq_label_events_gitlab ON resource_label_events(gitlab_id, project_id);
CREATE INDEX idx_label_events_issue ON resource_label_events(issue_id)
WHERE issue_id IS NOT NULL;
CREATE INDEX idx_label_events_mr ON resource_label_events(merge_request_id)
WHERE merge_request_id IS NOT NULL;
CREATE INDEX idx_label_events_created ON resource_label_events(created_at);
CREATE INDEX idx_label_events_label ON resource_label_events(label_name);
-- Note: idx_label_events_label was added in migration 015 (not in the original 011)
-- Milestone change events (add, remove)
-- Source: GET /projects/:id/issues/:iid/resource_milestone_events
@@ -142,19 +143,20 @@ CREATE TABLE resource_milestone_events (
project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,
milestone_title TEXT NOT NULL,
milestone_id INTEGER,
action TEXT NOT NULL CHECK (action IN ('add', 'remove')),
actor_gitlab_id INTEGER, -- GitLab user ID (stable; usernames can change)
actor_username TEXT, -- display/search convenience
milestone_title TEXT, -- nullable: GitLab returns null for deleted milestones (see §1.2.1)
milestone_id INTEGER,
actor_gitlab_id INTEGER,
actor_username TEXT,
created_at INTEGER NOT NULL, -- ms epoch UTC
UNIQUE(gitlab_id, project_id),
CHECK (
(issue_id IS NOT NULL AND merge_request_id IS NULL)
OR (issue_id IS NULL AND merge_request_id IS NOT NULL)
)
);
CREATE UNIQUE INDEX uq_milestone_events_gitlab ON resource_milestone_events(gitlab_id, project_id);
CREATE INDEX idx_milestone_events_issue ON resource_milestone_events(issue_id)
WHERE issue_id IS NOT NULL;
CREATE INDEX idx_milestone_events_mr ON resource_milestone_events(merge_request_id)
@@ -162,6 +164,27 @@ CREATE INDEX idx_milestone_events_mr ON resource_milestone_events(merge_request_
CREATE INDEX idx_milestone_events_created ON resource_milestone_events(created_at);
```
#### 1.2.1 Nullable Label and Milestone Fields (Migration 012)
GitLab returns `null` for `label` and `milestone` in Resource Events when the referenced label or milestone has been deleted from the project. This was discovered in production after the initial schema deployed with `NOT NULL` constraints.
**Migration 012** recreates `resource_label_events` and `resource_milestone_events` with nullable `label_name` and `milestone_title` columns. The table-swap approach (create new → copy → drop old → rename) is required because SQLite doesn't support `ALTER COLUMN`.
Timeline queries that encounter null labels/milestones display `"[deleted label]"` or `"[deleted milestone]"` in human output and omit the name field in robot JSON.
#### 1.2.2 Resource Event Watermarks (Migration 013)
To avoid re-fetching resource events for every entity on every sync, a watermark column tracks the `updated_at` value at the time of last successful event fetch:
```sql
ALTER TABLE issues ADD COLUMN resource_events_synced_for_updated_at INTEGER;
ALTER TABLE merge_requests ADD COLUMN resource_events_synced_for_updated_at INTEGER;
```
**Incremental behavior:** During sync, only entities where `updated_at > COALESCE(resource_events_synced_for_updated_at, 0)` are enqueued for resource event fetching. On `--full` sync, these watermarks are reset to `NULL`, causing all entities to be re-enqueued.
This mirrors the existing `discussions_synced_for_updated_at` pattern and works in conjunction with the dependent fetch queue.
### 1.3 Config Extension
**File:** `src/core/config.rs`
@@ -223,7 +246,7 @@ pub struct GitLabLabelEvent {
pub created_at: String,
pub resource_type: String,
pub resource_id: i64,
pub label: GitLabLabelRef,
pub label: Option<GitLabLabelRef>, // nullable: deleted labels return null
pub action: String, // "add" | "remove"
}
@@ -234,7 +257,7 @@ pub struct GitLabMilestoneEvent {
pub created_at: String,
pub resource_type: String,
pub resource_id: i64,
pub milestone: GitLabMilestoneRef,
pub milestone: Option<GitLabMilestoneRef>, // nullable: deleted milestones return null
pub action: String, // "add" | "remove"
}
```
@@ -243,7 +266,7 @@ pub struct GitLabMilestoneEvent {
**Architecture:** Generic dependent-fetch queue, generalizing the `pending_discussion_fetches` pattern. A single queue table serves all dependent resource types across Gates 1, 2, and 4, avoiding schema churn as new fetch types are added.
**New queue table (in migration 010):**
**New queue table (in migration 011):**
```sql
-- Generic queue for all dependent resource fetches (events, closes_issues, diffs)
@@ -302,16 +325,32 @@ Acceptable for initial sync. Incremental sync adds negligible overhead.
### 1.7 Acceptance Criteria
- [ ] Migration 010 creates all three event tables + generic dependent fetch queue
- [ ] `lore sync` fetches resource events for changed entities when `fetchResourceEvents` is true
- [ ] `lore sync --no-events` skips event fetching
- [ ] Event fetch failures are queued for retry with exponential backoff
- [ ] Stale locks (crashed sync) automatically reclaimed on next run
- [ ] `lore count events` shows event counts by type
- [x] Migration 011 creates all three event tables + generic dependent fetch queue
- [x] `lore sync` fetches resource events for changed entities when `fetchResourceEvents` is true
- [x] `lore sync --no-events` skips event fetching
- [x] Event fetch failures are queued for retry with exponential backoff
- [x] Stale locks (crashed sync) automatically reclaimed on next run
- [x] `lore count events` shows event counts by type
- [ ] `lore stats --check` validates event table referential integrity
- [ ] `lore stats --check` validates dependent job queue health (no stuck locks, retryable jobs visible)
- [ ] Robot mode JSON for all new commands
### 1.8 Observability Infrastructure (Migration 014)
The sync pipeline includes lightweight observability via `sync_runs` enrichment. Migration 014 adds:
```sql
ALTER TABLE sync_runs ADD COLUMN run_id TEXT; -- correlation ID for log tracing
ALTER TABLE sync_runs ADD COLUMN total_items_processed INTEGER DEFAULT 0;
ALTER TABLE sync_runs ADD COLUMN total_errors INTEGER DEFAULT 0;
CREATE INDEX IF NOT EXISTS idx_sync_runs_run_id ON sync_runs(run_id);
```
**Purpose:** The `run_id` column correlates log entries (via `tracing`) with sync run records. `total_items_processed` and `total_errors` provide aggregate counts for `lore sync-status` and robot mode health checks without requiring log parsing.
This is separate from the event tables but supports the same operational workflow — answering "did the last sync succeed?" and "how many entities were processed?" programmatically.
---
## Gate 2: Cross-Reference Extraction
@@ -320,10 +359,10 @@ Acceptable for initial sync. Incremental sync adds negligible overhead.
Temporal queries need to follow links between entities: "MR !567 closed issue #234", "issue #234 mentioned in MR !567", "#299 was opened as a follow-up to !567". These relationships are captured in two places:
1. **Structured API:** `GET /projects/:id/merge_requests/:iid/closes_issues` returns issues that close when the MR merges. Also, `resource_state_events` includes `source_merge_request_id` for "closed by MR" events.
1. **Structured API:** `GET /projects/:id/merge_requests/:iid/closes_issues` returns issues that close when the MR merges. Also, `resource_state_events` includes `source_merge_request_iid` for "closed by MR" events.
2. **System notes:** Cross-references like "mentioned in !456" and "closed by !789" appear in system note body text.
### 2.2 Schema (in Migration 010)
### 2.2 Schema (in Migration 011)
```sql
-- Cross-references between entities
@@ -340,33 +379,49 @@ Temporal queries need to follow links between entities: "MR !567 closed issue #2
-- silently dropping them. Timeline output marks these as "[external]".
CREATE TABLE entity_references (
id INTEGER PRIMARY KEY,
project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
source_entity_type TEXT NOT NULL CHECK (source_entity_type IN ('issue', 'merge_request')),
source_entity_id INTEGER NOT NULL, -- local DB id
target_entity_type TEXT NOT NULL CHECK (target_entity_type IN ('issue', 'merge_request')),
target_entity_id INTEGER, -- local DB id (NULL when target is unresolved/external)
target_project_path TEXT, -- e.g. "group/other-repo" (populated for cross-project refs)
target_entity_iid INTEGER, -- GitLab iid (populated when target_entity_id is NULL)
reference_type TEXT NOT NULL, -- 'closes' | 'mentioned' | 'related'
source_method TEXT NOT NULL, -- 'api_closes_issues' | 'api_state_event' | 'system_note_parse'
created_at INTEGER, -- when the reference was created (if known)
UNIQUE(source_entity_type, source_entity_id, target_entity_type,
COALESCE(target_entity_id, -1), COALESCE(target_project_path, ''),
COALESCE(target_entity_iid, -1), reference_type)
reference_type TEXT NOT NULL CHECK (reference_type IN ('closes', 'mentioned', 'related')),
source_method TEXT NOT NULL CHECK (source_method IN ('api', 'note_parse', 'description_parse')),
created_at INTEGER NOT NULL -- ms epoch UTC
);
CREATE INDEX idx_refs_source ON entity_references(source_entity_type, source_entity_id);
CREATE INDEX idx_refs_target ON entity_references(target_entity_type, target_entity_id)
-- Unique constraint includes source_method: the same relationship can be discovered by
-- multiple methods (e.g., closes_issues API and a state event), and we store both for provenance.
CREATE UNIQUE INDEX uq_entity_refs ON entity_references(
project_id, source_entity_type, source_entity_id, target_entity_type,
COALESCE(target_entity_id, -1), COALESCE(target_project_path, ''),
COALESCE(target_entity_iid, -1), reference_type, source_method
);
CREATE INDEX idx_entity_refs_source ON entity_references(source_entity_type, source_entity_id);
CREATE INDEX idx_entity_refs_target ON entity_references(target_entity_id)
WHERE target_entity_id IS NOT NULL;
CREATE INDEX idx_refs_unresolved ON entity_references(target_project_path, target_entity_iid)
CREATE INDEX idx_entity_refs_unresolved ON entity_references(target_project_path, target_entity_iid)
WHERE target_entity_id IS NULL;
```
**`source_method` values:**
| Value | Meaning |
|-------|---------|
| `'api'` | Populated from structured GitLab APIs (`closes_issues`, `resource_state_events`) |
| `'note_parse'` | Extracted from system note body text (best-effort, English only) |
| `'description_parse'` | Extracted from issue/MR description body text (future) |
The original design used more granular values (`'api_closes_issues'`, `'api_state_event'`, `'system_note_parse'`). In practice, the API-sourced references don't need sub-method distinction — the `reference_type` already captures the semantic relationship — so the implementation simplified to three values.
### 2.3 Population Strategy
**Tier 1 — Structured APIs (reliable):**
1. **`closes_issues` endpoint:** After MR ingestion, fetch `GET /projects/:id/merge_requests/:iid/closes_issues`. Insert `reference_type = 'closes'`, `source_method = 'api_closes_issues'`. Source = MR, target = issue.
2. **State events:** When `resource_state_events` contains `source_merge_request_id`, insert `reference_type = 'closes'`, `source_method = 'api_state_event'`. Source = MR (referenced by iid), target = issue (that received the state change).
1. **`closes_issues` endpoint:** After MR ingestion, fetch `GET /projects/:id/merge_requests/:iid/closes_issues`. Insert `reference_type = 'closes'`, `source_method = 'api'`. Source = MR, target = issue.
2. **State events:** When `resource_state_events` contains `source_merge_request_iid`, insert `reference_type = 'closes'`, `source_method = 'api'`. Source = MR (referenced by iid), target = issue (that received the state change).
**Tier 2 — System note parsing (best-effort):**
@@ -385,14 +440,14 @@ closed by #{iid}
**Cross-project references:** When a system note references `{group}/{project}#{iid}` and the target project is not synced locally, store with `target_entity_id = NULL`, `target_project_path = '{group}/{project}'`, `target_entity_iid = {iid}`. These unresolved references are still valuable for timeline narratives — they indicate external dependencies and decision context even when we can't traverse further.
Insert with `source_method = 'system_note_parse'`. Accept that:
Insert with `source_method = 'note_parse'`. Accept that:
- This breaks on non-English GitLab instances
- Format may vary across GitLab versions
- Log parse failures at `debug` level for monitoring
**Tier 3 — Description/body parsing (deferred):**
**Tier 3 — Description/body parsing (`source_method = 'description_parse'`, deferred):**
Issue and MR descriptions often contain `#123` or `!456` references. Parsing these is lower confidence (mentions != relationships) and is deferred to a future iteration.
Issue and MR descriptions often contain `#123` or `!456` references. Parsing these is lower confidence (mentions != relationships) and is deferred to a future iteration. The `source_method` value `'description_parse'` is reserved in the CHECK constraint for this future work.
### 2.4 Ingestion Flow
@@ -401,6 +456,8 @@ The `closes_issues` fetch uses the generic dependent fetch queue (`job_type = 'm
- One additional API call per MR: `GET /projects/:id/merge_requests/:iid/closes_issues`
- Cross-reference parsing from system notes runs as a local post-processing step (no API calls) after all dependent fetches complete
**Watermark pattern (migration 015):** A `closes_issues_synced_for_updated_at` column on `merge_requests` tracks the last `updated_at` value at which closes_issues data was fetched. Only MRs where `updated_at > COALESCE(closes_issues_synced_for_updated_at, 0)` are enqueued for re-fetching. The watermark is updated after successful fetch or after a permanent API error (e.g., 404 for external MRs). On `--full` sync, the watermark is reset to `NULL`.
### 2.5 Acceptance Criteria
- [ ] `entity_references` table populated from `closes_issues` API for all synced MRs
@@ -562,7 +619,7 @@ Evidence notes (`NOTE` events) show the first ~200 characters of FTS5-matched no
"via": {
"from": { "type": "merge_request", "iid": 567, "project": "group/repo" },
"reference_type": "closes",
"source_method": "api_closes_issues"
"source_method": "api"
}
}
],
@@ -639,9 +696,13 @@ Evidence notes (`NOTE` events) show the first ~200 characters of FTS5-matched no
## Gate 4: File Decision History (`lore file-history`)
### 4.1 Schema (Migration 011)
### 4.1 Schema
**File:** `migrations/011_file_changes.sql`
**Commit SHAs (Migration 015 — already applied):**
`merge_commit_sha` and `squash_commit_sha` were added to `merge_requests` in migration 015. These are now populated during MR ingestion and available for Gate 4/5 queries.
**File changes table (future migration — not yet created):**
```sql
-- Files changed by each merge request
@@ -660,11 +721,6 @@ CREATE INDEX idx_mr_files_new_path ON mr_file_changes(new_path);
CREATE INDEX idx_mr_files_old_path ON mr_file_changes(old_path)
WHERE old_path IS NOT NULL;
CREATE INDEX idx_mr_files_mr ON mr_file_changes(merge_request_id);
-- Add commit SHAs to merge_requests (cherry-picked from Phase A)
-- These link MRs to actual git history
ALTER TABLE merge_requests ADD COLUMN merge_commit_sha TEXT;
ALTER TABLE merge_requests ADD COLUMN squash_commit_sha TEXT;
```
### 4.2 Config Extension
@@ -881,14 +937,16 @@ When git integration is added:
### Migration Numbering
Phase B uses migration numbers starting at 010:
Phase B uses migration numbers 011015. The original plan assumed migration 010 was available, but chunk config (`010_chunk_config.sql`) was implemented first, shifting everything by +1.
| Migration | Content | Gate |
|-----------|---------|------|
| 010 | Resource event tables, generic dependent fetch queue, entity_references | Gates 1, 2 |
| 011 | mr_file_changes, merge_commit_sha, squash_commit_sha | Gate 4 |
Phase A's complete field capture migration should use 012+ when implemented, skipping fields already added by 011 (`merge_commit_sha`, `squash_commit_sha`).
| Migration | File | Content | Gate |
|-----------|------|---------|------|
| 011 | `011_resource_events.sql` | Resource event tables (state, label, milestone), entity_references, generic dependent fetch queue | Gates 1, 2 |
| 012 | `012_nullable_label_milestone.sql` | Make `label_name` and `milestone_title` nullable for deleted labels/milestones | Gate 1 (fix) |
| 013 | `013_resource_event_watermarks.sql` | Add `resource_events_synced_for_updated_at` to issues and merge_requests | Gate 1 (optimization) |
| 014 | `014_sync_runs_enrichment.sql` | Observability: `run_id`, `total_items_processed`, `total_errors` on sync_runs | Observability |
| 015 | `015_commit_shas_and_closes_watermark.sql` | `merge_commit_sha`, `squash_commit_sha`, `closes_issues_synced_for_updated_at` on merge_requests; `idx_label_events_label` index | Gates 2, 4 |
| TBD | — | `mr_file_changes` table for MR diff data | Gate 4 |
### Backward Compatibility
@@ -909,7 +967,7 @@ Phase A's complete field capture migration should use 012+ when implemented, ski
| GitLab diffs API returns large payloads | Low | Extract file metadata only, discard diff content |
| Cross-reference graph traversal unbounded | Medium | BFS depth capped at configurable limit (default 1); `mentioned` edges excluded by default |
| Cross-project references lost when target not synced | Medium | Unresolved references stored with `target_entity_id = NULL`; still appear in timeline output |
| Phase A migration numbering conflict | Low | Phase B uses 010-011; Phase A uses 012+ |
| Phase A migration numbering conflict | Low | Resolved: chunk config took 010; Phase B shifted to 011-015 |
| Timeline output lacks "why" evidence | Medium | Evidence-bearing notes from FTS5 included as first-class timeline events |
| Squash commits break blame-to-MR mapping | Medium | Tier 2 (git integration) deferred; Tier 1 uses file-level MR matching |

View File

@@ -0,0 +1,17 @@
-- Migration 015: Add commit SHAs to merge_requests, closes_issues watermark,
-- and missing label_name index on resource_label_events.
-- Commit SHAs link MRs to actual git history (needed for Gate 4: file-history, Gate 5: trace)
ALTER TABLE merge_requests ADD COLUMN merge_commit_sha TEXT;
ALTER TABLE merge_requests ADD COLUMN squash_commit_sha TEXT;
-- Watermark for closes_issues sync (same pattern as resource_events_synced_for_updated_at)
-- Prevents re-fetching closes_issues for MRs that haven't changed since last sync
ALTER TABLE merge_requests ADD COLUMN closes_issues_synced_for_updated_at INTEGER;
-- Missing index from original spec: enables efficient label-name filtering in timeline queries
CREATE INDEX IF NOT EXISTS idx_label_events_label ON resource_label_events(label_name);
-- Update schema version
INSERT INTO schema_version (version, applied_at, description)
VALUES (15, strftime('%s', 'now') * 1000, 'Add commit SHAs, closes_issues watermark, and label event index');

View File

@@ -24,6 +24,8 @@ pub struct NormalizedMergeRequest {
pub closed_at: Option<i64>,
pub last_seen_at: i64,
pub web_url: String,
pub merge_commit_sha: Option<String>,
pub squash_commit_sha: Option<String>,
}
#[derive(Debug, Clone)]
@@ -100,6 +102,8 @@ pub fn transform_merge_request(
closed_at,
last_seen_at: now_ms(),
web_url: gitlab_mr.web_url.clone(),
merge_commit_sha: gitlab_mr.merge_commit_sha.clone(),
squash_commit_sha: gitlab_mr.squash_commit_sha.clone(),
},
label_names: gitlab_mr.labels.clone(),
assignee_usernames,

View File

@@ -247,4 +247,6 @@ pub struct GitLabMergeRequest {
#[serde(default)]
pub reviewers: Vec<GitLabReviewer>,
pub web_url: String,
pub merge_commit_sha: Option<String>,
pub squash_commit_sha: Option<String>,
}

View File

@@ -180,8 +180,9 @@ fn process_mr_in_transaction(
author_username, source_branch, target_branch, head_sha,
references_short, references_full, detailed_merge_status,
merge_user_username, created_at, updated_at, merged_at, closed_at,
last_seen_at, web_url, raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22)
last_seen_at, web_url, raw_payload_id,
merge_commit_sha, squash_commit_sha
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24)
ON CONFLICT(gitlab_id) DO UPDATE SET
title = excluded.title,
description = excluded.description,
@@ -200,7 +201,9 @@ fn process_mr_in_transaction(
closed_at = excluded.closed_at,
last_seen_at = excluded.last_seen_at,
web_url = excluded.web_url,
raw_payload_id = excluded.raw_payload_id",
raw_payload_id = excluded.raw_payload_id,
merge_commit_sha = excluded.merge_commit_sha,
squash_commit_sha = excluded.squash_commit_sha",
params![
mr_row.gitlab_id,
project_id,
@@ -224,6 +227,8 @@ fn process_mr_in_transaction(
now,
&mr_row.web_url,
payload_id,
&mr_row.merge_commit_sha,
&mr_row.squash_commit_sha,
],
)?;
@@ -368,7 +373,8 @@ fn reset_discussion_watermarks(conn: &Connection, project_id: i64) -> Result<()>
SET discussions_synced_for_updated_at = NULL,
discussions_sync_attempts = 0,
discussions_sync_last_error = NULL,
resource_events_synced_for_updated_at = NULL
resource_events_synced_for_updated_at = NULL,
closes_issues_synced_for_updated_at = NULL
WHERE project_id = ?",
[project_id],
)?;

View File

@@ -785,9 +785,32 @@ fn update_resource_event_watermark(
Ok(())
}
fn update_closes_issues_watermark(conn: &Connection, mr_local_id: i64) -> Result<()> {
conn.execute(
"UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?",
[mr_local_id],
)?;
Ok(())
}
fn enqueue_mr_closes_issues_jobs(conn: &Connection, project_id: i64) -> Result<usize> {
let mut stmt =
conn.prepare_cached("SELECT id, iid FROM merge_requests WHERE project_id = ?1")?;
// Remove stale jobs for MRs that haven't changed since their last closes_issues sync
conn.execute(
"DELETE FROM pending_dependent_fetches \
WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'mr_closes_issues' \
AND entity_local_id IN ( \
SELECT id FROM merge_requests \
WHERE project_id = ?1 \
AND updated_at <= COALESCE(closes_issues_synced_for_updated_at, 0) \
)",
[project_id],
)?;
let mut stmt = conn.prepare_cached(
"SELECT id, iid FROM merge_requests \
WHERE project_id = ?1 \
AND updated_at > COALESCE(closes_issues_synced_for_updated_at, 0)",
)?;
let entities: Vec<(i64, i64)> = stmt
.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
.collect::<std::result::Result<Vec<_>, _>>()?;
@@ -886,6 +909,7 @@ async fn drain_mr_closes_issues(
match store_result {
Ok(()) => {
complete_job(conn, job.id)?;
update_closes_issues_watermark(conn, job.entity_local_id)?;
result.fetched += 1;
}
Err(e) => {
@@ -907,6 +931,7 @@ async fn drain_mr_closes_issues(
"Permanent API error for closes_issues, marking complete"
);
complete_job(conn, job.id)?;
update_closes_issues_watermark(conn, job.entity_local_id)?;
result.skipped_not_found += 1;
} else {
warn!(

View File

@@ -51,6 +51,8 @@ fn make_test_mr() -> GitLabMergeRequest {
name: "Alice Wong".to_string(),
}],
web_url: "https://gitlab.example.com/group/project/-/merge_requests/42".to_string(),
merge_commit_sha: Some("merge1234567890".to_string()),
squash_commit_sha: None,
}
}
@@ -98,6 +100,18 @@ fn transforms_mr_with_all_fields() {
);
}
#[test]
fn preserves_commit_shas() {
let mr = make_test_mr();
let result = transform_merge_request(&mr, 200).unwrap();
assert_eq!(
result.merge_request.merge_commit_sha,
Some("merge1234567890".to_string())
);
assert!(result.merge_request.squash_commit_sha.is_none());
}
#[test]
fn parses_timestamps_to_ms_epoch() {
let mr = make_test_mr();