From c8dece8c60c63934982d141fcc05a3638d002d31 Mon Sep 17 00:00:00 2001 From: teernisse Date: Thu, 19 Feb 2026 08:01:55 -0500 Subject: [PATCH] feat(cli): add 'lore related' semantic similarity command (bd-8con) Adds 'lore related' / 'lore similar' command for discovering semantically related issues and MRs using vector embeddings. Two modes: - Entity mode: find entities similar to a specific issue/MR - Query mode: embed free text and find matching entities Includes distance-to-similarity conversion, label intersection, human and robot output formatters, and 11 unit tests. --- .beads/issues.jsonl | 16 +- .beads/last-touched | 2 +- crates/lore-tui/src/app/tests.rs | 117 +++ crates/lore-tui/src/app/update.rs | 18 + migrations/028_surgical_sync_runs.sql | 20 + src/cli/autocorrect.rs | 4 + src/cli/commands/mod.rs | 4 + src/cli/commands/related.rs | 692 ++++++++++++++++++ src/cli/commands/sync.rs | 131 ++++ src/cli/commands/sync_surgical.rs | 462 ++++++++++++ src/cli/commands/sync_surgical_tests.rs | 323 +++++++++ src/cli/mod.rs | 46 ++ src/core/db.rs | 4 + src/core/error.rs | 87 +++ src/core/sync_run.rs | 73 ++ src/core/sync_run_tests.rs | 244 +++++++ src/documents/mod.rs | 5 +- src/documents/regenerator.rs | 69 ++ src/documents/regenerator_tests.rs | 62 ++ src/gitlab/client.rs | 153 ++++ src/ingestion/issues.rs | 2 +- src/ingestion/merge_requests.rs | 10 +- src/ingestion/mod.rs | 1 + src/ingestion/orchestrator.rs | 4 +- src/ingestion/surgical.rs | 462 ++++++++++++ src/ingestion/surgical_tests.rs | 913 ++++++++++++++++++++++++ src/main.rs | 175 ++++- 27 files changed, 4066 insertions(+), 33 deletions(-) create mode 100644 migrations/028_surgical_sync_runs.sql create mode 100644 src/cli/commands/related.rs create mode 100644 src/cli/commands/sync_surgical.rs create mode 100644 src/cli/commands/sync_surgical_tests.rs create mode 100644 src/ingestion/surgical.rs create mode 100644 src/ingestion/surgical_tests.rs diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 
ae4c68b..8bf7113 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -8,7 +8,7 @@ {"id":"bd-13q8","title":"Implement Rust-side decay aggregation with reviewer split","description":"## Background\nThe current accumulation (who.rs ~line 780-810) maps SQL rows directly to Expert structs with integer scores computed in SQL. The new model receives per-signal rows from build_expert_sql() (bd-1hoq) and needs Rust-side decay computation, reviewer split, closed MR multiplier, and deterministic f64 ordering. This bead wires the new SQL into query_expert() and replaces the accumulation logic.\n\n## Approach\nModify query_expert() (who.rs:641) to:\n1. Call build_expert_sql() instead of the inline SQL\n2. Bind 6 params: path, since_ms, project_id, as_of_ms, closed_mr_multiplier, reviewer_min_note_chars\n3. Execute and iterate rows: (username, signal, mr_id, qty, ts, state_mult)\n4. Accumulate into per-user UserAccum structs\n5. Compute decayed scores with deterministic ordering\n6. Build Expert structs from accumulators\n\n### Updated query_expert() signature:\n```rust\n#[allow(clippy::too_many_arguments)]\nfn query_expert(\n conn: &Connection,\n path: &str,\n project_id: Option,\n since_ms: i64,\n as_of_ms: i64,\n limit: usize,\n scoring: &ScoringConfig,\n detail: bool,\n explain_score: bool,\n include_bots: bool,\n) -> Result\n```\n\n### CRITICAL: Existing callsite updates\nChanging the signature from 7 to 10 params breaks ALL existing callers. There are 17 callsites that must be updated:\n\n**Production (1):**\n- run_who() at line ~311: Updated by bd-11mg (CLI flags bead), not this bead. To keep code compiling between bd-13q8 and bd-11mg, update this callsite with default values: `query_expert(conn, path, project_id, since_ms, now_ms(), limit, scoring, detail, false, false)`\n\n**Tests (16):**\nUpdate ALL test callsites to the new 10-param signature. 
The new params use defaults that preserve current behavior:\n- `as_of_ms` = `now_ms() + 1000` (slightly in future, ensures all test data is within window)\n- `explain_score` = `false`\n- `include_bots` = `false`\n\nLines to update (current line numbers):\n2879, 3127, 3208, 3214, 3226, 3252, 3291, 3325, 3345, 3398, 3563, 3572, 3588, 3625, 3651, 3658\n\nPattern: replace `query_expert(&conn, path, None, 0, limit, &scoring, detail)` with `query_expert(&conn, path, None, 0, now_ms() + 1000, limit, &scoring, detail, false, false)`\n\n### Per-user accumulator:\n```rust\nstruct UserAccum {\n author_mrs: HashMap, // mr_id -> (max_ts, state_mult)\n reviewer_participated: HashMap, // mr_id -> (max_ts, state_mult)\n reviewer_assigned: HashMap, // mr_id -> (max_ts, state_mult)\n notes_per_mr: HashMap, // mr_id -> (count, max_ts, state_mult)\n last_seen: i64,\n components: Option<[f64; 4]>, // when explain_score: [author, participated, assigned, notes]\n}\n```\n\n**Key**: state_mult is f64 from SQL (computed in mr_activity CTE), NOT computed from mr_state string in Rust.\n\n### Signal routing:\n- `diffnote_author` / `file_author` -> author_mrs (max ts + state_mult per mr_id)\n- `diffnote_reviewer` / `file_reviewer_participated` -> reviewer_participated\n- `file_reviewer_assigned` -> reviewer_assigned (skip if mr_id already in reviewer_participated)\n- `note_group` -> notes_per_mr (qty from SQL row, max ts + state_mult)\n\n### Deterministic score computation:\nSort each HashMap entries into a Vec sorted by mr_id ASC, then sum:\n```\nraw_score =\n sum(author_weight * state_mult * decay(as_of_ms - ts, author_hl) for (mr, ts, sm) in author_mrs sorted)\n + sum(reviewer_weight * state_mult * decay(as_of_ms - ts, reviewer_hl) for ... sorted)\n + sum(reviewer_assignment_weight * state_mult * decay(as_of_ms - ts, reviewer_assignment_hl) for ... sorted)\n + sum(note_bonus * state_mult * log2(1 + count) * decay(as_of_ms - ts, note_hl) for ... 
sorted)\n```\n\n### Expert struct additions (who.rs:141-154):\n```rust\npub score_raw: Option, // unrounded f64, only when explain_score\npub components: Option, // only when explain_score\n```\n\nAdd new struct:\n```rust\npub struct ScoreComponents {\n pub author: f64,\n pub reviewer_participated: f64,\n pub reviewer_assigned: f64,\n pub notes: f64,\n}\n```\n\n### Bot filtering:\nPost-query: if !include_bots, filter out usernames in scoring.excluded_usernames (case-insensitive via .to_lowercase() comparison).\n\n## TDD Loop\n\n### RED (write these 13 tests first):\n\n**Core decay integration:**\n- test_expert_scores_decay_with_time: recent (10d) vs old (360d), recent scores ~24, old ~6\n- test_expert_reviewer_decays_faster_than_author: same MR at 90d, author > reviewer\n- test_reviewer_participated_vs_assigned_only: participated ~10*decay vs assigned ~3*decay\n- test_note_diminishing_returns_per_mr: 20-note/1-note ratio ~4.4x not 20x\n- test_file_change_timestamp_uses_merged_at: merged MR uses merged_at not updated_at\n- test_open_mr_uses_updated_at: opened MR uses updated_at\n- test_old_path_match_credits_expertise: query old path -> author appears\n- test_closed_mr_multiplier: closed MR at 0.5x merged (state_mult from SQL)\n- test_trivial_note_does_not_count_as_participation: 4-char LGTM -> assigned-only\n- test_null_timestamp_fallback_to_created_at: merged with NULL merged_at\n- test_row_order_independence: different insert order -> identical rankings\n- test_reviewer_split_is_exhaustive: every reviewer in exactly one bucket\n- test_deterministic_accumulation_order: 100 runs, bit-identical f64\n\nAll tests use insert_mr_at/insert_diffnote_at from bd-2yu5 for timestamp control, and call the NEW query_expert() with 10 params.\n\n### GREEN: Wire build_expert_sql into query_expert, implement UserAccum + scoring loop, update all 17 existing callsites.\n### VERIFY: cargo test -p lore -- test_expert_scores test_reviewer_participated test_note_diminishing\n\n## 
Acceptance Criteria\n- [ ] All 13 new tests pass green\n- [ ] All 16 existing test callsites updated to 10-param signature\n- [ ] Production caller (run_who at ~line 311) updated with default values\n- [ ] Existing who tests pass unchanged (decay ~1.0 for now_ms() data)\n- [ ] state_mult comes from SQL f64 column, NOT from string matching on mr_state\n- [ ] reviewer_assigned excludes mr_ids already in reviewer_participated\n- [ ] Deterministic: 100 runs produce bit-identical f64 (sorted by mr_id)\n- [ ] Bot filtering applied when include_bots=false\n- [ ] cargo check --all-targets passes (no broken callers)\n\n## Files\n- MODIFY: src/cli/commands/who.rs (query_expert at line 641, Expert struct at line 141, all test callsites)\n\n## Edge Cases\n- log2(1.0 + 0) = 0.0 — zero notes contribute nothing\n- f64 NaN: half_life_decay guards hl=0\n- HashMap to sorted Vec for deterministic summing\n- as_of_ms: use passed value, not now_ms()\n- state_mult is always 1.0 or closed_mr_multiplier (from SQL) — no other values possible\n- Production caller uses now_ms() as as_of_ms default until bd-11mg adds --as-of flag","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T17:00:01.764110Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.412694Z","closed_at":"2026-02-12T20:43:04.412646Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-13q8","depends_on_id":"bd-1hoq","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-13q8","depends_on_id":"bd-1soz","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-13q8","depends_on_id":"bd-2yu5","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-140","title":"[CP1] Database migration 002_issues.sql","description":"Create migration file with 
tables for issues, labels, issue_labels, discussions, and notes.\n\nTables to create:\n- issues: gitlab_id, project_id, iid, title, description, state, author_username, timestamps, web_url, raw_payload_id\n- labels: gitlab_id, project_id, name, color, description (unique on project_id+name)\n- issue_labels: junction table\n- discussions: gitlab_discussion_id, project_id, issue_id, noteable_type, individual_note, timestamps, resolvable/resolved\n- notes: gitlab_id, discussion_id, project_id, type, is_system, author_username, body, timestamps, position, resolution fields, DiffNote position fields\n\nInclude appropriate indexes:\n- idx_issues_project_updated, idx_issues_author, uq_issues_project_iid\n- uq_labels_project_name, idx_labels_name\n- idx_issue_labels_label\n- uq_discussions_project_discussion_id, idx_discussions_issue/mr/last_note\n- idx_notes_discussion/author/system\n\nFiles: migrations/002_issues.sql\nDone when: Migration applies cleanly on top of 001_initial.sql","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:18:53.954039Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.154936Z","closed_at":"2026-01-25T15:21:35.154936Z","deleted_at":"2026-01-25T15:21:35.154934Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-14hv","title":"Implement soak test + concurrent pagination/write race tests","description":"## Background\nThe 30-minute soak test verifies no panic, deadlock, or memory leak under sustained use. 
Concurrent pagination/write race tests prove browse snapshot fences prevent duplicate or skipped rows during sync writes.\n\n## Approach\nSoak test:\n- Automated script that drives the TUI for 30 minutes: random navigation, filter changes, sync starts/cancels, search queries\n- Monitors: no panic (exit code), no deadlock (watchdog timer), memory growth < 5% (RSS sampling)\n- Uses FakeClock with accelerated time for time-dependent features\n\nConcurrent pagination/write race:\n- Thread A: paginating through Issue List (fetching pages via keyset cursor)\n- Thread B: writing new issues to DB (simulating sync)\n- Assert: no duplicate rows across pages, no skipped rows within a browse snapshot fence\n- BrowseSnapshot token ensures stable ordering until explicit refresh\n\n## Acceptance Criteria\n- [ ] 30-min soak: no panic\n- [ ] 30-min soak: no deadlock (watchdog detects)\n- [ ] 30-min soak: memory growth < 5%\n- [ ] Concurrent pagination: no duplicate rows across pages\n- [ ] Concurrent pagination: no skipped rows within snapshot fence\n- [ ] BrowseSnapshot invalidated on manual refresh, not on background writes\n\n## Files\n- CREATE: crates/lore-tui/tests/soak_test.rs\n- CREATE: crates/lore-tui/tests/pagination_race_test.rs\n\n## TDD Anchor\nRED: Write test_pagination_no_duplicates that runs paginator and writer concurrently for 1000 iterations, collects all returned row IDs, asserts no duplicates.\nGREEN: Implement browse snapshot fence in keyset pagination.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_pagination_no_duplicates\n\n## Edge Cases\n- Soak test needs headless mode (no real terminal) — use ftui test harness\n- Memory sampling on macOS: use mach_task_info or /proc equivalent\n- Writer must use WAL mode to not block readers\n- Snapshot fence: deferred read transaction holds snapshot until page sequence completes\n\n## Dependency Context\nUses DbManager from \"Implement DbManager\" task.\nUses BrowseSnapshot from \"Implement 
NavigationStack\" task.\nUses keyset pagination from \"Implement Issue List\" task.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:05:28.130516Z","created_by":"tayloreernisse","updated_at":"2026-02-19T12:49:11.856646Z","closed_at":"2026-02-19T12:49:11.856471Z","close_reason":"7 soak tests + 7 pagination race tests passing: 50k event soak, watchdog, concurrent read/write with snapshot fence, multi-reader, depth bounds","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-14hv","depends_on_id":"bd-wnuo","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} -{"id":"bd-14q","title":"Epic: Gate 4 - File Decision History (lore file-history)","description":"## Background\n\nGate 4 implements `lore file-history` — answers \"Which MRs touched this file, and why?\" by linking files to MRs via a new mr_file_changes table and resolving rename chains.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 4 (Sections 4.1-4.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Migration 015 exists on disk (commit SHAs + closes watermark) — registered by bd-1oo\n- pending_dependent_fetches has job_type='mr_diffs' in CHECK constraint (migration 011)\n\n## Architecture\n\n- **New table:** mr_file_changes (migration 016) stores file paths per MR\n- **New config:** fetchMrFileChanges (default true) gates the API calls\n- **API source:** GET /projects/:id/merge_requests/:iid/diffs — extract paths only, discard diff content\n- **Rename resolution:** BFS both directions on mr_file_changes WHERE change_type='renamed', bounded at 10 hops\n- **Query:** Join mr_file_changes -> merge_requests, optionally enrich with entity_references and discussions\n\n## Children (Execution Order)\n\n1. **bd-1oo** — Register migration 015 + create migration 016 (mr_file_changes table)\n2. **bd-jec** — Add fetchMrFileChanges config flag\n3. 
**bd-2yo** — Fetch MR diffs API and populate mr_file_changes\n4. **bd-1yx** — Implement rename chain resolution (BFS algorithm)\n5. **bd-z94** — Implement lore file-history CLI command (human + robot output)\n\n## Gate Completion Criteria\n\n- [ ] mr_file_changes table populated from GitLab diffs API\n- [ ] merge_commit_sha and squash_commit_sha captured in merge_requests (already done in code, needs migration 015 registered)\n- [ ] `lore file-history ` returns MRs ordered by merge/creation date\n- [ ] Output includes: MR title, state, author, change type, discussion count\n- [ ] --discussions shows inline discussion snippets from DiffNotes on the file\n- [ ] Rename chains resolved with bounded hop count (default 10) and cycle detection\n- [ ] --no-follow-renames disables chain resolution\n- [ ] Robot mode JSON includes rename_chain when renames detected\n- [ ] -p required when path in multiple projects (exit 18 Ambiguous)\n","status":"open","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.094024Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:56:53.434796Z","compaction_level":0,"original_size":0,"labels":["epic","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-14q","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-14q","depends_on_id":"bd-2zl","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} +{"id":"bd-14q","title":"Epic: Gate 4 - File Decision History (lore file-history)","description":"## Background\n\nGate 4 implements `lore file-history` — answers \"Which MRs touched this file, and why?\" by linking files to MRs via a new mr_file_changes table and resolving rename chains.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 4 (Sections 4.1-4.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Migration 015 exists on disk (commit SHAs + closes watermark) — registered 
by bd-1oo\n- pending_dependent_fetches has job_type='mr_diffs' in CHECK constraint (migration 011)\n\n## Architecture\n\n- **New table:** mr_file_changes (migration 016) stores file paths per MR\n- **New config:** fetchMrFileChanges (default true) gates the API calls\n- **API source:** GET /projects/:id/merge_requests/:iid/diffs — extract paths only, discard diff content\n- **Rename resolution:** BFS both directions on mr_file_changes WHERE change_type='renamed', bounded at 10 hops\n- **Query:** Join mr_file_changes -> merge_requests, optionally enrich with entity_references and discussions\n\n## Children (Execution Order)\n\n1. **bd-1oo** — Register migration 015 + create migration 016 (mr_file_changes table)\n2. **bd-jec** — Add fetchMrFileChanges config flag\n3. **bd-2yo** — Fetch MR diffs API and populate mr_file_changes\n4. **bd-1yx** — Implement rename chain resolution (BFS algorithm)\n5. **bd-z94** — Implement lore file-history CLI command (human + robot output)\n\n## Gate Completion Criteria\n\n- [ ] mr_file_changes table populated from GitLab diffs API\n- [ ] merge_commit_sha and squash_commit_sha captured in merge_requests (already done in code, needs migration 015 registered)\n- [ ] `lore file-history ` returns MRs ordered by merge/creation date\n- [ ] Output includes: MR title, state, author, change type, discussion count\n- [ ] --discussions shows inline discussion snippets from DiffNotes on the file\n- [ ] Rename chains resolved with bounded hop count (default 10) and cycle detection\n- [ ] --no-follow-renames disables chain resolution\n- [ ] Robot mode JSON includes rename_chain when renames detected\n- [ ] -p required when path in multiple projects (exit 18 Ambiguous)\n","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.094024Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:04:47.974331Z","closed_at":"2026-02-19T13:04:47.974241Z","close_reason":"All children complete: bd-1oo, bd-jec, bd-2yo, bd-1yx, 
bd-z94","compaction_level":0,"original_size":0,"labels":["epic","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-14q","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-14q","depends_on_id":"bd-2zl","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-14q8","title":"Split commands.rs into commands/ module (registry + defs)","description":"commands.rs is 807 lines. Split into crates/lore-tui/src/commands/mod.rs (re-exports), commands/registry.rs (CommandRegistry, lookup, status_hints, help_entries, palette_entries, build_registry), and commands/defs.rs (command definitions, KeyCombo, CommandDef struct). Keep public API identical via re-exports. All downstream imports should continue to work unchanged.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T21:24:11.259683Z","created_by":"tayloreernisse","updated_at":"2026-02-18T18:48:18.915386Z","closed_at":"2026-02-18T18:48:18.915341Z","close_reason":"Split commands.rs into commands/ module (defs.rs + registry.rs + mod.rs)","compaction_level":0,"original_size":0,"labels":["TUI"]} {"id":"bd-157","title":"[CP1] Issue transformer with label extraction","description":"Transform GitLab issue payloads to normalized database schema.\n\n## Module\nsrc/gitlab/transformers/issue.rs\n\n## Structs\n\n### NormalizedIssue\n- gitlab_id: i64\n- project_id: i64 (local DB project ID)\n- iid: i64\n- title: String\n- description: Option\n- state: String\n- author_username: String\n- created_at, updated_at, last_seen_at: i64 (ms epoch)\n- web_url: String\n\n### NormalizedLabel (CP1: name-only)\n- project_id: i64\n- name: String\n\n## Functions\n\n### transform_issue(gitlab_issue: &GitLabIssue, local_project_id: i64) -> NormalizedIssue\n- Convert ISO timestamps to ms epoch using iso_to_ms()\n- Set last_seen_at to now_ms()\n- Clone string fields\n\n### extract_labels(gitlab_issue: &GitLabIssue, local_project_id: i64) 
-> Vec\n- Map labels vec to NormalizedLabel structs\n\nFiles: \n- src/gitlab/transformers/mod.rs\n- src/gitlab/transformers/issue.rs\nTests: tests/issue_transformer_tests.rs\nDone when: Unit tests pass for payload transformation and label extraction","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:47.719562Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.736142Z","closed_at":"2026-01-25T17:02:01.736142Z","deleted_at":"2026-01-25T17:02:01.736129Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-159p","title":"Add get_issue_by_iid and get_mr_by_iid to GitLabClient with wiremock tests","description":"## Background\nSurgical sync needs to fetch a single issue or MR by its project-scoped IID from GitLab REST API during the preflight phase. The existing `GitLabClient` has `paginate_issues` and `paginate_merge_requests` for bulk streaming, but no single-entity fetch by IID. The GitLab v4 API provides `/api/v4/projects/:id/issues/:iid` and `/api/v4/projects/:id/merge_requests/:iid` endpoints that return exactly one entity or 404.\n\nThese methods are used by the surgical preflight (bd-3sez) to validate that requested IIDs actually exist on GitLab before committing to the ingest phase. 
They must return the full `GitLabIssue` / `GitLabMergeRequest` structs (same as the paginated endpoints return) so they can be passed directly to `process_single_issue` / `process_single_mr`.\n\n## Approach\n\n### Step 1: Add `get_issue_by_iid` method (src/gitlab/client.rs)\n\nAdd after the existing `get_version` method (~line 112):\n\n```rust\npub async fn get_issue_by_iid(\n &self,\n project_id: u64,\n iid: u64,\n) -> Result {\n self.request(&format!(\"/api/v4/projects/{project_id}/issues/{iid}\"))\n .await\n}\n```\n\nThis reuses the existing `request()` method which already handles:\n- Rate limiting (via `RateLimiter`)\n- Retry on 429 (up to `MAX_RETRIES`)\n- 404 → `LoreError::GitLabNotFound { resource }`\n- 401 → `LoreError::GitLabAuthFailed`\n- JSON deserialization into `GitLabIssue`\n\n### Step 2: Add `get_mr_by_iid` method (src/gitlab/client.rs)\n\n```rust\npub async fn get_mr_by_iid(\n &self,\n project_id: u64,\n iid: u64,\n) -> Result {\n self.request(&format!(\"/api/v4/projects/{project_id}/merge_requests/{iid}\"))\n .await\n}\n```\n\n### Step 3: Add wiremock tests (src/gitlab/client_tests.rs or inline #[cfg(test)])\n\nFour tests using the same wiremock pattern as `src/gitlab/graphql_tests.rs`:\n1. `get_issue_by_iid_success` — mock 200 with full GitLabIssue JSON, verify deserialized fields\n2. `get_issue_by_iid_not_found` — mock 404, verify `LoreError::GitLabNotFound`\n3. `get_mr_by_iid_success` — mock 200 with full GitLabMergeRequest JSON, verify deserialized fields\n4. 
`get_mr_by_iid_not_found` — mock 404, verify `LoreError::GitLabNotFound`\n\n## Acceptance Criteria\n- [ ] `GitLabClient::get_issue_by_iid(project_id, iid)` returns `Result`\n- [ ] `GitLabClient::get_mr_by_iid(project_id, iid)` returns `Result`\n- [ ] 404 response maps to `LoreError::GitLabNotFound`\n- [ ] 401 response maps to `LoreError::GitLabAuthFailed` (inherited from `handle_response`)\n- [ ] Successful responses deserialize into the correct struct types\n- [ ] All 4 wiremock tests pass\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n- MODIFY: src/gitlab/client.rs (add two pub async methods)\n- CREATE: src/gitlab/client_tests.rs (wiremock tests, referenced via `#[cfg(test)] #[path = \"client_tests.rs\"] mod tests;` at bottom of client.rs)\n\n## TDD Anchor\nRED: Write 4 wiremock tests in `src/gitlab/client_tests.rs`:\n\n```rust\nuse super::*;\nuse crate::core::error::LoreError;\nuse wiremock::matchers::{header, method, path};\nuse wiremock::{Mock, MockServer, ResponseTemplate};\n\n#[tokio::test]\nasync fn get_issue_by_iid_success() {\n let server = MockServer::start().await;\n let issue_json = serde_json::json!({\n \"id\": 1001,\n \"iid\": 42,\n \"project_id\": 5,\n \"title\": \"Fix login bug\",\n \"state\": \"opened\",\n \"created_at\": \"2026-01-15T10:00:00Z\",\n \"updated_at\": \"2026-02-01T14:30:00Z\",\n \"author\": { \"id\": 1, \"username\": \"dev1\", \"name\": \"Developer One\", \"avatar_url\": null, \"web_url\": \"https://gitlab.example.com/dev1\" },\n \"web_url\": \"https://gitlab.example.com/group/repo/-/issues/42\",\n \"labels\": [],\n \"milestone\": null,\n \"assignees\": [],\n \"closed_at\": null,\n \"closed_by\": null,\n \"description\": \"Login fails on mobile\"\n });\n\n Mock::given(method(\"GET\"))\n .and(path(\"/api/v4/projects/5/issues/42\"))\n .and(header(\"PRIVATE-TOKEN\", \"test-token\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json))\n .mount(&server)\n 
.await;\n\n let client = GitLabClient::new(&server.uri(), \"test-token\", Some(100.0));\n let issue = client.get_issue_by_iid(5, 42).await.unwrap();\n assert_eq!(issue.iid, 42);\n assert_eq!(issue.title, \"Fix login bug\");\n}\n\n#[tokio::test]\nasync fn get_issue_by_iid_not_found() {\n let server = MockServer::start().await;\n\n Mock::given(method(\"GET\"))\n .and(path(\"/api/v4/projects/5/issues/999\"))\n .respond_with(ResponseTemplate::new(404).set_body_json(serde_json::json!({\"message\": \"404 Not Found\"})))\n .mount(&server)\n .await;\n\n let client = GitLabClient::new(&server.uri(), \"test-token\", Some(100.0));\n let err = client.get_issue_by_iid(5, 999).await.unwrap_err();\n assert!(matches!(err, LoreError::GitLabNotFound { .. }));\n}\n\n#[tokio::test]\nasync fn get_mr_by_iid_success() {\n let server = MockServer::start().await;\n let mr_json = serde_json::json!({\n \"id\": 2001,\n \"iid\": 101,\n \"project_id\": 5,\n \"title\": \"Add caching layer\",\n \"state\": \"merged\",\n \"created_at\": \"2026-01-20T09:00:00Z\",\n \"updated_at\": \"2026-02-10T16:00:00Z\",\n \"author\": { \"id\": 2, \"username\": \"dev2\", \"name\": \"Developer Two\", \"avatar_url\": null, \"web_url\": \"https://gitlab.example.com/dev2\" },\n \"web_url\": \"https://gitlab.example.com/group/repo/-/merge_requests/101\",\n \"source_branch\": \"feature/caching\",\n \"target_branch\": \"main\",\n \"draft\": false,\n \"merge_status\": \"can_be_merged\",\n \"labels\": [],\n \"milestone\": null,\n \"assignees\": [],\n \"reviewers\": [],\n \"merged_by\": null,\n \"merged_at\": null,\n \"closed_at\": null,\n \"closed_by\": null,\n \"description\": \"Adds Redis caching\"\n });\n\n Mock::given(method(\"GET\"))\n .and(path(\"/api/v4/projects/5/merge_requests/101\"))\n .and(header(\"PRIVATE-TOKEN\", \"test-token\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(&mr_json))\n .mount(&server)\n .await;\n\n let client = GitLabClient::new(&server.uri(), \"test-token\", Some(100.0));\n let 
mr = client.get_mr_by_iid(5, 101).await.unwrap();\n assert_eq!(mr.iid, 101);\n assert_eq!(mr.title, \"Add caching layer\");\n assert_eq!(mr.source_branch, \"feature/caching\");\n}\n\n#[tokio::test]\nasync fn get_mr_by_iid_not_found() {\n let server = MockServer::start().await;\n\n Mock::given(method(\"GET\"))\n .and(path(\"/api/v4/projects/5/merge_requests/999\"))\n .respond_with(ResponseTemplate::new(404).set_body_json(serde_json::json!({\"message\": \"404 Not Found\"})))\n .mount(&server)\n .await;\n\n let client = GitLabClient::new(&server.uri(), \"test-token\", Some(100.0));\n let err = client.get_mr_by_iid(5, 999).await.unwrap_err();\n assert!(matches!(err, LoreError::GitLabNotFound { .. }));\n}\n```\n\nGREEN: Add the two methods to `GitLabClient`.\nVERIFY: `cargo test get_issue_by_iid && cargo test get_mr_by_iid`\n\n## Edge Cases\n- The `request()` method already handles 429 retries, so no extra retry logic is needed in the new methods.\n- The GitLabIssue/GitLabMergeRequest fixture JSON must include all required (non-Option) fields. Check the struct definitions in `src/gitlab/types.rs` if deserialization fails — the test fixtures above include the minimum required fields based on the struct definitions.\n- The `project_id` parameter is the GitLab-side numeric project ID (not the local SQLite row ID). The caller must resolve this from the local `projects` table's `gitlab_project_id` column.\n\n## Dependency Context\nThis is a leaf/foundation bead with no upstream dependencies. 
Downstream bead bd-3sez (surgical.rs) calls these methods during preflight to fetch entities by IID before ingesting.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-17T19:12:14.447996Z","created_by":"tayloreernisse","updated_at":"2026-02-19T05:42:38.833054Z","closed_at":"2026-02-19T05:42:38.832902Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-159p","depends_on_id":"bd-1i4i","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-159p","depends_on_id":"bd-3sez","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} @@ -41,9 +41,9 @@ {"id":"bd-1h3f","title":"Add rename awareness to path resolution probes","description":"## Background\nThe path resolution layer (build_path_query at who.rs:467 and suffix_probe at who.rs:596) only checks position_new_path and new_path. If a user queries an old filename (e.g., 'login.rs' after rename to 'auth.rs'), the probes return 'not found' and scoring never runs — even though the scoring SQL (bd-1hoq) now matches old_path.\n\n## Approach\n\n### build_path_query() changes (who.rs:467):\n\nProbe 1 (exact_exists):\n- Notes query: add OR position_old_path = ?1\n- File changes query: add OR old_path = ?1\n\nProbe 2 (prefix_exists):\n- Notes query: add OR position_old_path LIKE ?1 ESCAPE '\\\\'\n- File changes query: add OR old_path LIKE ?1 ESCAPE '\\\\'\n\nNote: These probes use simple OR (not UNION ALL) since they only check existence (SELECT 1 ... 
LIMIT 1) — no risk of planner degradation on single-row probes.\n\n### suffix_probe() changes (who.rs:596):\n\nAdd two UNION branches to the existing query:\n```sql\nUNION\nSELECT position_old_path AS full_path FROM notes\nWHERE note_type = 'DiffNote' AND is_system = 0\n AND position_old_path IS NOT NULL\n AND (position_old_path LIKE ?1 ESCAPE '\\\\' OR position_old_path = ?2)\n AND (?3 IS NULL OR project_id = ?3)\nUNION\nSELECT old_path AS full_path FROM mr_file_changes\nWHERE old_path IS NOT NULL\n AND (old_path LIKE ?1 ESCAPE '\\\\' OR old_path = ?2)\n AND (?3 IS NULL OR project_id = ?3)\n```\n\nUse UNION (not UNION ALL) — the existing query uses UNION for dedup.\n\n## TDD Loop\n\n### RED (write first):\n```rust\n#[test]\nfn test_old_path_probe_exact_and_prefix() {\n let conn = setup_test_db();\n insert_project(&conn, 1, \"team/backend\");\n insert_mr(&conn, 1, 1, 100, \"alice\", \"merged\");\n insert_file_change_with_old_path(&conn, 1, 1, \"src/new/foo.rs\", Some(\"src/old/foo.rs\"), \"renamed\");\n insert_discussion(&conn, 1, 1, Some(1), None, true, false);\n insert_diffnote_at(&conn, 1, 1, 1, \"alice\", \"src/new/foo.rs\", Some(\"src/old/foo.rs\"), \"review comment\", now_ms());\n\n // Exact probe by OLD path should resolve\n let pq = build_path_query(&conn, \"src/old/foo.rs\", None).unwrap();\n assert\\!(matches\\!(pq, PathQuery::Exact { .. } | PathQuery::Prefix { .. }));\n\n // Prefix probe by OLD directory should resolve\n let pq = build_path_query(&conn, \"src/old/\", None).unwrap();\n assert\\!(matches\\!(pq, PathQuery::Prefix { .. }));\n\n // New path still works\n let pq = build_path_query(&conn, \"src/new/foo.rs\", None).unwrap();\n assert\\!(matches\\!(pq, PathQuery::Exact { .. 
}));\n}\n\n#[test]\nfn test_suffix_probe_uses_old_path_sources() {\n let conn = setup_test_db();\n insert_project(&conn, 1, \"team/backend\");\n insert_mr(&conn, 1, 1, 100, \"alice\", \"merged\");\n insert_file_change_with_old_path(&conn, 1, 1, \"src/utils.rs\", Some(\"legacy/utils.rs\"), \"renamed\");\n\n let result = suffix_probe(&conn, \"utils.rs\", None).unwrap();\n match result {\n SuffixResult::Ambiguous(paths) => {\n assert\\!(paths.contains(&\"src/utils.rs\".to_string()));\n assert\\!(paths.contains(&\"legacy/utils.rs\".to_string()));\n }\n SuffixResult::Unique(p) => {\n assert\\!(p == \"src/utils.rs\" || p == \"legacy/utils.rs\");\n }\n other => panic\\!(\"Expected Ambiguous or Unique, got {other:?}\"),\n }\n}\n```\n\n### GREEN: Add OR old_path clauses to probes + UNION branches to suffix_probe.\n### VERIFY: cargo test -p lore -- test_old_path_probe test_suffix_probe_uses_old_path\n\n## Acceptance Criteria\n- [ ] test_old_path_probe_exact_and_prefix passes\n- [ ] test_suffix_probe_uses_old_path_sources passes\n- [ ] Existing path probe tests still pass\n- [ ] No changes to PathQuery or SuffixResult enums\n\n## Files\n- MODIFY: src/cli/commands/who.rs (build_path_query at line 467, suffix_probe at line 596)\n\n## Edge Cases\n- position_old_path can be NULL — OR clause handles naturally (NULL \\!= ?1)\n- Old path might match multiple new paths (copy+rename) — suffix_probe Ambiguous handles this\n- Requires insert_file_change_with_old_path and insert_diffnote_at helpers from bd-2yu5","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T16:59:51.706482Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.411615Z","closed_at":"2026-02-12T20:43:04.411575Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates 
green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1h3f","depends_on_id":"bd-2ao4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1h3f","depends_on_id":"bd-2yu5","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-1hj","title":"[CP1] Ingestion orchestrator","description":"Coordinate issue + dependent discussion sync with bounded concurrency.\n\n## Module\nsrc/ingestion/orchestrator.rs\n\n## Canonical Pattern (CP1)\n\nWhen gi ingest --type=issues runs:\n\n1. **Ingest issues** - cursor-based with incremental cursor updates per page\n2. **Collect touched issues** - record IssueForDiscussionSync for each issue passing cursor filter\n3. **Filter for discussion sync** - enqueue issues where:\n issue.updated_at > issues.discussions_synced_for_updated_at\n4. **Execute discussion sync** - with bounded concurrency (dependent_concurrency from config)\n5. **Update watermark** - after each issue's discussions successfully ingested\n\n## Concurrency Notes\n\nRuntime decision: Use single-threaded Tokio runtime (flavor = \"current_thread\")\n- rusqlite::Connection is !Send, conflicts with multi-threaded runtimes\n- Single-threaded avoids Send bounds entirely\n- Use tokio::task::spawn_local + LocalSet for concurrent discussion fetches\n- Keeps code simple; can upgrade to channel-based DB writer in CP2 if needed\n\n## Configuration Used\n- config.sync.dependent_concurrency - limits parallel discussion requests\n- config.sync.cursor_rewind_seconds - safety margin for cursor\n\n## Progress Reporting\n- Show total issues fetched\n- Show issues needing discussion sync\n- Show discussion/note counts per project\n\nFiles: src/ingestion/orchestrator.rs\nTests: Integration tests with mocked GitLab\nDone when: Full issue + discussion ingestion orchestrated 
correctly","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:57.325679Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.851047Z","closed_at":"2026-01-25T17:02:01.851047Z","deleted_at":"2026-01-25T17:02:01.851043Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1hoq","title":"Restructure expert SQL with CTE-based dual-path matching","description":"## Background\nThe current query_expert() at who.rs:641 uses a 4-signal UNION ALL that only matches position_new_path and new_path, with flat COUNT-based scoring computed entirely in SQL. The new model needs dual-path matching, 5 signal types, state-aware timestamps, and returns per-signal rows for Rust-side decay computation (bd-13q8).\n\n## Approach\n**Important**: This bead builds the new SQL as a separate function WITHOUT modifying query_expert() yet. bd-13q8 wires it into query_expert(). This keeps this bead independently testable.\n\nAdd a new function:\n```rust\n/// Build the CTE-based expert scoring SQL for a given path query mode.\n/// Returns SQL string. Params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\nfn build_expert_sql(path_op: &str) -> String {\n // ... format the SQL with {path_op} inlined, all config values as bound params\n}\n```\n\n### SQL structure (8 CTEs + final SELECT):\n1. **matched_notes_raw**: UNION ALL on position_new_path + position_old_path\n2. **matched_notes**: DISTINCT dedup by id\n3. **matched_file_changes_raw**: UNION ALL on new_path + old_path\n4. **matched_file_changes**: DISTINCT dedup by (merge_request_id, project_id)\n5. **mr_activity**: Centralized state-aware timestamps AND state_mult. Joins merge_requests via matched_file_changes. 
Computes:\n - activity_ts: CASE WHEN state='merged' THEN COALESCE(merged_at, created_at) WHEN state='closed' THEN COALESCE(closed_at, created_at) ELSE COALESCE(updated_at, created_at) END\n - state_mult: CASE WHEN state='closed' THEN ?5 ELSE 1.0 END\n6. **reviewer_participation**: substantive DiffNotes WHERE LENGTH(TRIM(body)) >= ?6\n7. **raw**: 5 signals (diffnote_reviewer, diffnote_author, file_author, file_reviewer_participated, file_reviewer_assigned). Signals 1-2 compute state_mult inline. Signals 3-4a-4b reference mr_activity.\n8. **aggregated**: MR-level GROUP BY + note_group with COUNT\n\n### Returns 6 columns: (username TEXT, signal TEXT, mr_id INTEGER, qty INTEGER, ts INTEGER, state_mult REAL)\n\nSee plans/time-decay-expert-scoring.md section 3 for the full SQL template.\n\n## TDD Loop\n\n### RED (write first):\n```rust\n#[test]\nfn test_expert_sql_returns_expected_signal_rows() {\n let conn = setup_test_db();\n insert_project(&conn, 1, \"team/backend\");\n insert_mr(&conn, 1, 1, 100, \"alice\", \"merged\");\n insert_file_change(&conn, 1, 1, \"src/app.rs\", \"modified\");\n insert_reviewer(&conn, 1, \"bob\");\n insert_reviewer(&conn, 1, \"carol\");\n insert_discussion(&conn, 1, 1, Some(1), None, true, false);\n insert_diffnote(&conn, 1, 1, 1, \"carol\", \"src/app.rs\", \"This needs error handling for the edge case\");\n\n let sql = build_expert_sql(\"= ?1\");\n let mut stmt = conn.prepare(&sql).unwrap();\n let rows: Vec<(String, String, i64, i64, i64, f64)> = stmt\n .query_map(\n rusqlite::params![\"src/app.rs\", 0_i64, Option::::None, now_ms() + 1000, 0.5_f64, 20_i64],\n |row| Ok((\n row.get(0).unwrap(), row.get(1).unwrap(), row.get(2).unwrap(),\n row.get(3).unwrap(), row.get(4).unwrap(), row.get(5).unwrap(),\n ))\n ).unwrap().filter_map(|r| r.ok()).collect();\n\n // alice: file_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"file_author\"));\n // carol: file_reviewer_participated (left substantive DiffNote)\n 
assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"file_reviewer_participated\"));\n // bob: file_reviewer_assigned (no DiffNotes)\n assert!(rows.iter().any(|(u, s, ..)| u == \"bob\" && s == \"file_reviewer_assigned\"));\n // carol: note_group\n assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"note_group\"));\n // alice: diffnote_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"diffnote_author\"));\n // All merged rows have state_mult = 1.0\n assert!(rows.iter().all(|(.., sm)| (sm - 1.0).abs() < f64::EPSILON));\n}\n```\n\n### GREEN: Implement build_expert_sql() with the 8 CTEs.\n### VERIFY: cargo test -p lore -- test_expert_sql_returns_expected_signal_rows\n\n## Acceptance Criteria\n- [ ] test_expert_sql_returns_expected_signal_rows passes (all 5 signal types correct)\n- [ ] SQL compiles against :memory: DB with indexes from bd-2ao4 (migration 026)\n- [ ] 6 columns returned: username, signal, mr_id, qty, ts, state_mult (REAL, not TEXT)\n- [ ] 6 SQL params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\n- [ ] mr_activity CTE centralizes timestamp + state_mult (not repeated)\n- [ ] reviewer_participation uses ?6 not inlined literal\n- [ ] Existing query_expert() and all existing tests UNTOUCHED\n- [ ] build_expert_sql() is a pure function (no Connection param)\n\n## Files\n- MODIFY: src/cli/commands/who.rs (new build_expert_sql function + test, placed near query_expert at line ~641)\n\n## Edge Cases\n- ?5 (closed_mr_multiplier) bound as f64 — rusqlite handles this\n- ?6 (reviewer_min_note_chars) bound as i64 — SQLite LENGTH returns integer\n- Signals 1-2 compute state_mult inline (join through discussions, not mr_activity)\n- COALESCE fallback to created_at for NULL merged_at/closed_at/updated_at\n- Dedup in matched_notes/matched_file_changes prevents 
double-counting","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T16:59:44.665314Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.410514Z","closed_at":"2026-02-12T20:43:04.410470Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1hoq","depends_on_id":"bd-1soz","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2ao4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2w1p","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} -{"id":"bd-1ht","title":"Epic: Gate 5 - Code Trace (lore trace)","description":"## Background\n\nGate 5 implements 'lore trace' — answers 'Why was this code introduced?' by tracing from a file path through the MR that modified it, to the issue that motivated the MR, to the discussions with decision rationale. Capstone of Phase B.\n\nGate 5 ships Tier 1 only (API-only, no local git). Tier 2 (git blame via git2-rs) deferred to Phase C.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 5 (Sections 5.1-5.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Gate 4 (bd-14q): provides mr_file_changes table + resolve_rename_chain algorithm\n- entity_references source_method: 'api' | 'note_parse' | 'description_parse'\n- discussions/notes tables for DiffNote content\n- merge_requests.merged_at exists (migration 006). 
Use COALESCE(merged_at, updated_at) for ordering.\n\n## Architecture\n\n- **No new tables.** Trace queries combine mr_file_changes, entity_references, discussions/notes\n- **Query flow:** file -> mr_file_changes -> MRs -> entity_references (closes/related) -> issues -> discussions with DiffNote context\n- **Tier 1:** File-level granularity only. Cannot trace a specific line to its introducing commit.\n- **Path parsing:** Supports 'src/foo.rs:45' syntax — line number parsed but deferred with Tier 2 warning.\n- **Rename aware:** Reuses file_history::resolve_rename_chain for multi-path matching.\n\n## Children (Execution Order)\n\n1. **bd-2n4** — Trace query logic: file -> MR -> issue -> discussion chain (src/core/trace.rs)\n2. **bd-9dd** — CLI command with human + robot output (src/cli/commands/trace.rs)\n\n## Gate Completion Criteria\n\n- [ ] `lore trace ` shows MRs with linked issues + discussion context\n- [ ] Output includes MR -> issue -> discussion chain\n- [ ] DiffNote snippets show content on the traced file\n- [ ] Cross-references from entity_references used for MR->issue linking\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] Robot mode JSON with tier: 'api_only'\n- [ ] Graceful handling when no MR data found (suggest sync with fetchMrFileChanges)\n","status":"open","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.141053Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:57:12.357740Z","compaction_level":0,"original_size":0,"labels":["epic","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-1ht","depends_on_id":"bd-14q","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1ht","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} +{"id":"bd-1ht","title":"Epic: Gate 5 - Code Trace (lore trace)","description":"## Background\n\nGate 5 implements 'lore trace' — answers 'Why was this code introduced?' 
by tracing from a file path through the MR that modified it, to the issue that motivated the MR, to the discussions with decision rationale. Capstone of Phase B.\n\nGate 5 ships Tier 1 only (API-only, no local git). Tier 2 (git blame via git2-rs) deferred to Phase C.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 5 (Sections 5.1-5.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Gate 4 (bd-14q): provides mr_file_changes table + resolve_rename_chain algorithm\n- entity_references source_method: 'api' | 'note_parse' | 'description_parse'\n- discussions/notes tables for DiffNote content\n- merge_requests.merged_at exists (migration 006). Use COALESCE(merged_at, updated_at) for ordering.\n\n## Architecture\n\n- **No new tables.** Trace queries combine mr_file_changes, entity_references, discussions/notes\n- **Query flow:** file -> mr_file_changes -> MRs -> entity_references (closes/related) -> issues -> discussions with DiffNote context\n- **Tier 1:** File-level granularity only. Cannot trace a specific line to its introducing commit.\n- **Path parsing:** Supports 'src/foo.rs:45' syntax — line number parsed but deferred with Tier 2 warning.\n- **Rename aware:** Reuses file_history::resolve_rename_chain for multi-path matching.\n\n## Children (Execution Order)\n\n1. **bd-2n4** — Trace query logic: file -> MR -> issue -> discussion chain (src/core/trace.rs)\n2. 
**bd-9dd** — CLI command with human + robot output (src/cli/commands/trace.rs)\n\n## Gate Completion Criteria\n\n- [ ] `lore trace ` shows MRs with linked issues + discussion context\n- [ ] Output includes MR -> issue -> discussion chain\n- [ ] DiffNote snippets show content on the traced file\n- [ ] Cross-references from entity_references used for MR->issue linking\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] Robot mode JSON with tier: 'api_only'\n- [ ] Graceful handling when no MR data found (suggest sync with fetchMrFileChanges)\n","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.141053Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:46:59.203539Z","closed_at":"2026-02-19T13:46:59.203489Z","close_reason":"All core children complete: bd-2n4 (trace query), bd-9dd (trace CLI). Doc updates (bd-1v8, bd-2fc) are follow-ups.","compaction_level":0,"original_size":0,"labels":["epic","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-1ht","depends_on_id":"bd-14q","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1ht","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-1i2","title":"Integrate mark_dirty_tx into ingestion modules","description":"## Background\nThis bead integrates dirty source tracking into the existing ingestion pipelines. Every entity upserted during ingestion must be marked dirty so the document regenerator knows to update the corresponding search document. The critical constraint: mark_dirty_tx() must be called INSIDE the same transaction that upserts the entity — not after commit.\n\n**Key PRD clarification:** Mark ALL upserted entities dirty (not just changed ones). 
The regenerator's hash comparison handles \"unchanged\" detection cheaply — this avoids needing change detection in ingestion.\n\n## Approach\nModify 4 existing ingestion files to add mark_dirty_tx() calls inside existing transaction blocks per PRD Section 6.1.\n\n**1. src/ingestion/issues.rs:**\nInside the issue upsert loop, after each successful INSERT/UPDATE:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Issue, issue_row.id)?;\n```\n\n**2. src/ingestion/merge_requests.rs:**\nInside the MR upsert loop:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::MergeRequest, mr_row.id)?;\n```\n\n**3. src/ingestion/discussions.rs:**\nInside discussion insert (issue discussions, full-refresh transaction):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**4. src/ingestion/mr_discussions.rs:**\nInside discussion upsert (write phase):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**Discussion Sweep Cleanup (PRD Section 6.1 — CRITICAL):**\nWhen the MR discussion sweep deletes stale discussions (`last_seen_at < run_start_time`), **delete the corresponding document rows directly** — do NOT use the dirty queue for cleanup. The `ON DELETE CASCADE` on `document_labels`/`document_paths` and the `documents_embeddings_ad` trigger handle all downstream cleanup.\n\n**PRD-exact CTE pattern:**\n```sql\n-- In src/ingestion/mr_discussions.rs, during sweep phase.\n-- Uses a CTE to capture stale IDs atomically before cascading deletes.\n-- This is more defensive than two separate statements because the CTE\n-- guarantees the ID set is captured before any row is deleted.\nWITH stale AS (\n SELECT id FROM discussions\n WHERE merge_request_id = ? 
AND last_seen_at < ?\n)\n-- Step 1: delete orphaned documents (must happen while source_id still resolves)\nDELETE FROM documents\n WHERE source_type = 'discussion' AND source_id IN (SELECT id FROM stale);\n-- Step 2: delete the stale discussions themselves\nDELETE FROM discussions\n WHERE id IN (SELECT id FROM stale);\n```\n\n**NOTE:** If SQLite version doesn't support CTE-based multi-statement, execute as two sequential statements capturing IDs in Rust first:\n```rust\nlet stale_ids: Vec = conn.prepare(\n \"SELECT id FROM discussions WHERE merge_request_id = ? AND last_seen_at < ?\"\n)?.query_map(params![mr_id, run_start], |r| r.get(0))?\n .collect::, _>>()?;\n\nif !stale_ids.is_empty() {\n // Delete documents FIRST (while source_id still resolves)\n conn.execute(\n \"DELETE FROM documents WHERE source_type = 'discussion' AND source_id IN (...)\",\n ...\n )?;\n // Then delete the discussions\n conn.execute(\n \"DELETE FROM discussions WHERE id IN (...)\",\n ...\n )?;\n}\n```\n\n**IMPORTANT difference from dirty queue pattern:** The sweep deletes documents DIRECTLY (not via dirty_sources queue). This is because the source entity is being deleted — there's nothing for the regenerator to regenerate from. 
The cascade handles FTS, labels, paths, and embeddings cleanup.\n\n## Acceptance Criteria\n- [ ] Every upserted issue is marked dirty inside the same transaction\n- [ ] Every upserted MR is marked dirty inside the same transaction\n- [ ] Every upserted discussion (issue + MR) is marked dirty inside the same transaction\n- [ ] ALL upserted entities marked dirty (not just changed ones) — regenerator handles skip\n- [ ] mark_dirty_tx called with &Transaction (not &Connection)\n- [ ] mark_dirty_tx uses upsert with ON CONFLICT to reset backoff state (not INSERT OR IGNORE)\n- [ ] Discussion sweep deletes documents DIRECTLY (not via dirty queue)\n- [ ] Discussion sweep uses CTE (or Rust-side ID capture) to capture stale IDs before cascading deletes\n- [ ] Documents deleted BEFORE discussions (while source_id still resolves)\n- [ ] ON DELETE CASCADE handles document_labels, document_paths cleanup\n- [ ] documents_embeddings_ad trigger handles embedding cleanup\n- [ ] `cargo build` succeeds\n- [ ] Existing ingestion tests still pass\n\n## Files\n- `src/ingestion/issues.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/merge_requests.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/discussions.rs` — add mark_dirty_tx calls in insert loop\n- `src/ingestion/mr_discussions.rs` — add mark_dirty_tx calls + direct document deletion in sweep\n\n## TDD Loop\nRED: Existing tests should still pass (regression); new tests:\n- `test_issue_upsert_marks_dirty` — after issue ingest, dirty_sources has entry\n- `test_mr_upsert_marks_dirty` — after MR ingest, dirty_sources has entry\n- `test_discussion_upsert_marks_dirty` — after discussion ingest, dirty_sources has entry\n- `test_discussion_sweep_deletes_documents` — stale discussion documents deleted directly\n- `test_sweep_cascade_cleans_labels_paths` — ON DELETE CASCADE works\nGREEN: Add mark_dirty_tx calls in all 4 files, implement sweep with CTE\nVERIFY: `cargo test ingestion && cargo build`\n\n## Edge 
Cases\n- Upsert that doesn't change data: still marks dirty (regenerator hash check handles skip)\n- Transaction rollback: dirty mark also rolled back (atomic, inside same txn)\n- Discussion sweep with zero stale IDs: CTE returns empty, no DELETE executed\n- Large batch of upserts: each mark_dirty_tx is O(1) INSERT with ON CONFLICT\n- Sweep deletes document before discussion: order matters for source_id resolution","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.540279Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:39:17.241433Z","closed_at":"2026-01-30T17:39:17.241390Z","close_reason":"Added mark_dirty_tx calls in issues.rs, merge_requests.rs, discussions.rs, mr_discussions.rs (2 paths)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1i2","depends_on_id":"bd-38q","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} -{"id":"bd-1i4i","title":"Implement run_sync_surgical orchestration function","description":"## Background\n\nThe surgical sync pipeline needs a top-level orchestration function that coordinates the full pipeline for syncing specific IIDs. Unlike `run_sync` (lines 63-360 of `src/cli/commands/sync.rs`) which syncs all projects and all entities, `run_sync_surgical` targets specific issues/MRs by IID within a single project. The pipeline stages are: resolve project, record sync run, preflight fetch, check cancellation, acquire lock, ingest with TOCTOU guards, inline dependent enrichment (discussions, events, diffs), scoped doc regeneration, scoped embedding, finalize recorder, and build `SyncResult`.\n\n## Approach\n\nCreate `pub async fn run_sync_surgical()` in a new file `src/cli/commands/sync_surgical.rs`. 
Signature:\n\n```rust\npub async fn run_sync_surgical(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result\n```\n\nThe function reads `options.issues` and `options.merge_requests` (added by bd-1lja) to determine target IIDs. Pipeline:\n\n1. **Resolve project**: Call `resolve_project(conn, project_str)` from `src/core/project.rs` to get `gitlab_project_id`.\n2. **Start recorder**: `SyncRunRecorder::start(&recorder_conn, \"surgical-sync\", run_id)`. Note: `succeed()` and `fail()` consume `self`, so control flow must ensure exactly one terminal call.\n3. **Preflight fetch**: For each IID, call `get_issue_by_iid` / `get_mr_by_iid` (bd-159p) to confirm the entity exists on GitLab and capture `updated_at` for TOCTOU.\n4. **Check cancellation**: `if signal.is_cancelled() { recorder.fail(...); return Ok(result); }`\n5. **Acquire lock**: `AppLock::new(conn, LockOptions { name: \"surgical-sync\".into(), stale_lock_minutes: config.sync.stale_lock_minutes, heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds })`. Lock must `acquire(force)` and `release()` on all exit paths.\n6. **Ingest with TOCTOU**: For each preflight entity, call surgical ingest (bd-3sez). Compare DB `updated_at` with preflight `updated_at`; skip if already current. Record outcome in `EntitySyncResult`.\n7. **Inline dependents**: For ingested entities, fetch discussions, resource events (if `config.sync.fetch_resource_events`), MR diffs (if `config.sync.fetch_mr_file_changes`). Use `config.sync.requests_per_second` for rate limiting.\n8. **Scoped docs**: Call `run_generate_docs_for_sources()` (bd-hs6j) with only the affected entity source IDs.\n9. **Scoped embed**: Call `run_embed_for_document_ids()` (bd-1elx) with only the regenerated document IDs.\n10. **Finalize**: `recorder.succeed(conn, &metrics, total_items, total_errors)`.\n11. 
**Build SyncResult**: Populate surgical fields (bd-wcja): `surgical_mode: Some(true)`, `surgical_iids`, `entity_results`, `preflight_only`.\n\nIf `options.preflight_only` is set, return after step 3 with the preflight data and skip steps 4-10.\n\nProgress output uses `stage_spinner_v2(icon, label, msg, robot_mode)` from `src/cli/progress.rs` line 18 during execution, and `format_stage_line(icon, label, summary, elapsed)` from `src/cli/progress.rs` line 67 for completion lines. Stage icons via `Icons::sync()` from `src/cli/render.rs` line 208. Error completion uses `color_icon(icon, has_errors)` from `src/cli/commands/sync.rs` line 55.\n\n## Acceptance Criteria\n\n1. `run_sync_surgical` compiles and runs the full pipeline for 1+ issue IIDs\n2. Preflight-only mode returns early with fetched entity data, no DB writes beyond recorder\n3. TOCTOU: entities whose DB `updated_at` matches preflight `updated_at` are skipped with `skipped_toctou` outcome\n4. Cancellation at any stage between preflight and ingest stops processing, calls `recorder.fail()`\n5. Lock is acquired before ingest and released on all exit paths (success, error, cancellation)\n6. `SyncResult` surgical fields are populated: `surgical_mode`, `surgical_iids`, `entity_results`\n7. Robot mode produces valid JSON with per-entity outcomes\n8. 
Human mode shows stage spinners and completion lines\n\n## Files\n\n- `src/cli/commands/sync_surgical.rs` — new file, main orchestration function\n- `src/cli/commands/mod.rs` — add `pub mod sync_surgical;`\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync_surgical.rs` or a companion `sync_surgical_tests.rs`:\n\n```rust\n#[cfg(test)]\nmod tests {\n use super::*;\n use crate::core::db::{create_connection, run_migrations};\n use std::path::Path;\n use wiremock::{MockServer, Mock, ResponseTemplate};\n use wiremock::matchers::{method, path_regex};\n\n fn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n }\n\n fn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n // Insert test project\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n }\n\n #[tokio::test]\n async fn surgical_sync_single_issue_end_to_end() {\n let server = MockServer::start().await;\n // Mock: GET /projects/:id/issues?iids[]=7 returns one issue\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(\n serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])\n ))\n .mount(&server).await;\n // Mock discussions endpoint\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test01\"), &signal).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"synced\");\n }\n\n #[tokio::test]\n async fn preflight_only_returns_early() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n preflight_only: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test02\"), &signal).await.unwrap();\n\n assert_eq!(result.preflight_only, Some(true));\n assert_eq!(result.issues_updated, 0); // No actual ingest happened\n }\n\n #[tokio::test]\n async fn cancellation_before_ingest_fails_recorder() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, 
\"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before we start\n let result = run_sync_surgical(&config, options, Some(\"test03\"), &signal).await.unwrap();\n\n // Result should indicate cancellation\n assert_eq!(result.issues_updated, 0);\n }\n}\n```\n\n## Edge Cases\n\n- **Entity not found on GitLab**: Preflight returns 404 for an IID. Record `EntitySyncResult { outcome: \"not_found\" }` and continue with remaining IIDs.\n- **All entities skipped by TOCTOU**: Every entity's `updated_at` matches DB. Result has `entity_results` with all `skipped_toctou`, zero actual sync work.\n- **Mixed success/failure**: Some IIDs succeed, some fail. All recorded in `entity_results`. Function returns `Ok` with partial results, not `Err`.\n- **SyncRunRecorder consume semantics**: `succeed()` and `fail()` take `self` by value. The orchestrator must ensure exactly one terminal call. Use an `Option` pattern: `let mut recorder = Some(recorder); ... 
recorder.take().unwrap().succeed(...)`.\n- **Lock contention**: If another sync holds the lock and `force` is false, fail with clear error before any ingest.\n- **Empty IID lists**: If both `options.issues` and `options.merge_requests` are empty, return immediately with default `SyncResult` (no surgical fields set).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-wcja (SyncResult fields), bd-1lja (SyncOptions extensions), bd-159p (get_by_iid client methods), bd-3sez (surgical ingest/preflight/TOCTOU), bd-kanh (per-entity helpers), bd-arka (SyncRunRecorder surgical methods), bd-1elx (scoped embed), bd-hs6j (scoped docs), bd-tiux (migration 027)\n- **Blocks (downstream)**: bd-3bec (wiring into run_sync), bd-3jqx (integration tests)\n- This is the keystone bead — it consumes all upstream primitives and is consumed by the final wiring and integration test beads.","status":"open","priority":1,"issue_type":"task","created_at":"2026-02-17T19:17:24.197299Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:03:01.815253Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-1i4i","depends_on_id":"bd-3bec","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} +{"id":"bd-1i4i","title":"Implement run_sync_surgical orchestration function","description":"## Background\n\nThe surgical sync pipeline needs a top-level orchestration function that coordinates the full pipeline for syncing specific IIDs. Unlike `run_sync` (lines 63-360 of `src/cli/commands/sync.rs`) which syncs all projects and all entities, `run_sync_surgical` targets specific issues/MRs by IID within a single project. 
The pipeline stages are: resolve project, record sync run, preflight fetch, check cancellation, acquire lock, ingest with TOCTOU guards, inline dependent enrichment (discussions, events, diffs), scoped doc regeneration, scoped embedding, finalize recorder, and build `SyncResult`.\n\n## Approach\n\nCreate `pub async fn run_sync_surgical()` in a new file `src/cli/commands/sync_surgical.rs`. Signature:\n\n```rust\npub async fn run_sync_surgical(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result\n```\n\nThe function reads `options.issues` and `options.merge_requests` (added by bd-1lja) to determine target IIDs. Pipeline:\n\n1. **Resolve project**: Call `resolve_project(conn, project_str)` from `src/core/project.rs` to get `gitlab_project_id`.\n2. **Start recorder**: `SyncRunRecorder::start(&recorder_conn, \"surgical-sync\", run_id)`. Note: `succeed()` and `fail()` consume `self`, so control flow must ensure exactly one terminal call.\n3. **Preflight fetch**: For each IID, call `get_issue_by_iid` / `get_mr_by_iid` (bd-159p) to confirm the entity exists on GitLab and capture `updated_at` for TOCTOU.\n4. **Check cancellation**: `if signal.is_cancelled() { recorder.fail(...); return Ok(result); }`\n5. **Acquire lock**: `AppLock::new(conn, LockOptions { name: \"surgical-sync\".into(), stale_lock_minutes: config.sync.stale_lock_minutes, heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds })`. Lock must `acquire(force)` and `release()` on all exit paths.\n6. **Ingest with TOCTOU**: For each preflight entity, call surgical ingest (bd-3sez). Compare DB `updated_at` with preflight `updated_at`; skip if already current. Record outcome in `EntitySyncResult`.\n7. **Inline dependents**: For ingested entities, fetch discussions, resource events (if `config.sync.fetch_resource_events`), MR diffs (if `config.sync.fetch_mr_file_changes`). Use `config.sync.requests_per_second` for rate limiting.\n8. 
**Scoped docs**: Call `run_generate_docs_for_sources()` (bd-hs6j) with only the affected entity source IDs.\n9. **Scoped embed**: Call `run_embed_for_document_ids()` (bd-1elx) with only the regenerated document IDs.\n10. **Finalize**: `recorder.succeed(conn, &metrics, total_items, total_errors)`.\n11. **Build SyncResult**: Populate surgical fields (bd-wcja): `surgical_mode: Some(true)`, `surgical_iids`, `entity_results`, `preflight_only`.\n\nIf `options.preflight_only` is set, return after step 3 with the preflight data and skip steps 4-10.\n\nProgress output uses `stage_spinner_v2(icon, label, msg, robot_mode)` from `src/cli/progress.rs` line 18 during execution, and `format_stage_line(icon, label, summary, elapsed)` from `src/cli/progress.rs` line 67 for completion lines. Stage icons via `Icons::sync()` from `src/cli/render.rs` line 208. Error completion uses `color_icon(icon, has_errors)` from `src/cli/commands/sync.rs` line 55.\n\n## Acceptance Criteria\n\n1. `run_sync_surgical` compiles and runs the full pipeline for 1+ issue IIDs\n2. Preflight-only mode returns early with fetched entity data, no DB writes beyond recorder\n3. TOCTOU: entities whose DB `updated_at` matches preflight `updated_at` are skipped with `skipped_toctou` outcome\n4. Cancellation at any stage between preflight and ingest stops processing, calls `recorder.fail()`\n5. Lock is acquired before ingest and released on all exit paths (success, error, cancellation)\n6. `SyncResult` surgical fields are populated: `surgical_mode`, `surgical_iids`, `entity_results`\n7. Robot mode produces valid JSON with per-entity outcomes\n8. 
Human mode shows stage spinners and completion lines\n\n## Files\n\n- `src/cli/commands/sync_surgical.rs` — new file, main orchestration function\n- `src/cli/commands/mod.rs` — add `pub mod sync_surgical;`\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync_surgical.rs` or a companion `sync_surgical_tests.rs`:\n\n```rust\n#[cfg(test)]\nmod tests {\n use super::*;\n use crate::core::db::{create_connection, run_migrations};\n use std::path::Path;\n use wiremock::{MockServer, Mock, ResponseTemplate};\n use wiremock::matchers::{method, path_regex};\n\n fn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n }\n\n fn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n // Insert test project\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n }\n\n #[tokio::test]\n async fn surgical_sync_single_issue_end_to_end() {\n let server = MockServer::start().await;\n // Mock: GET /projects/:id/issues?iids[]=7 returns one issue\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(\n serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])\n ))\n .mount(&server).await;\n // Mock discussions endpoint\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test01\"), &signal).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"synced\");\n }\n\n #[tokio::test]\n async fn preflight_only_returns_early() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n preflight_only: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test02\"), &signal).await.unwrap();\n\n assert_eq!(result.preflight_only, Some(true));\n assert_eq!(result.issues_updated, 0); // No actual ingest happened\n }\n\n #[tokio::test]\n async fn cancellation_before_ingest_fails_recorder() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, 
\"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before we start\n let result = run_sync_surgical(&config, options, Some(\"test03\"), &signal).await.unwrap();\n\n // Result should indicate cancellation\n assert_eq!(result.issues_updated, 0);\n }\n}\n```\n\n## Edge Cases\n\n- **Entity not found on GitLab**: Preflight returns 404 for an IID. Record `EntitySyncResult { outcome: \"not_found\" }` and continue with remaining IIDs.\n- **All entities skipped by TOCTOU**: Every entity's `updated_at` matches DB. Result has `entity_results` with all `skipped_toctou`, zero actual sync work.\n- **Mixed success/failure**: Some IIDs succeed, some fail. All recorded in `entity_results`. Function returns `Ok` with partial results, not `Err`.\n- **SyncRunRecorder consume semantics**: `succeed()` and `fail()` take `self` by value. The orchestrator must ensure exactly one terminal call. Use an `Option` pattern: `let mut recorder = Some(recorder); ... 
recorder.take().unwrap().succeed(...)`.\n- **Lock contention**: If another sync holds the lock and `force` is false, fail with clear error before any ingest.\n- **Empty IID lists**: If both `options.issues` and `options.merge_requests` are empty, return immediately with default `SyncResult` (no surgical fields set).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-wcja (SyncResult fields), bd-1lja (SyncOptions extensions), bd-159p (get_by_iid client methods), bd-3sez (surgical ingest/preflight/TOCTOU), bd-kanh (per-entity helpers), bd-arka (SyncRunRecorder surgical methods), bd-1elx (scoped embed), bd-hs6j (scoped docs), bd-tiux (migration 027)\n- **Blocks (downstream)**: bd-3bec (wiring into run_sync), bd-3jqx (integration tests)\n- This is the keystone bead — it consumes all upstream primitives and is consumed by the final wiring and integration test beads.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-17T19:17:24.197299Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:03:54.458047Z","closed_at":"2026-02-19T13:03:54.457868Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-1i4i","depends_on_id":"bd-3bec","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-1j1","title":"Integration test: full Phase B sync pipeline","description":"## Background\n\nThis integration test proves the full Phase B sync pipeline works end-to-end. 
Since Gates 1 and 2 are already implemented and closed, this test validates that the complete pipeline — including Gate 4 mr_diffs draining — works together.\n\n## Codebase Context\n\n- **Gates 1-2 FULLY IMPLEMENTED (CLOSED):** resource events fetch, closes_issues API, system note parsing (note_parser.rs), entity_references extraction (references.rs)\n- **Gate 4 in progress:** migration 016 (mr_file_changes), fetch_mr_diffs, drain_mr_diffs — already wired in orchestrator (lines 708-726, 1514+)\n- **26 migrations exist** (001-026). LATEST_SCHEMA_VERSION = 26. In-memory DB must run all 26.\n- Orchestrator has drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), and drain_mr_diffs() (line 1514).\n- wiremock crate used in existing tests (check dev-dependencies in Cargo.toml)\n- src/core/dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- IngestProjectResult and IngestMrProjectResult track counts for all drain phases\n\n## Approach\n\nCreate tests/phase_b_integration.rs:\n\n### Test Setup\n\n1. In-memory SQLite DB with all 26 migrations (001-026)\n2. wiremock mock server with:\n - /api/v4/projects/:id/issues — 2 test issues\n - /api/v4/projects/:id/merge_requests — 1 test MR\n - /api/v4/projects/:id/issues/:iid/resource_state_events — state events\n - /api/v4/projects/:id/issues/:iid/resource_label_events — label events\n - /api/v4/projects/:id/merge_requests/:iid/resource_state_events — merge event with source_merge_request_iid\n - /api/v4/projects/:id/merge_requests/:iid/closes_issues — linked issues\n - /api/v4/projects/:id/merge_requests/:iid/diffs — file changes\n - /api/v4/projects/:id/issues/:iid/discussions — discussion with system note \"mentioned in !1\"\n3. Config with fetch_resource_events=true and fetch_mr_file_changes=true\n4. Use dependent_concurrency=1 to avoid timing issues\n\n### Test Flow\n\n```rust\n#[tokio::test]\nasync fn test_full_phase_b_pipeline() {\n // 1. 
Set up mock server + DB with all 26 migrations\n // 2. Run ingest issues + MRs (orchestrator functions)\n // 3. Verify pending_dependent_fetches enqueued: resource_events, mr_closes_issues, mr_diffs\n // 4. Drain all dependent fetch queues\n // 5. Assert: resource_state_events populated (count > 0)\n // 6. Assert: resource_label_events populated (count > 0)\n // 7. Assert: entity_references has closes ref with source_method='api'\n // 8. Assert: entity_references has mentioned ref with source_method='note_parse'\n // 9. Assert: mr_file_changes populated from diffs API\n // 10. Assert: pending_dependent_fetches fully drained (no stuck locks)\n}\n```\n\n### Assertions (SQL)\n\n```sql\nSELECT COUNT(*) FROM resource_state_events -- > 0\nSELECT COUNT(*) FROM resource_label_events -- > 0\nSELECT COUNT(*) FROM entity_references WHERE reference_type = 'closes' AND source_method = 'api' -- >= 1\nSELECT COUNT(*) FROM entity_references WHERE source_method = 'note_parse' -- >= 1\nSELECT COUNT(*) FROM mr_file_changes -- > 0\nSELECT COUNT(*) FROM pending_dependent_fetches WHERE locked_at IS NOT NULL -- = 0\n```\n\n## Acceptance Criteria\n\n- [ ] Test creates DB with all 26 migrations, mocks, and runs full pipeline\n- [ ] resource_state_events and resource_label_events populated\n- [ ] entity_references has closes ref (source_method='api') and mentioned ref (source_method='note_parse')\n- [ ] mr_file_changes populated from diffs mock\n- [ ] pending_dependent_fetches fully drained (no stuck locks, no retryable jobs)\n- [ ] Test runs in < 10 seconds\n- [ ] `cargo test --test phase_b_integration` passes\n\n## Files\n\n- CREATE: tests/phase_b_integration.rs\n\n## TDD Anchor\n\nRED: Write test with all assertions — should pass if all Gates are wired correctly.\n\nGREEN: If anything fails, it indicates a missing orchestrator connection — fix the wiring.\n\nVERIFY: cargo test --test phase_b_integration -- --nocapture\n\n## Edge Cases\n\n- Paginated mock responses: include Link header 
for multi-page responses\n- Empty pages: verify graceful handling\n- Use dependent_concurrency=1 to avoid timing issues in test environment\n- Stale lock reclaim: test that locks older than stale_lock_minutes are reclaimed\n- If Gate 4 drain_mr_diffs is not fully wired yet, the mr_file_changes assertion will fail — this is the intended RED signal\n\n## Dependency Context\n\n- **bd-8t4 (resource_state_events extraction)**: CLOSED. Provides drain_resource_events() which populates resource_state_events and resource_label_events tables.\n- **bd-3ia (closes_issues)**: CLOSED. Provides drain_mr_closes_issues() which populates entity_references with reference_type='closes', source_method='api'.\n- **bd-1ji (note parsing)**: CLOSED. Provides note_parser.rs which extracts \"mentioned in !N\" patterns and stores as entity_references with source_method='note_parse'.\n- **dependent_queue.rs**: Provides the claim/complete/fail lifecycle. All three drain functions use this.\n- **orchestrator.rs**: Contains all drain functions. drain_mr_diffs() at line 1514+ populates mr_file_changes.","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:26.355071Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:52:30.970742Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1j1","depends_on_id":"bd-1ji","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1j1","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1j1","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1j1","depends_on_id":"bd-8t4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-1j5o","title":"Verification: quality gates, query plan check, real-world validation","description":"## Background\n\nPost-implementation verification checkpoint. 
Runs after all code beads complete to validate the full scoring model works correctly against real data, not just test fixtures.\n\n## Approach\n\nExecute 8 verification steps in order. Each step has a binary pass/fail outcome.\n\n### Step 1: Compiler check\n```bash\ncargo check --all-targets\n```\nPass: exit 0\n\n### Step 2: Clippy\n```bash\ncargo clippy --all-targets -- -D warnings\n```\nPass: exit 0\n\n### Step 3: Formatting\n```bash\ncargo fmt --check\n```\nPass: exit 0\n\n### Step 4: Test suite\n```bash\ncargo test -p lore\n```\nPass: all tests green, including 31 new decay/scoring tests\n\n### Step 5: UBS scan\n```bash\nubs src/cli/commands/who.rs src/core/config.rs src/core/db.rs\n```\nPass: exit 0\n\n### Step 6: Query plan verification (manual)\nRun against real database:\n```bash\ncargo run --release -- who --path MeasurementQualityDialog.tsx -vvv 2>&1 | grep -i \"query plan\"\n```\nOr use sqlite3 CLI with EXPLAIN QUERY PLAN on the expert SQL (both exact and prefix modes).\n\nPass criteria (6 checks):\n- matched_notes_raw branch 1 uses existing new_path index\n- matched_notes_raw branch 2 uses idx_notes_old_path_author\n- matched_file_changes_raw uses idx_mfc_new_path_project_mr and idx_mfc_old_path_project_mr\n- reviewer_participation uses idx_notes_diffnote_discussion_author\n- mr_activity CTE joins merge_requests via primary key from matched_file_changes\n- Path resolution probes (old_path leg) use idx_notes_old_path_project_created\nDocument observed plan as SQL comment near the CTE.\n\n### Step 7: Performance baseline (manual)\n```bash\ntime cargo run --release -- who --path MeasurementQualityDialog.tsx\ntime cargo run --release -- who --path src/\ntime cargo run --release -- who --path Dialog.tsx\n```\nPass criteria (soft SLOs):\n- Exact path: p95 < 200ms\n- Prefix: p95 < 300ms\n- Suffix: p95 < 500ms\nRecord timings as SQL comment for future regression reference.\n\n### Step 8: Real-world validation\n```bash\ncargo run --release -- who --path 
MeasurementQualityDialog.tsx\ncargo run --release -- who --path MeasurementQualityDialog.tsx --explain-score\ncargo run --release -- who --path MeasurementQualityDialog.tsx --as-of 2025-06-01\ncargo run --release -- who --path MeasurementQualityDialog.tsx --all-history\n```\nPass criteria:\n- [ ] Recency discounting visible (recent authors rank above old reviewers)\n- [ ] --explain-score components sum to total (within f64 tolerance)\n- [ ] --as-of produces identical results on repeated runs\n- [ ] Assigned-only reviewers rank below participated reviewers on same MR\n- [ ] Known renamed file path resolves and credits old expertise\n- [ ] LGTM-only reviewers classified as assigned-only\n- [ ] Closed MRs at ~50% contribution visible via --explain-score\n\n## Acceptance Criteria\n- [ ] Steps 1-5 pass (exit 0)\n- [ ] Step 6: query plan documented with all 6 index usage points confirmed\n- [ ] Step 7: timing baselines recorded\n- [ ] Step 8: all 7 real-world checks pass\n\n## Files\n- All files modified by child beads (read-only verification)\n- Add SQL comments near CTE with observed EXPLAIN QUERY PLAN output\n\n## Edge Cases\n- SQLite planner may choose different plans across versions — document version\n- Timing varies by hardware — record machine specs alongside baselines\n- Real DB may have NULL merged_at on old MRs — state-aware fallback handles this","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-09T17:00:59.287720Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.415816Z","closed_at":"2026-02-12T20:43:04.415772Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates 
green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1j5o","depends_on_id":"bd-1b50","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-1j5o","depends_on_id":"bd-1vti","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-1je","title":"Implement pending discussion queue","description":"## Background\nThe pending discussion queue tracks discussions that need to be fetched from GitLab. When an issue or MR is updated, its discussions may need re-fetching. This queue is separate from dirty_sources (which tracks entities needing document regeneration) — it tracks entities needing API calls to GitLab. The queue uses the same backoff pattern as dirty_sources for consistency.\n\n## Approach\nCreate `src/ingestion/discussion_queue.rs`:\n\n```rust\nuse crate::core::backoff::compute_next_attempt_at;\n\n/// Noteable type for discussion queue.\n#[derive(Debug, Clone, Copy)]\npub enum NoteableType {\n Issue,\n MergeRequest,\n}\n\nimpl NoteableType {\n pub fn as_str(&self) -> &'static str {\n match self {\n Self::Issue => \"Issue\",\n Self::MergeRequest => \"MergeRequest\",\n }\n }\n}\n\npub struct PendingFetch {\n pub project_id: i64,\n pub noteable_type: NoteableType,\n pub noteable_iid: i64,\n pub attempt_count: i32,\n}\n\n/// Queue a discussion fetch. 
ON CONFLICT DO UPDATE resets backoff (consistent with dirty_sources).\npub fn queue_discussion_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Get next batch of pending fetches (WHERE next_attempt_at IS NULL OR <= now).\npub fn get_pending_fetches(conn: &Connection, limit: usize) -> Result>;\n\n/// Mark fetch complete (remove from queue).\npub fn complete_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Record fetch error with backoff.\npub fn record_fetch_error(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n error: &str,\n) -> Result<()>;\n```\n\n## Acceptance Criteria\n- [ ] queue_discussion_fetch uses ON CONFLICT DO UPDATE (consistent with dirty_sources pattern)\n- [ ] Re-queuing resets: attempt_count=0, next_attempt_at=NULL, last_error=NULL\n- [ ] get_pending_fetches respects next_attempt_at backoff\n- [ ] get_pending_fetches returns entries ordered by queued_at ASC\n- [ ] complete_fetch removes entry from queue\n- [ ] record_fetch_error increments attempt_count, computes next_attempt_at via shared backoff\n- [ ] NoteableType.as_str() returns \"Issue\" or \"MergeRequest\" (matches DB CHECK constraint)\n- [ ] `cargo test discussion_queue` passes\n\n## Files\n- `src/ingestion/discussion_queue.rs` — new file\n- `src/ingestion/mod.rs` — add `pub mod discussion_queue;`\n\n## TDD Loop\nRED: Tests in `#[cfg(test)] mod tests`:\n- `test_queue_and_get` — queue entry, get returns it\n- `test_requeue_resets_backoff` — queue, error, re-queue -> attempt_count=0\n- `test_backoff_respected` — entry with future next_attempt_at not returned\n- `test_complete_removes` — complete_fetch removes entry\n- `test_error_increments_attempts` — error -> attempt_count=1, next_attempt_at set\nGREEN: Implement all functions\nVERIFY: `cargo test discussion_queue`\n\n## Edge Cases\n- Queue same 
(project_id, noteable_type, noteable_iid) twice: ON CONFLICT resets state\n- NoteableType must match DB CHECK constraint exactly (\"Issue\", \"MergeRequest\" — capitalized)\n- Empty queue: get_pending_fetches returns empty Vec","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.505548Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:31:35.496454Z","closed_at":"2026-01-30T17:31:35.496405Z","close_reason":"Implemented discussion_queue with queue/get/complete/record_error + 6 tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1je","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1je","depends_on_id":"bd-mem","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} @@ -61,7 +61,7 @@ {"id":"bd-1n5q","title":"lore brief: situational awareness for topic/module/person","description":"## Background\nComposable capstone command. An agent says \"I am about to work on auth\" and gets everything in one call: open issues, active MRs, experts, recent activity, unresolved threads, related context. Replaces 5 separate lore calls with 1.\n\n## Input Modes\n1. Topic: `lore brief 'authentication'` — FTS search to find relevant entities, aggregate\n2. Path: `lore brief --path src/auth/` — who expert internals for path expertise\n3. Person: `lore brief --person teernisse` — who workload internals\n4. Entity: `lore brief issues 3864` — single entity focus with cross-references\n\n## Section Assembly Architecture\n\n### Reuse existing run_* functions (ship faster, recommended for v1)\nEach section calls existing CLI command functions and converts their output.\n\nIMPORTANT: All existing run_* functions take `&Config`, NOT `&Connection`. 
The Config contains the db_path and each function opens its own connection internally.\n\n```rust\n// In src/cli/commands/brief.rs\n\nuse crate::cli::commands::list::{run_list_issues, run_list_mrs, ListFilters, MrListFilters};\nuse crate::cli::commands::who::{run_who, WhoArgs, WhoMode};\nuse crate::core::config::Config;\n\npub async fn run_brief(config: &Config, args: BriefArgs) -> Result {\n let mut sections_computed = Vec::new();\n\n // 1. open_issues: reuse list.rs\n // Signature: pub fn run_list_issues(config: &Config, filters: ListFilters) -> Result\n // Located at src/cli/commands/list.rs:268\n let open_issues = run_list_issues(config, ListFilters {\n state: Some(\"opened\".into()),\n limit: Some(5),\n project: args.project.clone(),\n // ... scope by topic/path/person based on mode\n ..Default::default()\n })?;\n sections_computed.push(\"open_issues\");\n\n // 2. active_mrs: reuse list.rs\n // Signature: pub fn run_list_mrs(config: &Config, filters: MrListFilters) -> Result\n // Located at src/cli/commands/list.rs:476\n let active_mrs = run_list_mrs(config, MrListFilters {\n state: Some(\"opened\".into()),\n limit: Some(5),\n project: args.project.clone(),\n ..Default::default()\n })?;\n sections_computed.push(\"active_mrs\");\n\n // 3. experts: reuse who.rs\n // Signature: pub fn run_who(config: &Config, args: &WhoArgs) -> Result\n // Located at src/cli/commands/who.rs:276\n let experts = run_who(config, &WhoArgs {\n mode: WhoMode::Expert,\n path: args.path.clone(),\n limit: Some(3),\n ..Default::default()\n })?;\n sections_computed.push(\"experts\");\n\n // 4. recent_activity: reuse timeline internals\n // The timeline pipeline is 5-stage (SEED->HYDRATE->EXPAND->COLLECT->RENDER)\n // Types in src/core/timeline.rs, seed in src/core/timeline_seed.rs\n // ...etc\n}\n```\n\nNOTE: ListFilters and MrListFilters may not implement Default. Check before using `..Default::default()`. 
If they don't, derive it or construct all fields explicitly.\n\n### Concrete Function References (src/cli/commands/)\n| Module | Function | Signature | Line |\n|--------|----------|-----------|------|\n| list.rs | run_list_issues | `(config: &Config, filters: ListFilters) -> Result` | 268 |\n| list.rs | run_list_mrs | `(config: &Config, filters: MrListFilters) -> Result` | 476 |\n| who.rs | run_who | `(config: &Config, args: &WhoArgs) -> Result` | 276 |\n| search.rs | run_search | `(config: &Config, query: &str, cli_filters: SearchCliFilters, fts_mode: FtsQueryMode, requested_mode: &str, explain: bool) -> Result` | 61 |\n\nNOTE: run_search is currently synchronous (pub fn, not pub async fn). If bd-1ksf ships first, it becomes async. Brief should handle both cases — call `.await` if async, direct call if sync.\n\n### Section Details\n| Section | Source | Limit | Fallback |\n|---------|--------|-------|----------|\n| open_issues | list.rs with state=opened | 5 | empty array |\n| active_mrs | list.rs with state=opened | 5 | empty array |\n| experts | who.rs Expert mode | 3 | empty array (no path data) |\n| recent_activity | timeline pipeline | 10 events | empty array |\n| unresolved_threads | SQL: discussions WHERE resolved=false | 5 | empty array |\n| related | search_vector() via bd-8con | 5 | omit section (no embeddings) |\n| warnings | computed from dates/state | all | empty array |\n\n### Warning Generation\n```rust\nfn compute_warnings(issues: &[IssueRow]) -> Vec {\n let now = chrono::Utc::now();\n issues.iter().filter_map(|i| {\n let updated = parse_timestamp(i.updated_at)?;\n let days_stale = (now - updated).num_days();\n if days_stale > 30 {\n Some(format!(\"Issue #{} has no activity for {} days\", i.iid, days_stale))\n } else { None }\n }).chain(\n issues.iter().filter(|i| i.assignees.is_empty())\n .map(|i| format!(\"Issue #{} is unassigned\", i.iid))\n ).collect()\n}\n```\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"mode\": 
\"topic\",\n \"query\": \"authentication\",\n \"summary\": \"3 open issues, 2 active MRs, top expert: teernisse\",\n \"open_issues\": [{ \"iid\": 123, \"title\": \"...\", \"state\": \"opened\", \"assignees\": [...], \"updated_at\": \"...\", \"labels\": [...] }],\n \"active_mrs\": [{ \"iid\": 456, \"title\": \"...\", \"state\": \"opened\", \"author\": \"...\", \"draft\": false, \"updated_at\": \"...\" }],\n \"experts\": [{ \"username\": \"teernisse\", \"score\": 42, \"last_activity\": \"...\" }],\n \"recent_activity\": [{ \"timestamp\": \"...\", \"event_type\": \"state_change\", \"entity_ref\": \"issues#123\", \"summary\": \"...\", \"actor\": \"...\" }],\n \"unresolved_threads\": [{ \"discussion_id\": \"abc\", \"entity_ref\": \"issues#123\", \"started_by\": \"...\", \"note_count\": 5, \"last_note_at\": \"...\" }],\n \"related\": [{ \"iid\": 789, \"title\": \"...\", \"similarity_score\": 0.85 }],\n \"warnings\": [\"Issue #3800 has no activity for 45 days\"]\n },\n \"meta\": { \"elapsed_ms\": 1200, \"sections_computed\": [\"open_issues\", \"active_mrs\", \"experts\", \"recent_activity\"] }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nBrief {\n /// Free-text topic, entity type, or omit for project-wide brief\n query: Option,\n /// Focus on a file path (who expert mode)\n #[arg(long)]\n path: Option,\n /// Focus on a person (who workload mode)\n #[arg(long)]\n person: Option,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n /// Maximum items per section\n #[arg(long, default_value = \"5\")]\n section_limit: usize,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/brief.rs:\n- test_brief_topic_returns_all_sections: insert test data, search 'auth', assert all section keys present in response\n- test_brief_path_uses_who_expert: brief --path src/auth/, assert experts section populated\n- test_brief_person_uses_who_workload: brief --person user, assert open_issues filtered to user's assignments\n- 
test_brief_warnings_stale_issue: insert issue with updated_at > 30 days ago, assert warning generated\n- test_brief_token_budget: robot mode output for topic query is under 12000 bytes (~3000 tokens)\n- test_brief_no_embeddings_graceful: related section omitted (not errored) when no embeddings exist\n- test_brief_empty_topic: zero matches returns valid JSON with empty arrays + \"No data found\" summary\n\nGREEN: Implement brief with section assembly, calling existing run_* functions\n\nVERIFY:\n```bash\ncargo test brief:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J brief 'throw time' | jq '.data | keys'\ncargo run --release -- -J brief 'throw time' | wc -c # target <12000\n```\n\n## Acceptance Criteria\n- [ ] lore brief TOPIC returns all sections for free-text topic\n- [ ] lore brief --path PATH returns path-focused briefing with experts\n- [ ] lore brief --person USERNAME returns person-focused briefing\n- [ ] lore brief issues N returns entity-focused briefing\n- [ ] Robot mode output under 12000 bytes (~3000 tokens)\n- [ ] Each section degrades gracefully if its data source is unavailable\n- [ ] summary field is auto-generated one-liner from section counts\n- [ ] warnings detect: stale issues (>30d), unassigned, no due date\n- [ ] Performance: <2s total (acceptable since composing multiple queries)\n- [ ] Command registered in main.rs and robot-docs\n\n## Edge Cases\n- Topic with zero matches: return empty sections + \"No data found for this topic\" summary\n- Path that nobody has touched: experts empty, related may still have results\n- Person not found in DB: exit code 17 with suggestion\n- All sections empty: still return valid JSON with empty arrays\n- Very broad topic (\"the\"): may return too many results — each section respects its limit cap\n- ListFilters/MrListFilters may not derive Default — construct all fields explicitly if needed\n\n## Dependencies\n- Hybrid search (bd-1ksf) for topic relevance ranking\n- lore who 
(already shipped) for expertise\n- lore related (bd-8con) for semantic connections (BLOCKER — related section is core to the feature)\n- Timeline pipeline (already shipped) for recent activity\n\n## Dependency Context\n- **bd-1ksf (hybrid search)**: Provides `search_hybrid()` which brief uses for topic mode to find relevant entities. Without it, topic mode falls back to FTS-only via `search_fts()`.\n- **bd-8con (related)**: Provides `run_related()` which brief calls to populate the `related` section with semantically similar entities. This is a blocking dependency — the related section is a core differentiator.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/brief.rs\n- src/cli/commands/mod.rs (add pub mod brief; re-export)\n- src/main.rs (register Brief subcommand in Commands enum, add handle_brief fn)\n- Reuse: list.rs, who.rs, timeline.rs, search.rs, show.rs internals","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:47:22.893231Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:31:33.752020Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence"],"dependencies":[{"issue_id":"bd-1n5q","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1n5q","depends_on_id":"bd-1ksf","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1n5q","depends_on_id":"bd-8con","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-1nf","title":"Register 'lore timeline' command with all flags","description":"## Background\n\nThis bead wires the `lore timeline` command into the CLI — adding the subcommand to the Commands enum, defining all flags, registering in VALID_COMMANDS, and dispatching to the timeline handler. 
The actual query logic and rendering are in separate beads.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.1 (Command Design).\n\n## Codebase Context\n\n- Commands enum in `src/cli/mod.rs` (line ~86): uses #[derive(Subcommand)] with nested Args structs\n- VALID_COMMANDS in `src/main.rs` (line ~448): &[&str] array for fuzzy command matching\n- Handler dispatch in `src/main.rs` match on Commands:: variants\n- robot-docs manifest in `src/main.rs`: registers commands for `lore robot-docs` output\n- Existing pattern: `Sync(SyncArgs)`, `Search(SearchArgs)`, etc.\n- No timeline module exists yet — this bead creates the CLI entry point only\n\n## Approach\n\n### 1. TimelineArgs struct (`src/cli/mod.rs`):\n\n```rust\n/// Show a chronological timeline of events matching a query\n#[derive(Parser, Debug)]\npub struct TimelineArgs {\n /// Search query (keywords to find in issues, MRs, and discussions)\n pub query: String,\n\n /// Scope to a specific project (fuzzy match)\n #[arg(short = 'p', long)]\n pub project: Option,\n\n /// Only show events after this date (e.g. \"6m\", \"2w\", \"2024-01-01\")\n #[arg(long)]\n pub since: Option,\n\n /// Cross-reference expansion depth (0 = no expansion)\n #[arg(long, default_value = \"1\")]\n pub depth: usize,\n\n /// Also follow 'mentioned' edges during expansion (high fan-out)\n #[arg(long = \"expand-mentions\")]\n pub expand_mentions: bool,\n\n /// Maximum number of events to display\n #[arg(short = 'n', long = \"limit\", default_value = \"100\")]\n pub limit: usize,\n}\n```\n\n### 2. Commands enum variant:\n\n```rust\n/// Show a chronological timeline of events matching a query\n#[command(name = \"timeline\")]\nTimeline(TimelineArgs),\n```\n\n### 3. Handler in `src/main.rs`:\n\n```rust\nCommands::Timeline(args) => {\n // Placeholder: will be filled by bd-2f2 (human) and bd-dty (robot)\n // For now: resolve project, call timeline query, dispatch to renderer\n}\n```\n\n### 4. 
VALID_COMMANDS: add `\"timeline\"` to the array\n\n### 5. robot-docs: add timeline command description to manifest\n\n## Acceptance Criteria\n\n- [ ] `TimelineArgs` struct with all 6 flags: query, project, since, depth, expand-mentions, limit\n- [ ] Commands::Timeline variant registered in Commands enum\n- [ ] Handler stub in src/main.rs dispatches to timeline logic\n- [ ] `\"timeline\"` added to VALID_COMMANDS array\n- [ ] robot-docs manifest includes timeline command description\n- [ ] `lore timeline --help` shows correct help text\n- [ ] `lore timeline` without query shows error (query is required positional)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/mod.rs` (TimelineArgs struct + Commands::Timeline variant)\n- `src/main.rs` (handler dispatch + VALID_COMMANDS + robot-docs entry)\n\n## TDD Loop\n\nNo unit tests for CLI wiring. Verify with:\n\n```bash\ncargo check --all-targets\ncargo run -- timeline --help\n```\n\n## Edge Cases\n\n- --since parsing: reuse existing date parsing from ListFilters (src/cli/mod.rs handles \"7d\", \"2w\", \"YYYY-MM-DD\")\n- --depth 0: valid, means no cross-reference expansion\n- --expand-mentions: off by default because mentioned edges have high fan-out\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:28.422082Z","created_by":"tayloreernisse","updated_at":"2026-02-06T13:49:15.313047Z","closed_at":"2026-02-06T13:49:15.312993Z","close_reason":"Wired lore timeline command: TimelineArgs with 9 flags, Commands::Timeline variant, handle_timeline handler, VALID_COMMANDS entry, robot-docs manifest with temporal_intelligence 
workflow","compaction_level":0,"original_size":0,"labels":["cli","gate-3","phase-b"],"dependencies":[{"issue_id":"bd-1nf","depends_on_id":"bd-2f2","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1nf","depends_on_id":"bd-dty","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1nf","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-1np","title":"[CP1] GitLab types for issues, discussions, notes","description":"## Background\n\nGitLab types define the Rust structs for deserializing GitLab API responses. These types are the foundation for all ingestion work - issues, discussions, and notes must be correctly typed for serde to parse them.\n\n## Approach\n\nAdd types to `src/gitlab/types.rs` with serde derives:\n\n### GitLabIssue\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabIssue {\n pub id: i64, // GitLab global ID\n pub iid: i64, // Project-scoped issue number\n pub project_id: i64,\n pub title: String,\n pub description: Option,\n pub state: String, // \"opened\" | \"closed\"\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub closed_at: Option,\n pub author: GitLabAuthor,\n pub labels: Vec, // Array of label names (CP1 canonical)\n pub web_url: String,\n}\n```\n\nNOTE: `labels_details` intentionally NOT modeled - varies across GitLab versions.\n\n### GitLabAuthor\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabAuthor {\n pub id: i64,\n pub username: String,\n pub name: String,\n}\n```\n\n### GitLabDiscussion\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabDiscussion {\n pub id: String, // String ID like \"6a9c1750b37d...\"\n pub individual_note: bool, // true = standalone comment\n pub notes: Vec,\n}\n```\n\n### GitLabNote\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabNote {\n pub id: i64,\n #[serde(rename = 
\"type\")]\n pub note_type: Option, // \"DiscussionNote\" | \"DiffNote\" | null\n pub body: String,\n pub author: GitLabAuthor,\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub system: bool, // true for system-generated notes\n #[serde(default)]\n pub resolvable: bool,\n #[serde(default)]\n pub resolved: bool,\n pub resolved_by: Option,\n pub resolved_at: Option,\n pub position: Option,\n}\n```\n\n### GitLabNotePosition\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabNotePosition {\n pub old_path: Option,\n pub new_path: Option,\n pub old_line: Option,\n pub new_line: Option,\n}\n```\n\n## Acceptance Criteria\n\n- [ ] GitLabIssue deserializes from API response JSON\n- [ ] GitLabAuthor embedded correctly in issue and note\n- [ ] GitLabDiscussion with notes array deserializes\n- [ ] GitLabNote handles null note_type (use Option)\n- [ ] GitLabNote uses #[serde(rename = \"type\")] for reserved keyword\n- [ ] resolvable/resolved default to false via #[serde(default)]\n- [ ] All timestamp fields are String (ISO 8601 parsed elsewhere)\n\n## Files\n\n- src/gitlab/types.rs (edit - add types)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/gitlab_types_tests.rs\n#[test] fn deserializes_gitlab_issue_from_json()\n#[test] fn deserializes_gitlab_discussion_from_json()\n#[test] fn handles_null_note_type()\n#[test] fn handles_missing_resolvable_field()\n#[test] fn deserializes_labels_as_string_array()\n```\n\nGREEN: Add type definitions with serde attributes\n\nVERIFY: `cargo test gitlab_types`\n\n## Edge Cases\n\n- note_type can be null, \"DiscussionNote\", or \"DiffNote\"\n- labels array can be empty\n- description can be null\n- resolved_by/resolved_at can be null\n- position is only present for 
DiffNotes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.150472Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:17:08.842965Z","closed_at":"2026-01-25T22:17:08.842895Z","close_reason":"Implemented GitLabAuthor, GitLabIssue, GitLabDiscussion, GitLabNote, GitLabNotePosition types with 10 passing tests","compaction_level":0,"original_size":0} -{"id":"bd-1nsl","title":"Epic: Surgical Per-IID Sync","status":"open","priority":1,"issue_type":"epic","created_at":"2026-02-17T19:11:34.020453Z","created_by":"tayloreernisse","updated_at":"2026-02-17T19:11:34.023031Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-1nsl","title":"Epic: Surgical Per-IID Sync","status":"closed","priority":1,"issue_type":"epic","created_at":"2026-02-17T19:11:34.020453Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:47:02.942871Z","closed_at":"2026-02-19T13:47:02.942827Z","close_reason":"All child beads completed: bd-wcja (SyncResult fields), bd-1elx (SurgicalTarget types), bd-hs6j (TOCTOU guard), bd-3sez (preflight_fetch), bd-arka (ingest_issue_by_iid), bd-tiux (ingest_mr_by_iid), bd-1lja (SyncOptions extensions), bd-159p (dirty_tracker), bd-1sc6 (SyncRunRecorder), bd-kanh (enrichment helpers), bd-1i4i (orchestrator), bd-3bec (dispatch wiring). 886 tests pass.","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-1o1","title":"OBSERV: Add -v/--verbose and --log-format CLI flags","description":"## Background\nUsers and agents need CLI-controlled verbosity without knowing RUST_LOG syntax. The -v flag convention (cargo, curl, ssh) is universally understood. 
--log-format json enables lore sync 2>&1 | jq workflows without reading log files.\n\n## Approach\nAdd two new global flags to the Cli struct in src/cli/mod.rs (insert after the quiet field at line ~37):\n\n```rust\n/// Increase log verbosity (-v, -vv, -vvv)\n#[arg(short = 'v', long = \"verbose\", action = clap::ArgAction::Count, global = true)]\npub verbose: u8,\n\n/// Log format for stderr output: text (default) or json\n#[arg(long = \"log-format\", global = true, value_parser = [\"text\", \"json\"], default_value = \"text\")]\npub log_format: String,\n```\n\nThe existing Cli struct (src/cli/mod.rs:13-42) has these global flags: config, robot, json, color, quiet. The new flags follow the same pattern.\n\nNote: clap::ArgAction::Count allows -v, -vv, -vvv as a single flag with increasing count (0, 1, 2, 3).\n\n## Acceptance Criteria\n- [ ] lore -v sync parses without error (verbose=1)\n- [ ] lore -vv sync parses (verbose=2)\n- [ ] lore -vvv sync parses (verbose=3)\n- [ ] lore --log-format json sync parses (log_format=\"json\")\n- [ ] lore --log-format text sync parses (default)\n- [ ] lore --log-format xml sync errors (invalid value)\n- [ ] Existing commands unaffected (verbose defaults to 0, log_format to \"text\")\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/cli/mod.rs (modify Cli struct, lines 13-42)\n\n## TDD Loop\nRED: Write test that parses Cli with -v flag and asserts verbose=1\nGREEN: Add the two fields to Cli struct\nVERIFY: cargo test -p lore && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- -v and -q together: both parse fine; conflict resolution happens in subscriber setup (bd-2rr), not here\n- -v flag must be global=true so it works before and after subcommands: lore -v sync AND lore sync -v\n- --log-format is a string, not enum, to keep Cli struct 
simple","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.421339Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:10:22.585947Z","closed_at":"2026-02-04T17:10:22.585905Z","close_reason":"Added -v/--verbose (count) and --log-format (text|json) global CLI flags","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o1","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-1o4h","title":"OBSERV: Define StageTiming struct in src/core/metrics.rs","description":"## Background\nStageTiming is the materialized view of span timing data. It's the data structure that flows through robot JSON output, sync_runs.metrics_json, and the human-readable timing summary. Defined in a new file because it's genuinely new functionality that doesn't fit existing modules.\n\n## Approach\nCreate src/core/metrics.rs:\n\n```rust\nuse serde::Serialize;\n\nfn is_zero(v: &usize) -> bool { *v == 0 }\n\n#[derive(Debug, Clone, Serialize)]\npub struct StageTiming {\n pub name: String,\n #[serde(skip_serializing_if = \"Option::is_none\")]\n pub project: Option,\n pub elapsed_ms: u64,\n pub items_processed: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub items_skipped: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub errors: usize,\n #[serde(skip_serializing_if = \"Vec::is_empty\")]\n pub sub_stages: Vec,\n}\n```\n\nRegister module in src/core/mod.rs (line ~11, add):\n```rust\npub mod metrics;\n```\n\nThe is_zero helper is a private function used by serde's skip_serializing_if. 
It must take &usize (reference) and return bool.\n\n## Acceptance Criteria\n- [ ] StageTiming serializes to JSON matching PRD Section 4.6.2 example\n- [ ] items_skipped omitted when 0\n- [ ] errors omitted when 0\n- [ ] sub_stages omitted when empty vec\n- [ ] project omitted when None\n- [ ] name, elapsed_ms, items_processed always present\n- [ ] Struct is Debug + Clone + Serialize\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (new file)\n- src/core/mod.rs (register module, add line after existing pub mod declarations)\n\n## TDD Loop\nRED:\n - test_stage_timing_serialization: create StageTiming with sub_stages, serialize, assert JSON structure\n - test_stage_timing_zero_fields_omitted: errors=0, items_skipped=0, assert no \"errors\" or \"items_skipped\" keys\n - test_stage_timing_empty_sub_stages: sub_stages=vec![], assert no \"sub_stages\" key\nGREEN: Create metrics.rs with StageTiming struct and is_zero helper\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- is_zero must be a function, not a closure (serde skip_serializing_if requires a function path)\n- Vec::is_empty is a method on Vec, and serde accepts \"Vec::is_empty\" as a path for skip_serializing_if\n- Recursive StageTiming (sub_stages contains StageTiming): serde handles this naturally, no special handling needed","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.907234Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:21:40.915842Z","closed_at":"2026-02-04T17:21:40.915794Z","close_reason":"Created src/core/metrics.rs with StageTiming struct, serde skip_serializing_if for zero/empty fields, 5 tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o4h","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-1oi7","title":"NOTE-2A: Schema migration for note documents 
(migration 024)","description":"## Background\nThe documents and dirty_sources tables have CHECK constraints limiting source_type to ('issue', 'merge_request', 'discussion'). Need to add 'note' as valid source_type. SQLite doesn't support ALTER CONSTRAINT, so use the table-rebuild pattern. Uses migration slot 024 (022 = query indexes, 023 = issue_detail_fields already exists).\n\n## Approach\nCreate migrations/024_note_documents.sql:\n\n1. Rebuild dirty_sources: CREATE dirty_sources_new with CHECK adding 'note', INSERT SELECT, DROP old, RENAME.\n2. Rebuild documents (complex — must preserve FTS consistency):\n - Save junction table data (_doc_labels_backup, _doc_paths_backup)\n - Drop FTS triggers (documents_ai, documents_ad, documents_au — defined in migration 008_fts5.sql)\n - Drop junction tables (document_labels, document_paths — defined in migration 007_documents.sql)\n - Create documents_new with updated CHECK adding 'note'\n - INSERT INTO documents_new SELECT * FROM documents (preserves rowids for FTS)\n - Drop documents, rename new\n - Recreate all indexes (idx_documents_project_updated, idx_documents_author, idx_documents_source, idx_documents_content_hash — see migration 007_documents.sql for definitions)\n - Recreate junction tables + restore data from backups\n - Recreate FTS triggers (see migration 008_fts5.sql for trigger SQL)\n - INSERT INTO documents_fts(documents_fts) VALUES('rebuild')\n3. Defense-in-depth triggers:\n - notes_ad_cleanup: AFTER DELETE ON notes WHEN old.is_system = 0 → delete doc + dirty_sources for source_type='note', source_id=old.id\n - notes_au_system_cleanup: AFTER UPDATE OF is_system ON notes WHEN NEW.is_system = 1 AND OLD.is_system = 0 → delete doc + dirty_sources\n4. Drop temp backup tables\n\nRegister as (\"024\", include_str!(\"../../migrations/024_note_documents.sql\")) in MIGRATIONS array in src/core/db.rs. 
Position AFTER the \"023\" entry.\n\n## Files\n- CREATE: migrations/024_note_documents.sql\n- MODIFY: src/core/db.rs (add (\"024\", include_str!(...)) to MIGRATIONS array, after line 75)\n\n## TDD Anchor\nRED: test_migration_024_allows_note_source_type — INSERT with source_type='note' should succeed in both documents and dirty_sources.\nGREEN: Implement the table rebuild migration.\nVERIFY: cargo test migration_024 -- --nocapture\nTests: test_migration_024_preserves_existing_data, test_migration_024_fts_triggers_intact, test_migration_024_row_counts_preserved, test_migration_024_integrity_checks_pass, test_migration_024_fts_rebuild_consistent, test_migration_024_note_delete_trigger_cleans_document, test_migration_024_note_system_flip_trigger_cleans_document, test_migration_024_system_note_delete_trigger_does_not_fire\n\n## Acceptance Criteria\n- [ ] INSERT source_type='note' succeeds in documents and dirty_sources\n- [ ] All existing data preserved through table rebuild (row counts match before/after)\n- [ ] FTS triggers fire correctly after rebuild (insert a doc, verify FTS entry exists)\n- [ ] documents_fts row count == documents row count after rebuild\n- [ ] PRAGMA foreign_key_check returns no violations\n- [ ] notes_ad_cleanup trigger fires on note deletion (deletes document + dirty_sources)\n- [ ] notes_au_system_cleanup trigger fires when is_system flips 0→1\n- [ ] System note deletion does NOT trigger notes_ad_cleanup (is_system = 1 guard)\n- [ ] All 9 tests pass\n\n## Edge Cases\n- Rowid preservation: INSERT INTO documents_new SELECT * preserves id column = rowid for FTS consistency\n- CRITICAL: Must save/restore junction table data (ON DELETE CASCADE on document_labels/document_paths would delete them when documents table is dropped)\n- The FTS rebuild at end is a safety net for any rowid drift\n- Empty database: migration is a no-op (all SELECTs return 0 rows, tables rebuilt with new 
CHECK)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:01:35.164340Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:24.078558Z","closed_at":"2026-02-12T18:13:24.078512Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"],"dependencies":[{"issue_id":"bd-1oi7","depends_on_id":"bd-18bf","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1oi7","depends_on_id":"bd-22ai","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-1oi7","depends_on_id":"bd-ef0u","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} @@ -153,7 +153,7 @@ {"id":"bd-2lg6","title":"Implement Clock trait (SystemClock + FakeClock)","description":"## Background\nAll relative-time rendering (e.g., \"3h ago\" labels) must use an injected Clock, not wall-clock time. This ensures deterministic snapshot tests and consistent timestamps within a single frame. FakeClock lets tests control time precisely.\n\n## Approach\nCreate crates/lore-tui/src/clock.rs with:\n- Clock trait: fn now(&self) -> chrono::DateTime\n- SystemClock: impl Clock using chrono::Utc::now()\n- FakeClock: wraps Arc>>, impl Clock returning the frozen value. 
Methods: new(fixed_time), advance(duration), set(time)\n- Both cloneable (SystemClock is Copy, FakeClock shares Arc)\n\n## Acceptance Criteria\n- [ ] Clock trait with now() method\n- [ ] SystemClock returns real wall-clock time\n- [ ] FakeClock returns frozen time, advance() moves it forward\n- [ ] FakeClock is Clone (shared Arc)\n- [ ] Tests pass: frozen clock returns same time on repeated calls\n- [ ] Tests pass: advance() moves time forward by exact duration\n\n## Files\n- CREATE: crates/lore-tui/src/clock.rs\n\n## TDD Anchor\nRED: Write test_fake_clock_frozen that creates FakeClock at a fixed time, calls now() twice, asserts both return the same value.\nGREEN: Implement FakeClock with Arc>.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_fake_clock\n\n## Edge Cases\n- FakeClock must be Send+Sync for use across Cmd::task threads\n- advance() must handle chrono overflow gracefully (use checked_add)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:54:11.756415Z","created_by":"tayloreernisse","updated_at":"2026-02-12T19:48:39.169147Z","closed_at":"2026-02-12T19:48:39.169096Z","close_reason":"Clock trait + SystemClock + FakeClock with 7 tests: frozen time, advance, set, clone-shares-state, Send+Sync, trait object. 
Clippy clean.","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2lg6","depends_on_id":"bd-3ddw","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-2ms","title":"[CP1] Unit tests for transformers","description":"Comprehensive unit tests for issue and discussion transformers.\n\n## Issue Transformer Tests (tests/issue_transformer_tests.rs)\n\n- transforms_gitlab_issue_to_normalized_schema\n- extracts_labels_from_issue_payload\n- handles_missing_optional_fields_gracefully\n- converts_iso_timestamps_to_ms_epoch\n- sets_last_seen_at_to_current_time\n\n## Discussion Transformer Tests (tests/discussion_transformer_tests.rs)\n\n- transforms_discussion_payload_to_normalized_schema\n- extracts_notes_array_from_discussion\n- sets_individual_note_flag_correctly\n- flags_system_notes_with_is_system_true\n- preserves_note_order_via_position_field\n- computes_first_note_at_and_last_note_at_correctly\n- computes_resolvable_and_resolved_status\n\n## Test Setup\n- Load from test fixtures\n- Use serde_json for deserialization\n- Compare against expected NormalizedX structs\n\nFiles: tests/issue_transformer_tests.rs, tests/discussion_transformer_tests.rs\nDone when: All transformer unit tests pass","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:59:04.165187Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.015847Z","closed_at":"2026-01-25T17:02:02.015847Z","deleted_at":"2026-01-25T17:02:02.015841Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2mz","title":"Epic: Gate A - Lexical MVP","description":"## Background\nGate A delivers the lexical search MVP — the foundation that works without sqlite-vec or Ollama. 
It introduces the document layer (documents, document_labels, document_paths), FTS5 indexing, search filters, and the search + stats + generate-docs CLI commands. Gate A is independently shippable — users get working search with FTS5 only.\n\n## Gate A Deliverables\n1. Document generation from issues/MRs/discussions with FTS5 indexing\n2. Lexical search + filters + snippets + lore stats\n\n## Bead Dependencies (execution order)\n1. **bd-3lc** — Rename GiError to LoreError (no deps, enables all subsequent work)\n2. **bd-hrs** — Migration 007 (blocked by bd-3lc)\n3. **bd-221** — Migration 008 FTS5 (blocked by bd-hrs)\n4. **bd-36p** — Document types + extractor module (blocked by bd-3lc)\n5. **bd-18t** — Truncation logic (blocked by bd-36p)\n6. **bd-247** — Issue extraction (blocked by bd-36p, bd-hrs)\n7. **bd-1yz** — MR extraction (blocked by bd-36p, bd-hrs)\n8. **bd-2fp** — Discussion extraction (blocked by bd-36p, bd-hrs, bd-18t)\n9. **bd-1u1** — Document regenerator (blocked by bd-36p, bd-38q, bd-hrs)\n10. **bd-1k1** — FTS5 search (blocked by bd-221)\n11. **bd-3q2** — Search filters (blocked by bd-36p)\n12. **bd-3lu** — Search CLI (blocked by bd-1k1, bd-3q2, bd-36p)\n13. **bd-3qs** — Generate-docs CLI (blocked by bd-1u1, bd-3lu)\n14. **bd-pr1** — Stats CLI (blocked by bd-hrs)\n15. 
**bd-2dk** — Project resolution (blocked by bd-3lc)\n\n## Acceptance Criteria\n- [ ] `lore search \"query\"` returns FTS5 results with snippets\n- [ ] `lore search --type issue --label bug \"query\"` filters correctly\n- [ ] `lore generate-docs` creates documents from all entities\n- [ ] `lore generate-docs --full` regenerates everything\n- [ ] `lore stats` shows document/FTS/queue counts\n- [ ] `lore stats --check` verifies FTS consistency\n- [ ] No sqlite-vec dependency in Gate A","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-30T15:25:09.721108Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:54:44.243610Z","closed_at":"2026-01-30T17:54:44.243562Z","close_reason":"All Gate A sub-beads complete. Lexical MVP delivered: document extraction (issue/MR/discussion), FTS5 indexing, search with filters/snippets/RRF, generate-docs CLI, stats CLI with integrity check/repair.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2mz","depends_on_id":"bd-3lu","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-2mz","depends_on_id":"bd-3qs","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-2mz","depends_on_id":"bd-pr1","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} -{"id":"bd-2n4","title":"Implement trace query: file -> MR -> issue -> discussion chain","description":"## Background\n\nThe trace query builds a chain from file path -> MRs -> issues -> discussions, combining data from mr_file_changes (Gate 4), entity_references (Gate 2), and the existing discussions/notes tables. 
This is the backend for the trace CLI command.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 5.4 (Query Flow Tier 1).\n\n## Codebase Context\n\n- entity_references table (migration 011): source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method\n- mr_file_changes table (migration 016, bd-1oo): merge_request_id, project_id, old_path, new_path, change_type\n- discussions table: issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, position_new_path (for DiffNotes)\n- merge_requests table: iid, title, state, author_username, web_url, merged_at, updated_at\n- issues table: iid, title, state, web_url\n- resolve_rename_chain() from bd-1yx (src/core/file_history.rs) provides multi-path matching\n- reference_type values: 'closes', 'mentioned', 'related'\n\n## Approach\n\nCreate `src/core/trace.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::file_history::resolve_rename_chain;\nuse crate::core::error::Result;\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceChain {\n pub merge_request: TraceMr,\n pub issues: Vec,\n pub discussions: Vec,\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceMr {\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub author_username: String,\n pub web_url: Option,\n pub merged_at: Option,\n pub merge_commit_sha: Option,\n pub file_change_type: String,\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceIssue {\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: Option,\n pub reference_type: String, // \"closes\", \"mentioned\", \"related\"\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceDiscussion {\n pub author_username: String,\n pub body_snippet: String, // truncated to 500 chars\n pub created_at: i64,\n pub is_diff_note: bool, // true if position_new_path matched\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceResult {\n pub path: String,\n 
pub resolved_paths: Vec,\n pub chains: Vec,\n}\n\npub fn run_trace(\n conn: &Connection,\n project_id: i64,\n path: &str,\n follow_renames: bool,\n include_discussions: bool,\n limit: usize,\n) -> Result {\n // 1. Resolve rename chain (unless !follow_renames)\n let paths = if follow_renames {\n resolve_rename_chain(conn, project_id, path, 10)?\n } else {\n vec![path.to_string()]\n };\n\n // 2. Find MRs via mr_file_changes for all resolved paths\n // Dynamic IN-clause for path set\n // 3. For each MR, find linked issues via entity_references\n // 4. If include_discussions, fetch DiffNote discussions on traced file\n // 5. Order chains by COALESCE(merged_at, updated_at) DESC, apply limit\n}\n```\n\n### SQL for step 2 (find MRs):\n\nBuild dynamic IN-clause placeholders for the resolved path set:\n```sql\nSELECT DISTINCT mr.id, mr.iid, mr.title, mr.state, mr.author_username,\n mr.web_url, mr.merged_at, mr.updated_at, mr.merge_commit_sha,\n mfc.change_type\nFROM mr_file_changes mfc\nJOIN merge_requests mr ON mr.id = mfc.merge_request_id\nWHERE mfc.project_id = ?1\n AND (mfc.new_path IN (...placeholders...) 
OR mfc.old_path IN (...placeholders...))\nORDER BY COALESCE(mr.merged_at, mr.updated_at) DESC\nLIMIT ?N\n```\n\n### SQL for step 3 (linked issues):\n```sql\nSELECT i.iid, i.title, i.state, i.web_url, er.reference_type\nFROM entity_references er\nJOIN issues i ON i.id = er.target_entity_id\nWHERE er.source_entity_type = 'merge_request'\n AND er.source_entity_id = ?1\n AND er.target_entity_type = 'issue'\n```\n\n### SQL for step 4 (DiffNote discussions):\n```sql\nSELECT n.author_username, n.body, n.created_at, n.position_new_path\nFROM notes n\nJOIN discussions d ON d.id = n.discussion_id\nWHERE d.merge_request_id = ?1\n AND n.position_new_path IN (...placeholders...)\n AND n.is_system = 0\nORDER BY n.created_at ASC\n```\n\nRegister in `src/core/mod.rs`: `pub mod trace;`\n\n## Acceptance Criteria\n\n- [ ] run_trace() returns chains ordered by COALESCE(merged_at, updated_at) DESC\n- [ ] Rename-aware: uses all paths from resolve_rename_chain\n- [ ] Issues linked via entity_references (closes, mentioned, related)\n- [ ] DiffNote discussions correctly filtered to traced file paths via position_new_path\n- [ ] Discussion body_snippet truncated to 500 chars\n- [ ] Empty result (file not in any MR) returns TraceResult with empty chains\n- [ ] Limit applies to number of chains (MRs), not total discussions\n- [ ] Module registered in src/core/mod.rs as `pub mod trace;`\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/trace.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod trace;`)\n\n## TDD Loop\n\nRED:\n- `test_trace_empty_file` — unknown file returns empty chains\n- `test_trace_finds_mr` — file in mr_file_changes returns chain with correct MR\n- `test_trace_follows_renames` — renamed file finds historical MRs\n- `test_trace_links_issues` — MR with entity_references shows linked issues\n- `test_trace_limits_chains` — limit=1 returns at most 1 chain\n- `test_trace_no_follow_renames` — 
follow_renames=false only matches literal path\n\nTests need in-memory DB with migrations applied through 016 + test fixtures for mr_file_changes, entity_references, discussions, notes.\n\nGREEN: Implement SQL queries and chain assembly.\n\nVERIFY: `cargo test --lib -- trace`\n\n## Edge Cases\n\n- MR with no linked issues: chain has empty issues vec\n- Same issue linked from multiple MRs: appears in each chain independently\n- DiffNote on old_path (before rename): captured via resolved path set\n- include_discussions=false: skip DiffNote query for performance\n- Null merged_at: falls back to updated_at for ordering\n- Dynamic IN-clause: use rusqlite::params_from_iter for parameterized queries\n","status":"in_progress","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:32.738743Z","created_by":"tayloreernisse","updated_at":"2026-02-17T19:08:40.226759Z","compaction_level":0,"original_size":0,"labels":["gate-5","phase-b","query"],"dependencies":[{"issue_id":"bd-2n4","depends_on_id":"bd-1ht","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-2n4","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-2n4","depends_on_id":"bd-z94","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} +{"id":"bd-2n4","title":"Implement trace query: file -> MR -> issue -> discussion chain","description":"## Background\n\nThe trace query builds a chain from file path -> MRs -> issues -> discussions, combining data from mr_file_changes (Gate 4), entity_references (Gate 2), and the existing discussions/notes tables. 
This is the backend for the trace CLI command.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 5.4 (Query Flow Tier 1).\n\n## Codebase Context\n\n- entity_references table (migration 011): source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method\n- mr_file_changes table (migration 016, bd-1oo): merge_request_id, project_id, old_path, new_path, change_type\n- discussions table: issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, position_new_path (for DiffNotes)\n- merge_requests table: iid, title, state, author_username, web_url, merged_at, updated_at\n- issues table: iid, title, state, web_url\n- resolve_rename_chain() from bd-1yx (src/core/file_history.rs) provides multi-path matching\n- reference_type values: 'closes', 'mentioned', 'related'\n\n## Approach\n\nCreate `src/core/trace.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::file_history::resolve_rename_chain;\nuse crate::core::error::Result;\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceChain {\n pub merge_request: TraceMr,\n pub issues: Vec,\n pub discussions: Vec,\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceMr {\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub author_username: String,\n pub web_url: Option,\n pub merged_at: Option,\n pub merge_commit_sha: Option,\n pub file_change_type: String,\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceIssue {\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: Option,\n pub reference_type: String, // \"closes\", \"mentioned\", \"related\"\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceDiscussion {\n pub author_username: String,\n pub body_snippet: String, // truncated to 500 chars\n pub created_at: i64,\n pub is_diff_note: bool, // true if position_new_path matched\n}\n\n#[derive(Debug, Clone, Serialize)]\npub struct TraceResult {\n pub path: String,\n 
pub resolved_paths: Vec,\n pub chains: Vec,\n}\n\npub fn run_trace(\n conn: &Connection,\n project_id: i64,\n path: &str,\n follow_renames: bool,\n include_discussions: bool,\n limit: usize,\n) -> Result {\n // 1. Resolve rename chain (unless !follow_renames)\n let paths = if follow_renames {\n resolve_rename_chain(conn, project_id, path, 10)?\n } else {\n vec![path.to_string()]\n };\n\n // 2. Find MRs via mr_file_changes for all resolved paths\n // Dynamic IN-clause for path set\n // 3. For each MR, find linked issues via entity_references\n // 4. If include_discussions, fetch DiffNote discussions on traced file\n // 5. Order chains by COALESCE(merged_at, updated_at) DESC, apply limit\n}\n```\n\n### SQL for step 2 (find MRs):\n\nBuild dynamic IN-clause placeholders for the resolved path set:\n```sql\nSELECT DISTINCT mr.id, mr.iid, mr.title, mr.state, mr.author_username,\n mr.web_url, mr.merged_at, mr.updated_at, mr.merge_commit_sha,\n mfc.change_type\nFROM mr_file_changes mfc\nJOIN merge_requests mr ON mr.id = mfc.merge_request_id\nWHERE mfc.project_id = ?1\n AND (mfc.new_path IN (...placeholders...) 
OR mfc.old_path IN (...placeholders...))\nORDER BY COALESCE(mr.merged_at, mr.updated_at) DESC\nLIMIT ?N\n```\n\n### SQL for step 3 (linked issues):\n```sql\nSELECT i.iid, i.title, i.state, i.web_url, er.reference_type\nFROM entity_references er\nJOIN issues i ON i.id = er.target_entity_id\nWHERE er.source_entity_type = 'merge_request'\n AND er.source_entity_id = ?1\n AND er.target_entity_type = 'issue'\n```\n\n### SQL for step 4 (DiffNote discussions):\n```sql\nSELECT n.author_username, n.body, n.created_at, n.position_new_path\nFROM notes n\nJOIN discussions d ON d.id = n.discussion_id\nWHERE d.merge_request_id = ?1\n AND n.position_new_path IN (...placeholders...)\n AND n.is_system = 0\nORDER BY n.created_at ASC\n```\n\nRegister in `src/core/mod.rs`: `pub mod trace;`\n\n## Acceptance Criteria\n\n- [ ] run_trace() returns chains ordered by COALESCE(merged_at, updated_at) DESC\n- [ ] Rename-aware: uses all paths from resolve_rename_chain\n- [ ] Issues linked via entity_references (closes, mentioned, related)\n- [ ] DiffNote discussions correctly filtered to traced file paths via position_new_path\n- [ ] Discussion body_snippet truncated to 500 chars\n- [ ] Empty result (file not in any MR) returns TraceResult with empty chains\n- [ ] Limit applies to number of chains (MRs), not total discussions\n- [ ] Module registered in src/core/mod.rs as `pub mod trace;`\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/trace.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod trace;`)\n\n## TDD Loop\n\nRED:\n- `test_trace_empty_file` — unknown file returns empty chains\n- `test_trace_finds_mr` — file in mr_file_changes returns chain with correct MR\n- `test_trace_follows_renames` — renamed file finds historical MRs\n- `test_trace_links_issues` — MR with entity_references shows linked issues\n- `test_trace_limits_chains` — limit=1 returns at most 1 chain\n- `test_trace_no_follow_renames` — 
follow_renames=false only matches literal path\n\nTests need in-memory DB with migrations applied through 016 + test fixtures for mr_file_changes, entity_references, discussions, notes.\n\nGREEN: Implement SQL queries and chain assembly.\n\nVERIFY: `cargo test --lib -- trace`\n\n## Edge Cases\n\n- MR with no linked issues: chain has empty issues vec\n- Same issue linked from multiple MRs: appears in each chain independently\n- DiffNote on old_path (before rename): captured via resolved path set\n- include_discussions=false: skip DiffNote query for performance\n- Null merged_at: falls back to updated_at for ordering\n- Dynamic IN-clause: use rusqlite::params_from_iter for parameterized queries\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:32.738743Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:44:31.977494Z","closed_at":"2026-02-19T13:44:31.977416Z","close_reason":"Implementation complete: run_trace() with full test suite (12 tests pass)","compaction_level":0,"original_size":0,"labels":["gate-5","phase-b","query"],"dependencies":[{"issue_id":"bd-2n4","depends_on_id":"bd-1ht","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-2n4","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-2n4","depends_on_id":"bd-z94","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} {"id":"bd-2nb","title":"[CP1] Issue ingestion module","description":"Fetch and store issues with cursor-based incremental sync.\n\nImplement ingestIssues(options) → { fetched, upserted, labelsCreated }\n\nLogic:\n1. Get current cursor from sync_cursors\n2. Paginate through issues updated after cursor\n3. Apply local filtering for tuple cursor semantics\n4. For each issue:\n - Store raw payload (compressed)\n - Upsert issue record\n - Extract and upsert labels\n - Link issue to labels via junction\n5. 
Update cursor after each page commit\n\nFiles: src/ingestion/issues.ts\nTests: tests/integration/issue-ingestion.test.ts\nDone when: Issues, labels, issue_labels populated correctly with resumable cursor","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:50.701180Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.154318Z","closed_at":"2026-01-25T15:21:35.154318Z","deleted_at":"2026-01-25T15:21:35.154316Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2nfs","title":"Implement snapshot test infrastructure + terminal compat matrix","description":"## Background\nSnapshot tests ensure deterministic rendering using FakeClock and ftui's test backend. They capture rendered TUI output as styled text and compare against golden files, catching visual regressions without a real terminal. The terminal compatibility matrix is a separate documentation artifact, not an automated test.\n\n## Approach\n\n### Snapshot Infrastructure\n\n**Test Backend**: Use `ftui_harness::TestBackend` (or equivalent from ftui-harness crate) which captures rendered output as a Buffer without needing a real terminal. 
If ftui-harness is not available, create a minimal TestBackend that implements ftui's backend trait and stores cells in a `Vec>`.\n\n**Deterministic Rendering**:\n- Inject FakeClock (from bd-2lg6) to freeze all relative time computations (\"2 hours ago\" always renders the same)\n- Fix terminal size to 120x40 for all snapshot tests\n- Use synthetic DB fixture with known data (same fixture pattern as parity tests)\n\n**Snapshot Capture Flow**:\n```rust\nfn capture_snapshot(app: &LoreApp, size: (u16, u16)) -> String {\n let backend = TestBackend::new(size.0, size.1);\n // Render app.view() to backend\n // Convert buffer cells to plain text with ANSI annotations\n // Return as String\n}\n```\n\n**Golden File Management**:\n- Golden files stored in `crates/lore-tui/tests/snapshots/` as `.snap` files\n- Naming: `{test_name}.snap` (e.g., `dashboard_default.snap`)\n- Update mode: set env var `UPDATE_SNAPSHOTS=1` to overwrite golden files instead of comparing\n- Use `insta` crate (or manual file comparison) for snapshot assertion\n\n**Fixture Data** (synthetic, deterministic):\n- 50 issues (mix of opened/closed/locked states, various labels)\n- 25 MRs (mix of opened/merged/closed/draft)\n- 100 discussions with notes\n- Known timestamps relative to FakeClock's frozen time\n\n### Snapshot Tests\n\nEach test:\n1. Creates in-memory DB with fixture data\n2. Creates LoreApp with FakeClock frozen at 2026-01-15T12:00:00Z\n3. Sets initial screen state\n4. Renders via TestBackend at 120x40\n5. 
Compares output against golden file\n\nTests to implement:\n- `test_dashboard_snapshot`: Dashboard screen with fixture counts and recent activity\n- `test_issue_list_snapshot`: Issue list with default sort, showing state badges and relative times\n- `test_issue_detail_snapshot`: Single issue detail with description and discussion thread\n- `test_mr_list_snapshot`: MR list showing draft indicators and review status\n- `test_search_results_snapshot`: Search results with highlighted matches\n- `test_empty_state_snapshot`: Dashboard with empty DB (zero issues/MRs)\n\n### Terminal Compatibility Matrix (Documentation)\n\nThis is a manual verification checklist, NOT an automated test. Document results in `crates/lore-tui/TERMINAL_COMPAT.md`:\n\n| Feature | iTerm2 | tmux | Alacritty | kitty |\n|---------|--------|------|-----------|-------|\n| True color (RGB) | | | | |\n| Unicode width (CJK) | | | | |\n| Box-drawing chars | | | | |\n| Bold/italic/underline | | | | |\n| Mouse events | | | | |\n| Resize handling | | | | |\n| Alt screen | | | | |\n\nFill in during manual QA, not during automated test implementation.\n\n## Acceptance Criteria\n- [ ] At least 6 snapshot tests pass with golden files committed to repo\n- [ ] All snapshots use FakeClock frozen at 2026-01-15T12:00:00Z\n- [ ] All snapshots render at fixed 120x40 terminal size\n- [ ] Dashboard snapshot matches golden file (deterministic)\n- [ ] Issue list snapshot matches golden file (deterministic)\n- [ ] Empty state snapshot matches golden file\n- [ ] UPDATE_SNAPSHOTS=1 env var overwrites golden files for updates\n- [ ] Golden files are plain text (diffable in version control)\n- [ ] TERMINAL_COMPAT.md template created (to be filled during manual QA)\n\n## Files\n- CREATE: crates/lore-tui/tests/snapshot_tests.rs\n- CREATE: crates/lore-tui/tests/snapshots/ (directory for golden files)\n- CREATE: crates/lore-tui/tests/snapshots/dashboard_default.snap\n- CREATE: 
crates/lore-tui/tests/snapshots/issue_list_default.snap\n- CREATE: crates/lore-tui/tests/snapshots/issue_detail.snap\n- CREATE: crates/lore-tui/tests/snapshots/mr_list_default.snap\n- CREATE: crates/lore-tui/tests/snapshots/search_results.snap\n- CREATE: crates/lore-tui/tests/snapshots/empty_state.snap\n- CREATE: crates/lore-tui/TERMINAL_COMPAT.md (template)\n\n## TDD Anchor\nRED: Write `test_dashboard_snapshot` that creates LoreApp with FakeClock and fixture DB, renders Dashboard at 120x40, asserts output matches `snapshots/dashboard_default.snap`. Fails because golden file does not exist yet.\nGREEN: Render the Dashboard, run with UPDATE_SNAPSHOTS=1 to generate golden file, then run normally to verify match.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml snapshot\n\n## Edge Cases\n- Golden file encoding: always UTF-8, normalize line endings to LF\n- FakeClock must be injected into all components that compute relative time (e.g., \"2 hours ago\")\n- Snapshot diffs on CI: print a clear diff showing expected vs actual when mismatch occurs\n- Fixture data must NOT include non-deterministic values (random IDs, current timestamps)\n- If ftui-harness API changes, TestBackend shim may need updating\n\n## Dependency Context\n- Uses FakeClock from bd-2lg6 (Implement Clock trait)\n- Uses all screen views from Phase 2 (Dashboard, Issue List, MR List, Detail views)\n- Uses TestBackend from ftui-harness crate (or custom implementation)\n- Depends on bd-3h00 (session persistence) per phase ordering — screens must be complete before snapshotting\n- Downstream: bd-nu0d (fuzz tests) and bd-3fjk (race tests) depend on this 
infrastructure","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:03:54.220114Z","created_by":"tayloreernisse","updated_at":"2026-02-19T05:37:55.058498Z","closed_at":"2026-02-19T05:37:55.058110Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2nfs","depends_on_id":"bd-1b6k","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-2nfs","depends_on_id":"bd-3h00","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-2ni","title":"OBSERV Epic: Phase 2 - Spans + Correlation IDs","description":"Add tracing spans to all sync stages and generate UUID-based run_id for correlation. Every log line within a sync run includes run_id in JSON span context. Nested spans produce correct parent-child chains.\n\nDepends on: Phase 1 (subscriber must support span recording)\nUnblocks: Phase 3 (metrics), Phase 5 (rate limit logging)\n\nFiles: src/cli/commands/sync.rs, src/cli/commands/ingest.rs, src/ingestion/orchestrator.rs, src/documents/regenerator.rs, src/embedding/pipeline.rs, src/main.rs\n\nAcceptance criteria (PRD Section 6.2):\n- Every log line includes run_id in JSON span context\n- Nested spans produce chain: fetch_pages includes parent ingest_issues span\n- run_id is 8-char hex (truncated UUIDv4)\n- Spans visible in -vv stderr output","status":"closed","priority":1,"issue_type":"epic","created_at":"2026-02-04T15:53:08.935218Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:19:38.721297Z","closed_at":"2026-02-04T17:19:38.721241Z","close_reason":"Phase 2 complete: run_id correlation IDs generated at sync/ingest entry, root spans with .instrument() for async, #[instrument] on 5 key pipeline functions","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-2ni","depends_on_id":"bd-2nx","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} @@ -206,7 +206,7 @@ 
{"id":"bd-3a4k","title":"CLI: list issues status column, filter, and robot fields","description":"## Background\nList issues needs a Status column in the table, status fields in robot JSON, and a --status filter for querying by work item status name. The filter supports multiple values (OR semantics) and case-insensitive matching.\n\n## Approach\nExtend list.rs row types, SQL, table rendering. Add --status Vec to clap args. Build dynamic WHERE clause with COLLATE NOCASE. Wire into both ListFilters constructions in main.rs. Register in autocorrect.\n\n## Files\n- src/cli/commands/list.rs (row types, SQL, table, filter, color helper)\n- src/cli/mod.rs (--status flag on IssuesArgs)\n- src/main.rs (wire statuses into both ListFilters)\n- src/cli/autocorrect.rs (add --status to COMMAND_FLAGS)\n\n## Implementation\n\nIssueListRow + IssueListRowJson: add 5 status fields (all Option)\nFrom<&IssueListRow> for IssueListRowJson: clone all 5 fields\n\nquery_issues SELECT: add i.status_name, i.status_category, i.status_color, i.status_icon_name, i.status_synced_at after existing columns\n Existing SELECT has 12 columns (indices 0-11). New columns: indices 12-16.\n Row mapping: status_name: row.get(12)?, ..., status_synced_at: row.get(16)?\n\nListFilters: add pub statuses: &'a [String]\n\nWHERE clause builder (after has_due_date block):\n if statuses.len() == 1: \"i.status_name = ? COLLATE NOCASE\" + push param\n if statuses.len() > 1: \"i.status_name IN (?, ?, ...) 
COLLATE NOCASE\" + push all params\n\nTable: add \"Status\" column header (bold) between State and Assignee\n Row: match &issue.status_name -> Some: colored_cell_hex(status, color), None: Cell::new(\"\")\n\nNew helper:\n fn colored_cell_hex(content, hex: Option<&str>) -> Cell\n If no hex or colors disabled: Cell::new(content)\n Parse 6-char hex, use Cell::new(content).fg(Color::Rgb { r, g, b })\n\nIn src/cli/mod.rs IssuesArgs:\n #[arg(long, help_heading = \"Filters\")]\n pub status: Vec,\n\nIn src/main.rs handle_issues (~line 695):\n ListFilters { ..., statuses: &args.status }\nIn legacy List handler (~line 2421):\n ListFilters { ..., statuses: &[] }\n\nIn src/cli/autocorrect.rs COMMAND_FLAGS \"issues\" entry:\n Add \"--status\" between existing flags\n\n## Acceptance Criteria\n- [ ] Status column appears in table between State and Assignee\n- [ ] NULL status -> empty cell\n- [ ] Status colored by hex in human mode\n- [ ] --status \"In progress\" filters correctly\n- [ ] --status \"in progress\" matches \"In progress\" (COLLATE NOCASE)\n- [ ] --status \"To do\" --status \"In progress\" -> OR semantics (both returned)\n- [ ] Robot: status_name, status_category in each issue JSON\n- [ ] --fields supports status_name, status_category, status_color, status_icon_name, status_synced_at\n- [ ] --fields minimal does NOT include status fields\n- [ ] Autocorrect registry test passes (--status registered)\n- [ ] cargo check --all-targets passes\n\n## TDD Loop\nRED: test_list_filter_by_status, test_list_filter_by_status_case_insensitive, test_list_filter_by_multiple_statuses\nGREEN: Implement all changes across 4 files\nVERIFY: cargo test list_filter && cargo test registry_covers\n\n## Edge Cases\n- COLLATE NOCASE is ASCII-only but sufficient (all system statuses are ASCII)\n- Single-value uses = for simplicity; multi-value uses IN with dynamic placeholders\n- --status combined with other filters (--state, --label) -> AND logic\n- autocorrect registry_covers_command_flags test 
will FAIL if --status not registered\n- Legacy List command path also constructs ListFilters — needs statuses: &[]\n- Column index offset: new columns start at 12 (0-indexed)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-11T06:42:26.438Z","created_by":"tayloreernisse","updated_at":"2026-02-11T07:21:33.421297Z","closed_at":"2026-02-11T07:21:33.421247Z","close_reason":"Implemented by agent swarm — all quality gates pass (595 tests, 0 failures)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3a4k","depends_on_id":"bd-2y79","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3a4k","depends_on_id":"bd-3dum","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-3ae","title":"Epic: CP2 Gate A - MRs Only","description":"## Background\nGate A validates core MR ingestion works before adding complexity. Proves the cursor-based sync, pagination, and basic CLI work. This is the foundation - if Gate A fails, nothing else matters.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] `gi ingest --type=merge_requests` completes without error\n- [ ] `SELECT COUNT(*) FROM merge_requests` > 0\n- [ ] `gi list mrs --limit=5` shows 5 MRs with iid, title, state, author\n- [ ] `gi count mrs` shows total count matching DB query\n- [ ] MR with `state=locked` can be stored (if exists in test data)\n- [ ] Draft MR shows `draft=1` in DB and `[DRAFT]` in list output\n- [ ] `work_in_progress=true` MR shows `draft=1` (fallback works)\n- [ ] `head_sha` populated for MRs with commits\n- [ ] `references_short` and `references_full` populated\n- [ ] Re-run ingest shows \"0 new MRs\" or minimal refetch (cursor working)\n- [ ] Cursor saved at page boundary, not item boundary\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate A: MRs Only ===\"\n\n# 1. 
Clear any existing MR data for clean test\necho \"Step 1: Reset MR cursor for clean test...\"\nsqlite3 \"$DB_PATH\" \"DELETE FROM sync_cursors WHERE resource_type = 'merge_requests';\"\n\n# 2. Run MR ingestion\necho \"Step 2: Ingest MRs...\"\ngi ingest --type=merge_requests\n\n# 3. Verify MRs exist\necho \"Step 3: Verify MR count...\"\nMR_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests;\")\necho \" MR count: $MR_COUNT\"\n[ \"$MR_COUNT\" -gt 0 ] || { echo \"FAIL: No MRs ingested\"; exit 1; }\n\n# 4. Verify list command\necho \"Step 4: Test list command...\"\ngi list mrs --limit=5\n\n# 5. Verify count command\necho \"Step 5: Test count command...\"\ngi count mrs\n\n# 6. Verify draft handling\necho \"Step 6: Check draft MRs...\"\nDRAFT_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE draft = 1;\")\necho \" Draft MR count: $DRAFT_COUNT\"\n\n# 7. Verify head_sha population\necho \"Step 7: Check head_sha...\"\nSHA_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE head_sha IS NOT NULL;\")\necho \" MRs with head_sha: $SHA_COUNT\"\n\n# 8. Verify references\necho \"Step 8: Check references...\"\nREF_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE references_short IS NOT NULL;\")\necho \" MRs with references: $REF_COUNT\"\n\n# 9. Verify cursor saved\necho \"Step 9: Check cursor...\"\nCURSOR=$(sqlite3 \"$DB_PATH\" \"SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\")\necho \" Cursor: $CURSOR\"\n[ -n \"$CURSOR\" ] || { echo \"FAIL: Cursor not saved\"; exit 1; }\n\n# 10. 
Re-run and verify minimal refetch\necho \"Step 10: Re-run ingest (should be minimal)...\"\ngi ingest --type=merge_requests\n# Output should show minimal or zero new MRs\n\necho \"\"\necho \"=== Gate A: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Run these in order:\ngi ingest --type=merge_requests\ngi list mrs --limit=10\ngi count mrs\n\n# Verify in DB:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n COUNT(*) as total,\n SUM(CASE WHEN draft = 1 THEN 1 ELSE 0 END) as drafts,\n SUM(CASE WHEN head_sha IS NOT NULL THEN 1 ELSE 0 END) as with_sha,\n SUM(CASE WHEN references_short IS NOT NULL THEN 1 ELSE 0 END) as with_refs\n FROM merge_requests;\n\"\n\n# Re-run (should be no-op):\ngi ingest --type=merge_requests\n```\n\n## Dependencies\nThis gate requires these beads to be complete:\n- bd-3ir (Database migration)\n- bd-5ta (GitLab MR types)\n- bd-34o (MR transformer)\n- bd-iba (GitLab client pagination)\n- bd-ser (MR ingestion module)\n\n## Edge Cases\n- `locked` state is transitional (merge in progress); may not exist in test data\n- Some older GitLab instances may not return `head_sha` for all MRs\n- `work_in_progress` is deprecated but should still work as fallback\n- Very large projects (10k+ MRs) may take significant time on first sync","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:00.966522Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.057298Z","closed_at":"2026-01-27T00:48:21.057225Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3ae","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3ae","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-3as","title":"Implement timeline event collection and chronological interleaving","description":"## Background\n\nThe event collection phase is steps 4-5 of the 
timeline pipeline (spec Section 3.2). It takes seed + expanded entity sets and collects all their events from resource event tables, then interleaves chronologically.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 4-5, Section 3.3 (Event Model).\n\n## Codebase Context\n\n- resource_state_events: columns include state, actor_username (not actor_gitlab_id for display), created_at, issue_id, merge_request_id, source_merge_request_iid, source_commit\n- resource_label_events: columns include action ('add'|'remove'), label_name (NULLABLE since migration 012), actor_username, created_at\n- resource_milestone_events: columns include action ('add'|'remove'), milestone_title (NULLABLE since migration 012), actor_username, created_at\n- issues table: created_at, author_username, title, web_url\n- merge_requests table: created_at, author_username, title, web_url, merged_at, updated_at\n- All timestamps are ms epoch UTC (stored as INTEGER)\n\n## Approach\n\nCreate `src/core/timeline_collect.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::timeline::{TimelineEvent, TimelineEventType, EntityRef, ExpandedEntityRef};\n\npub fn collect_events(\n conn: &Connection,\n seed_entities: &[EntityRef],\n expanded_entities: &[ExpandedEntityRef],\n evidence_notes: &[TimelineEvent], // from seed phase\n since_ms: Option, // --since filter\n limit: usize, // -n flag (default 100)\n) -> Result> { ... }\n```\n\n### Event Collection Per Entity\n\nFor each entity (seed + expanded), collect:\n\n1. **Creation event** (`Created`):\n ```sql\n -- Issues:\n SELECT created_at, author_username, title, web_url FROM issues WHERE id = ?1\n -- MRs:\n SELECT created_at, author_username, title, web_url FROM merge_requests WHERE id = ?1\n ```\n\n2. 
**State changes** (`StateChanged { state }`):\n ```sql\n SELECT state, actor_username, created_at FROM resource_state_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2) -- since filter\n ORDER BY created_at ASC\n ```\n NOTE: For MRs, a state='merged' event also produces a separate Merged variant.\n\n3. **Label changes** (`LabelAdded`/`LabelRemoved`):\n ```sql\n SELECT action, label_name, actor_username, created_at FROM resource_label_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL label_name (deleted label): use \"[deleted label]\" as fallback.\n\n4. **Milestone changes** (`MilestoneSet`/`MilestoneRemoved`):\n ```sql\n SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL milestone_title: use \"[deleted milestone]\" as fallback.\n\n5. **Merge event** (Merged, MR only):\n Derive from merge_requests.merged_at (preferred) OR resource_state_events WHERE state='merged'. 
Skip StateChanged when state='merged' — emit only the Merged variant.\n\n### Chronological Interleave\n\n```rust\nevents.sort(); // Uses Ord impl from bd-20e\nif let Some(since) = since_ms {\n events.retain(|e| e.timestamp >= since);\n}\nevents.truncate(limit);\n```\n\nRegister in `src/core/mod.rs`: `pub mod timeline_collect;`\n\n## Acceptance Criteria\n\n- [ ] Collects Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence events\n- [ ] Merged events deduplicated from StateChanged{merged} — emit only Merged variant\n- [ ] NULL label_name/milestone_title handled with fallback text\n- [ ] --since filter applied to all event types\n- [ ] Events sorted chronologically with stable tiebreak\n- [ ] Limit applied AFTER sorting\n- [ ] Evidence notes from seed phase included\n- [ ] is_seed correctly set based on entity source\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_collect.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_collect;`)\n\n## TDD Loop\n\nRED:\n- `test_collect_creation_event` - entity produces Created event\n- `test_collect_state_events` - state changes produce StateChanged events\n- `test_collect_merged_dedup` - state='merged' produces Merged not StateChanged\n- `test_collect_null_label_fallback` - NULL label_name uses fallback text\n- `test_collect_since_filter` - old events excluded\n- `test_collect_chronological_sort` - mixed entity events interleave correctly\n- `test_collect_respects_limit`\n\nTests need in-memory DB with migrations 001-014 applied.\n\nGREEN: Implement SQL queries and event assembly.\n\nVERIFY: `cargo test --lib -- timeline_collect`\n\n## Edge Cases\n\n- MR with merged_at=NULL and no state='merged' event: no Merged event emitted\n- Entity with 0 events in resource tables: only Created event returned\n- NULL actor_username: actor field is None\n- Timestamps at exact 
--since boundary: use >= (inclusive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.703942Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:53:01.160429Z","closed_at":"2026-02-05T21:53:01.160380Z","close_reason":"Completed: Created src/core/timeline_collect.rs with event collection for Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence. Merged dedup (state=merged skipped in favor of Merged variant). NULL label/milestone fallbacks. Since filter, chronological sort, limit. 10 tests pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-3as","depends_on_id":"bd-1ep","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3as","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3as","depends_on_id":"bd-ypa","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} -{"id":"bd-3bec","title":"Wire surgical dispatch in run_sync and update robot-docs","description":"## Background\n\nThe existing `run_sync` function (lines 63-360 of `src/cli/commands/sync.rs`) handles the normal full-sync pipeline. Once `run_sync_surgical` (bd-1i4i) is implemented, this bead wires the dispatch: when `SyncOptions` contains issue or MR IIDs, route to the surgical path instead of the normal path. This also requires updating `handle_sync_cmd` (line 2120 of `src/main.rs`) to pass through the new CLI fields (bd-1lja), and updating the robot-docs schema to document the new surgical response fields.\n\n## Approach\n\nThree changes:\n\n**1. 
Dispatch in `run_sync` (src/cli/commands/sync.rs)**\n\nAdd an early check at the top of `run_sync` (after line 68):\n\n```rust\npub async fn run_sync(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result {\n // Surgical dispatch: if any IIDs specified, route to surgical pipeline\n if !options.issues.is_empty() || !options.merge_requests.is_empty() {\n return run_sync_surgical(config, options, run_id, signal).await;\n }\n\n // ... existing normal sync pipeline unchanged ...\n}\n```\n\n**2. Update `handle_sync_cmd` (src/main.rs line 2120)**\n\nPass new fields from `SyncArgs` into `SyncOptions`:\n\n```rust\nlet options = SyncOptions {\n full: args.full && !args.no_full,\n force: args.force && !args.no_force,\n no_embed: args.no_embed,\n no_docs: args.no_docs,\n no_events: args.no_events,\n robot_mode,\n dry_run,\n // New surgical fields (from bd-1lja)\n issues: args.issue.clone(),\n merge_requests: args.mr.clone(),\n project: args.project.clone(),\n preflight_only: args.preflight_only,\n};\n```\n\nAlso: when surgical mode is detected (issues/MRs non-empty), skip the normal SyncRunRecorder setup in `handle_sync_cmd` since `run_sync_surgical` manages its own recorder.\n\n**3. Update robot-docs (src/main.rs handle_robot_docs)**\n\nAdd documentation for the surgical sync response format. The robot-docs output should include:\n- New CLI flags: `--issue`, `--mr`, `-p`/`--project`, `--preflight-only`\n- Surgical response fields: `surgical_mode`, `surgical_iids`, `entity_results`, `preflight_only`\n- `EntitySyncResult` schema: `entity_type`, `iid`, `outcome`, `error`, `toctou_reason`\n- Exit codes for surgical-specific errors\n\n## Acceptance Criteria\n\n1. `lore sync --issue 7 -p group/project` dispatches to `run_sync_surgical`, not normal sync\n2. `lore sync` (no IIDs) follows the existing normal pipeline unchanged\n3. 
`handle_sync_cmd` passes `issues`, `merge_requests`, `project`, `preflight_only` from args to options\n4. `lore robot-docs` output includes surgical sync documentation\n5. All existing sync tests pass without modification\n6. Robot mode JSON output for surgical sync matches documented schema\n\n## Files\n\n- `src/cli/commands/sync.rs` — add dispatch check at top of `run_sync`, add `use super::sync_surgical::run_sync_surgical`\n- `src/main.rs` — update `handle_sync_cmd` to pass new fields, update robot-docs text\n- `src/cli/commands/mod.rs` — ensure `sync_surgical` module is public (may already be done by bd-1i4i)\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync.rs` or a companion test file:\n\n```rust\n#[cfg(test)]\nmod dispatch_tests {\n use super::*;\n\n #[test]\n fn sync_options_with_issues_is_surgical() {\n let options = SyncOptions {\n issues: vec![7],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_without_iids_is_normal() {\n let options = SyncOptions::default();\n assert!(options.issues.is_empty() && options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_with_mrs_is_surgical() {\n let options = SyncOptions {\n merge_requests: vec![10, 20],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[tokio::test]\n async fn dispatch_routes_to_surgical_when_issues_present() {\n // Integration-level test: verify run_sync with IIDs calls surgical path.\n // This test uses wiremock to mock the surgical path's GitLab calls.\n // The key assertion: when options.issues is non-empty, the function\n // does NOT attempt the normal ingest flow (no project cursor queries).\n let server = wiremock::MockServer::start().await;\n wiremock::Mock::given(wiremock::matchers::method(\"GET\"))\n .and(wiremock::matchers::path_regex(r\"/api/v4/projects/1/issues\"))\n 
.respond_with(wiremock::ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let mut config = Config::default();\n config.gitlab.url = server.uri();\n config.gitlab.token = \"test-token\".to_string();\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync(&config, options, Some(\"dispatch-test\"), &signal).await;\n\n // Should succeed via surgical path (or at least not panic from normal path)\n assert!(result.is_ok());\n let r = result.unwrap();\n assert_eq!(r.surgical_mode, Some(true));\n }\n\n #[test]\n fn robot_docs_includes_surgical_sync() {\n // Verify the robot-docs string contains surgical sync documentation\n // This tests the static text, not runtime behavior\n let docs = include_str!(\"../../../src/main.rs\");\n // The robot-docs handler should mention surgical sync\n // (Actual assertion depends on how robot-docs are generated)\n }\n}\n```\n\n## Edge Cases\n\n- **Dry-run + surgical**: `handle_sync_cmd` currently short-circuits dry-run before SyncRunRecorder setup (line 2149). Surgical dry-run should also short-circuit, but preflight-only is the surgical equivalent. Clarify: `--dry-run --issue 7` should be treated as `--preflight-only --issue 7`.\n- **Normal sync recorder vs surgical recorder**: `handle_sync_cmd` creates a `SyncRunRecorder` for normal sync (line 2159). When dispatching to surgical, skip this since `run_sync_surgical` creates its own. 
Use the `options.issues.is_empty() && options.merge_requests.is_empty()` check to decide.\n- **Robot-docs backward compatibility**: New fields are additive. Existing robot-docs consumers that ignore unknown fields are unaffected.\n- **No project specified with IIDs**: If `--issue 7` is passed without `-p project`, the dispatch should fail with a clear usage error (validation in bd-1lja).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function to call), bd-1lja (SyncOptions extensions with `issues`, `merge_requests`, `project`, `preflight_only` fields), bd-wcja (SyncResult surgical fields for assertion)\n- **No downstream dependents** — this is the final wiring bead for the main code path.\n- Must NOT modify the normal sync pipeline behavior. The dispatch is a pure conditional branch at function entry.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:10.648172Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:03:44.531713Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-3bec","title":"Wire surgical dispatch in run_sync and update robot-docs","description":"## Background\n\nThe existing `run_sync` function (lines 63-360 of `src/cli/commands/sync.rs`) handles the normal full-sync pipeline. Once `run_sync_surgical` (bd-1i4i) is implemented, this bead wires the dispatch: when `SyncOptions` contains issue or MR IIDs, route to the surgical path instead of the normal path. This also requires updating `handle_sync_cmd` (line 2120 of `src/main.rs`) to pass through the new CLI fields (bd-1lja), and updating the robot-docs schema to document the new surgical response fields.\n\n## Approach\n\nThree changes:\n\n**1. 
Dispatch in `run_sync` (src/cli/commands/sync.rs)**\n\nAdd an early check at the top of `run_sync` (after line 68):\n\n```rust\npub async fn run_sync(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result {\n // Surgical dispatch: if any IIDs specified, route to surgical pipeline\n if !options.issues.is_empty() || !options.merge_requests.is_empty() {\n return run_sync_surgical(config, options, run_id, signal).await;\n }\n\n // ... existing normal sync pipeline unchanged ...\n}\n```\n\n**2. Update `handle_sync_cmd` (src/main.rs line 2120)**\n\nPass new fields from `SyncArgs` into `SyncOptions`:\n\n```rust\nlet options = SyncOptions {\n full: args.full && !args.no_full,\n force: args.force && !args.no_force,\n no_embed: args.no_embed,\n no_docs: args.no_docs,\n no_events: args.no_events,\n robot_mode,\n dry_run,\n // New surgical fields (from bd-1lja)\n issues: args.issue.clone(),\n merge_requests: args.mr.clone(),\n project: args.project.clone(),\n preflight_only: args.preflight_only,\n};\n```\n\nAlso: when surgical mode is detected (issues/MRs non-empty), skip the normal SyncRunRecorder setup in `handle_sync_cmd` since `run_sync_surgical` manages its own recorder.\n\n**3. Update robot-docs (src/main.rs handle_robot_docs)**\n\nAdd documentation for the surgical sync response format. The robot-docs output should include:\n- New CLI flags: `--issue`, `--mr`, `-p`/`--project`, `--preflight-only`\n- Surgical response fields: `surgical_mode`, `surgical_iids`, `entity_results`, `preflight_only`\n- `EntitySyncResult` schema: `entity_type`, `iid`, `outcome`, `error`, `toctou_reason`\n- Exit codes for surgical-specific errors\n\n## Acceptance Criteria\n\n1. `lore sync --issue 7 -p group/project` dispatches to `run_sync_surgical`, not normal sync\n2. `lore sync` (no IIDs) follows the existing normal pipeline unchanged\n3. 
`handle_sync_cmd` passes `issues`, `merge_requests`, `project`, `preflight_only` from args to options\n4. `lore robot-docs` output includes surgical sync documentation\n5. All existing sync tests pass without modification\n6. Robot mode JSON output for surgical sync matches documented schema\n\n## Files\n\n- `src/cli/commands/sync.rs` — add dispatch check at top of `run_sync`, add `use super::sync_surgical::run_sync_surgical`\n- `src/main.rs` — update `handle_sync_cmd` to pass new fields, update robot-docs text\n- `src/cli/commands/mod.rs` — ensure `sync_surgical` module is public (may already be done by bd-1i4i)\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync.rs` or a companion test file:\n\n```rust\n#[cfg(test)]\nmod dispatch_tests {\n use super::*;\n\n #[test]\n fn sync_options_with_issues_is_surgical() {\n let options = SyncOptions {\n issues: vec![7],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_without_iids_is_normal() {\n let options = SyncOptions::default();\n assert!(options.issues.is_empty() && options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_with_mrs_is_surgical() {\n let options = SyncOptions {\n merge_requests: vec![10, 20],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[tokio::test]\n async fn dispatch_routes_to_surgical_when_issues_present() {\n // Integration-level test: verify run_sync with IIDs calls surgical path.\n // This test uses wiremock to mock the surgical path's GitLab calls.\n // The key assertion: when options.issues is non-empty, the function\n // does NOT attempt the normal ingest flow (no project cursor queries).\n let server = wiremock::MockServer::start().await;\n wiremock::Mock::given(wiremock::matchers::method(\"GET\"))\n .and(wiremock::matchers::path_regex(r\"/api/v4/projects/1/issues\"))\n 
.respond_with(wiremock::ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let mut config = Config::default();\n config.gitlab.url = server.uri();\n config.gitlab.token = \"test-token\".to_string();\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync(&config, options, Some(\"dispatch-test\"), &signal).await;\n\n // Should succeed via surgical path (or at least not panic from normal path)\n assert!(result.is_ok());\n let r = result.unwrap();\n assert_eq!(r.surgical_mode, Some(true));\n }\n\n #[test]\n fn robot_docs_includes_surgical_sync() {\n // Verify the robot-docs string contains surgical sync documentation\n // This tests the static text, not runtime behavior\n let docs = include_str!(\"../../../src/main.rs\");\n // The robot-docs handler should mention surgical sync\n // (Actual assertion depends on how robot-docs are generated)\n }\n}\n```\n\n## Edge Cases\n\n- **Dry-run + surgical**: `handle_sync_cmd` currently short-circuits dry-run before SyncRunRecorder setup (line 2149). Surgical dry-run should also short-circuit, but preflight-only is the surgical equivalent. Clarify: `--dry-run --issue 7` should be treated as `--preflight-only --issue 7`.\n- **Normal sync recorder vs surgical recorder**: `handle_sync_cmd` creates a `SyncRunRecorder` for normal sync (line 2159). When dispatching to surgical, skip this since `run_sync_surgical` creates its own. 
Use the `options.issues.is_empty() && options.merge_requests.is_empty()` check to decide.\n- **Robot-docs backward compatibility**: New fields are additive. Existing robot-docs consumers that ignore unknown fields are unaffected.\n- **No project specified with IIDs**: If `--issue 7` is passed without `-p project`, the dispatch should fail with a clear usage error (validation in bd-1lja).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function to call), bd-1lja (SyncOptions extensions with `issues`, `merge_requests`, `project`, `preflight_only` fields), bd-wcja (SyncResult surgical fields for assertion)\n- **No downstream dependents** — this is the final wiring bead for the main code path.\n- Must NOT modify the normal sync pipeline behavior. The dispatch is a pure conditional branch at function entry.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:10.648172Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:46:48.925597Z","closed_at":"2026-02-19T13:46:48.925543Z","close_reason":"Wired surgical dispatch in run_sync (early return to run_sync_surgical when is_surgical()), short-circuited handle_sync_cmd to skip redundant outer recorder, updated robot-docs with surgical sync flags and schema. 
886 tests pass.","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-3bo","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\nCommands:\n- gi count issues → 'Issues: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\nFiles: src/cli/commands/count.ts\nDone when: Counts match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:16.190875Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156293Z","closed_at":"2026-01-25T15:21:35.156293Z","deleted_at":"2026-01-25T15:21:35.156290Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-3bpk","title":"NOTE-0A: Upsert/sweep for issue discussion notes","description":"## Background\nIssue discussion note ingestion uses a delete/reinsert pattern (DELETE FROM notes WHERE discussion_id = ? at line 132-135 of src/ingestion/discussions.rs then re-insert). This makes notes.id unstable across syncs. MR discussion notes already use upsert (ON CONFLICT(gitlab_id) DO UPDATE at line 470-536 of src/ingestion/mr_discussions.rs) producing stable IDs. Phase 2 depends on stable notes.id as source_id for note documents.\n\n## Approach\nRefactor src/ingestion/discussions.rs to match the MR pattern in src/ingestion/mr_discussions.rs:\n\n1. Create shared NoteUpsertOutcome struct (in src/ingestion/discussions.rs, also used by mr_discussions.rs):\n pub struct NoteUpsertOutcome { pub local_note_id: i64, pub changed_semantics: bool }\n\n2. Replace insert_note() (line 201-233) with upsert_note_for_issue(). 
Current signature is:\n fn insert_note(conn: &Connection, discussion_id: i64, note: &NormalizedNote, payload_id: Option) -> Result<()>\n New signature:\n fn upsert_note_for_issue(conn: &Connection, discussion_id: i64, note: &NormalizedNote, last_seen_at: i64, payload_id: Option) -> Result\n\n Use ON CONFLICT(gitlab_id) DO UPDATE SET body, note_type, updated_at, last_seen_at, resolvable, resolved, resolved_by, resolved_at, position_old_path, position_new_path, position_old_line, position_new_line, position_type, position_line_range_start, position_line_range_end, position_base_sha, position_start_sha, position_head_sha\n\n IMPORTANT: The current issue insert_note() only populates: gitlab_id, discussion_id, project_id, note_type, is_system, author_username, body, created_at, updated_at, last_seen_at, position (integer array order), resolvable, resolved, resolved_by, resolved_at, raw_payload_id. It does NOT populate the decomposed position columns (position_new_path, etc.). The MR upsert_note() at line 470 DOES populate all decomposed position columns. Your upsert must include ALL columns from the MR pattern. The NormalizedNote struct (from src/gitlab/transformers.rs) has all position fields.\n\n3. Change detection via pre-read: SELECT existing note before upsert, compare semantic fields (body, note_type, resolved, resolved_by, positions). Exclude updated_at/last_seen_at from semantic comparison. Use IS NOT for NULL-safe comparison.\n\n4. Add sweep_stale_issue_notes(conn, discussion_id, last_seen_at) — DELETE FROM notes WHERE discussion_id = ? AND last_seen_at < ?\n\n5. Replace the delete-reinsert loop (lines 132-139) with:\n for note in notes { let outcome = upsert_note_for_issue(&tx, local_discussion_id, ¬e, last_seen_at, None)?; }\n sweep_stale_issue_notes(&tx, local_discussion_id, last_seen_at)?;\n\n6. Update upsert_note() in mr_discussions.rs (line 470) to return NoteUpsertOutcome with same semantic change detection. 
Current signature returns Result<()>.\n\nReference files:\n- src/ingestion/mr_discussions.rs: upsert_note() line 470, sweep_stale_notes() line 551\n- src/ingestion/discussions.rs: insert_note() line 201, delete pattern line 132-135\n- src/gitlab/transformers.rs: NormalizedNote struct definition\n\n## Files\n- MODIFY: src/ingestion/discussions.rs (refactor insert_note -> upsert + sweep, lines 132-233)\n- MODIFY: src/ingestion/mr_discussions.rs (return NoteUpsertOutcome from upsert_note at line 470)\n\n## TDD Anchor\nRED: test_issue_note_upsert_stable_id — insert 2 notes, record IDs, re-sync same gitlab_ids, assert IDs unchanged.\nGREEN: Implement upsert_note_for_issue with ON CONFLICT.\nVERIFY: cargo test upsert_stable_id -- --nocapture\nTests: test_issue_note_upsert_detects_body_change, test_issue_note_upsert_unchanged_returns_false, test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change, test_issue_note_sweep_removes_stale, test_issue_note_upsert_returns_local_id\n\n## Acceptance Criteria\n- [ ] upsert_note_for_issue() uses ON CONFLICT(gitlab_id) DO UPDATE\n- [ ] Local note IDs stable across re-syncs of identical data\n- [ ] changed_semantics = true only for body/note_type/resolved/position changes\n- [ ] changed_semantics = false for updated_at-only changes\n- [ ] sweep removes notes with stale last_seen_at\n- [ ] MR upsert_note() returns NoteUpsertOutcome\n- [ ] Issue upsert populates ALL position columns (matching MR pattern)\n- [ ] All 6 tests pass, clippy clean\n\n## Edge Cases\n- NULL body: IS NOT comparison handles NULLs correctly\n- UNIQUE(gitlab_id) already exists on notes table (migration 002)\n- last_seen_at prevents stale-sweep of notes currently being ingested\n- Issue notes currently don't populate position_new_path etc. 
— the new upsert must extract these from NormalizedNote (check that the transformer populates them for issue DiffNotes)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:59:14.783336Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:24.151831Z","closed_at":"2026-02-12T18:13:24.151781Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"],"dependencies":[{"issue_id":"bd-3bpk","depends_on_id":"bd-18bf","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3bpk","depends_on_id":"bd-2b28","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3bpk","depends_on_id":"bd-2ezb","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-3bpk","depends_on_id":"bd-jbfw","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-3cjp","title":"NOTE-2I: Batch parent metadata cache for note regeneration","description":"## Background\nextract_note_document() (from NOTE-2C) fetches parent entity metadata per note via SQL queries. During initial backfill of ~8K notes, this creates N+1 amplification — 50 notes on same MR = 50 identical parent lookups. This is a performance optimization for batch regeneration only.\n\n## Approach\n1. Add ParentMetadataCache struct in src/documents/extractor.rs:\n pub struct ParentMetadataCache {\n cache: HashMap<(String, i64), ParentMetadata>,\n }\n Key: (noteable_type: String, parent_local_id: i64)\n ParentMetadata struct: { iid: i64, title: String, web_url: String, labels: Vec, project_path: String }\n\n Methods:\n - pub fn new() -> Self\n - pub fn get_or_fetch(&mut self, conn: &Connection, noteable_type: &str, parent_id: i64) -> Result>\n get_or_fetch uses HashMap entry API: on miss, fetches from DB (same queries as extract_note_document), caches, returns ref.\n\n2. 
Add pub fn extract_note_document_cached(conn: &Connection, note_id: i64, cache: &mut ParentMetadataCache) -> Result>:\n Same logic as extract_note_document but calls cache.get_or_fetch() instead of inline parent queries. The uncached version remains for single-note use.\n\n3. Update batch regeneration loop in src/documents/regenerator.rs. The main regeneration loop is in regenerate_dirty_documents() (top of file, ~line 20). It processes dirty entries one at a time via regenerate_one() (line 86). For batch cache to work:\n - Create ParentMetadataCache before the loop\n - In the SourceType::Note arm of regenerate_one, pass the cache through\n - This requires either making regenerate_one() take an optional cache parameter, or restructuring to handle Note specially in the loop body.\n\n Cleanest approach: Add cache: &mut Option parameter to regenerate_one(). Initialize as Some(ParentMetadataCache::new()) before the loop. Only SourceType::Note uses it. Other types ignore it.\n\n Cache is created fresh per regenerate_dirty_documents() call — no cross-invocation persistence.\n\n## Files\n- MODIFY: src/documents/extractor.rs (add ParentMetadataCache struct + extract_note_document_cached)\n- MODIFY: src/documents/regenerator.rs (add cache parameter to regenerate_one, use in batch loop)\n- MODIFY: src/documents/mod.rs (export ParentMetadataCache if needed externally)\n\n## TDD Anchor\nRED: test_note_regeneration_batch_uses_cache — insert project, issue, 10 notes on same issue, mark all dirty, regenerate all, assert all 10 documents created correctly.\nGREEN: Implement ParentMetadataCache and extract_note_document_cached.\nVERIFY: cargo test note_regeneration_batch -- --nocapture\nTests: test_note_regeneration_cache_consistent_with_direct_extraction (cached output == uncached output), test_note_regeneration_cache_invalidates_across_parents (notes from different parents get correct metadata)\n\n## Acceptance Criteria\n- [ ] ParentMetadataCache reduces DB queries during batch 
regeneration (10 notes on 1 parent = 1 parent fetch, not 10)\n- [ ] Cached extraction produces identical DocumentData output to uncached\n- [ ] Cache keyed per (noteable_type, parent_id) — no cross-parent leakage\n- [ ] Cache scoped to single regenerate_dirty_documents call — no persistence or invalidation complexity\n- [ ] All 3 tests pass\n\n## Dependency Context\n- Depends on NOTE-2C (bd-18yh): extract_note_document function must exist to create the cached variant\n\n## Edge Cases\n- Parent deleted between cache creation and lookup: get_or_fetch returns None, extract_note_document_cached returns None (same as uncached)\n- Very large batch (10K+ notes): cache grows but is bounded by number of unique parents (typically <100 issues/MRs)\n- Cache miss for orphaned discussion: cached None result prevents repeated failed lookups","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-12T17:03:00.515490Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:15.870738Z","closed_at":"2026-02-12T18:13:15.870693Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"]} @@ -261,11 +261,11 @@ {"id":"bd-6pmy","title":"Implement LoreApp Model trait (full update/view skeleton)","description":"## Background\nLoreApp is the central Model implementation for FrankenTUI's Elm Architecture. It owns all state (AppState), the navigation stack, task supervisor, db manager, clock, config, and crash context. The update() method is the single entry point for all state transitions, implementing a 5-stage key dispatch pipeline. 
The view() method routes to per-screen render functions.\n\n## Approach\nExpand crates/lore-tui/src/app.rs:\n- LoreApp struct fields: config (Config), db (DbManager), state (AppState), navigation (NavigationStack), supervisor (TaskSupervisor), clock (Box), input_mode (InputMode), command_registry (CommandRegistry), crash_context (CrashContext)\n- init() -> Cmd: install crash_context panic hook, return Cmd::task that loads dashboard data\n- update(msg: Msg) -> Option>: push CrashEvent to crash_context FIRST, then full dispatch with 5-stage interpret_key pipeline:\n 1. Quit check (q in Normal mode, Ctrl+C always)\n 2. InputMode routing (Text->delegate to text widget, Palette->delegate to palette, GoPrefix->check timeout+destination)\n 3. Global shortcuts (H=Home, Esc=back, Ctrl+P=palette, g=prefix, Ctrl+O/I=jump)\n 4. Screen-local keys (delegate to AppState::interpret_screen_key)\n 5. Fallback (unhandled key, no-op)\n\n**Key normalization pass in interpret_key():**\nBefore the 5-stage pipeline, normalize terminal key variants:\n- Backspace variants: map Delete/Backspace to canonical Backspace\n- Alt key variants: map Meta+key to Alt+key\n- Shift+Tab: map BackTab to Shift+Tab\n- This ensures consistent behavior across terminals (iTerm2, Alacritty, Terminal.app, tmux)\n\n- For non-key messages: match on Msg variants, update state, optionally return Cmd::task for async work\n- Stale result guard: check supervisor.is_current() before applying *Loaded results\n- view(frame): match navigation.current() to dispatch to per-screen view functions (stub initially)\n- subscriptions(): tick timer (250ms for spinner animation), debounce timers\n\n## Acceptance Criteria\n- [ ] LoreApp struct compiles with all required fields including crash_context\n- [ ] init() installs panic hook and returns a Cmd that triggers dashboard load\n- [ ] update() pushes CrashEvent to crash_context before dispatching\n- [ ] update() handles Msg::Quit by returning None\n- [ ] update() handles NavigateTo 
by pushing nav stack and spawning load_screen\n- [ ] update() handles GoBack by popping nav stack\n- [ ] interpret_key normalizes Backspace/Alt/Shift+Tab variants before dispatch\n- [ ] interpret_key 5-stage pipeline dispatches correctly per InputMode\n- [ ] GoPrefix times out after 500ms (checked via clock.now())\n- [ ] Stale results dropped: IssueListLoaded with old generation ignored\n- [ ] view() routes to correct screen render function based on navigation.current()\n- [ ] subscriptions() returns tick timer\n\n## Files\n- MODIFY: crates/lore-tui/src/app.rs (expand from minimal to full implementation)\n\n## TDD Anchor\nRED: Write test_quit_returns_none that creates LoreApp (with FakeClock, in-memory DB), calls update(Msg::Quit), asserts it returns None.\nGREEN: Implement update() with Quit match arm.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_quit\n\nAdditional tests:\n- test_navigate_to_pushes_stack: update(NavigateTo(IssueList)) changes navigation.current()\n- test_go_back_pops_stack: after push, GoBack returns to previous screen\n- test_stale_result_dropped: IssueListLoaded with old generation doesn't update state\n- test_go_prefix_timeout: GoPrefix cancels after 500ms (using FakeClock)\n- test_key_normalization_backspace: both Delete and Backspace map to canonical Backspace\n- test_crash_context_records_events: after update(), crash_context.events.len() increases\n\n## Edge Cases\n- update() must handle rapid-fire messages without blocking (no long computations in update)\n- Ctrl+C must always quit regardless of InputMode (safety escape)\n- GoPrefix must cancel on any non-destination key, not just on timeout\n- Text mode must pass Esc through to blur text input first, then Normal mode handles Esc for navigation\n- Key normalization must handle unknown/exotic key codes gracefully (pass through unchanged)\n\n## Dependency Context\nUses DbManager from \"Implement DbManager\" (bd-2kop).\nUses Clock/FakeClock from \"Implement Clock 
trait\" (bd-2lg6).\nUses Msg, Screen, InputMode from \"Implement core types\" (bd-c9gk).\nUses NavigationStack from \"Implement NavigationStack\" (bd-1qpp).\nUses TaskSupervisor from \"Implement TaskSupervisor\" (bd-3le2).\nUses CrashContext from \"Implement crash_context ring buffer\" (bd-2fr7).\nUses CommandRegistry from \"Implement CommandRegistry\" (bd-38lb).\nUses AppState from \"Implement AppState composition\" (bd-1v9m).","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:55:27.130909Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:52:30.228655Z","closed_at":"2026-02-12T20:52:30.228596Z","close_reason":"LoreApp full Model impl: struct with all fields, 5-stage key dispatch, navigate_to, handle_msg with stale guard, 22 app tests. Fixed crossterm→ftui type migration.","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-6pmy","depends_on_id":"bd-1qpp","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-1v9m","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-2emv","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-2fr7","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-2kop","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-2lg6","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-38lb","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-6pmy","depends_on_id":"bd-3le2","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-88m","title":"[CP1] Issue ingestion module","description":"Fetch and store issues with cursor-based 
incremental sync.\n\n## Module\nsrc/ingestion/issues.rs\n\n## Key Structs\n\n### IngestIssuesResult\n- fetched: usize\n- upserted: usize\n- labels_created: usize\n- issues_needing_discussion_sync: Vec\n\n### IssueForDiscussionSync\n- local_issue_id: i64\n- iid: i64\n- updated_at: i64\n\n## Main Function\npub async fn ingest_issues(conn, client, config, project_id, gitlab_project_id) -> Result\n\n## Logic\n1. Get current cursor from sync_cursors (updated_at_cursor, tie_breaker_id)\n2. Paginate through issues updated after cursor with cursor_rewind_seconds\n3. Apply local filtering for tuple cursor semantics:\n - Skip if issue.updated_at < cursor_updated_at\n - Skip if issue.updated_at == cursor_updated_at AND issue.id <= cursor_gitlab_id\n4. For each issue passing filter:\n - Begin transaction\n - Store raw payload (compressed)\n - Transform and upsert issue\n - Clear existing label links (DELETE FROM issue_labels)\n - Extract and upsert labels\n - Link issue to labels via junction\n - Commit transaction\n - Track for discussion sync eligibility\n5. Incremental cursor update every 100 issues\n6. Final cursor update\n7. 
Determine issues needing discussion sync: where updated_at > discussions_synced_for_updated_at\n\n## Helper Functions\n- get_cursor(conn, project_id) -> (Option, Option)\n- get_discussions_synced_at(conn, issue_id) -> Option\n- upsert_issue(conn, issue, payload_id) -> usize\n- get_local_issue_id(conn, gitlab_id) -> i64\n- clear_issue_labels(conn, issue_id)\n- upsert_label(conn, label) -> bool\n- get_label_id(conn, project_id, name) -> i64\n- link_issue_label(conn, issue_id, label_id)\n- update_cursor(conn, project_id, resource_type, updated_at, gitlab_id)\n\nFiles: src/ingestion/mod.rs, src/ingestion/issues.rs\nTests: tests/issue_ingestion_tests.rs\nDone when: Issues, labels, issue_labels populated correctly with resumable cursor","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:35.655708Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.806982Z","closed_at":"2026-01-25T17:02:01.806982Z","deleted_at":"2026-01-25T17:02:01.806977Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-8ab7","title":"Implement Issue Detail (state + action + view)","description":"## Background\nThe Issue Detail screen shows a single issue with progressive hydration: Phase 1 loads metadata (fast), Phase 2 loads discussions asynchronously, Phase 3 loads thread bodies on expand. 
All subqueries run inside a single read transaction for snapshot consistency.\n\n## Approach\nState (state/issue_detail.rs):\n- IssueDetailState: current_key (Option), metadata (Option), discussions (Vec), discussions_loaded (bool), cross_refs (Vec), tree_state (TreePersistState), scroll_offset (usize)\n- IssueMetadata: iid, title, description, state, author, assignee, labels, milestone, created_at, updated_at, web_url, status_name, status_icon, closing_mr_iids, related_issue_iids\n- handle_key(): j/k scroll, Enter expand discussion thread, d open description, x cross-refs, o open in browser, t scoped timeline, Esc back to list\n\nAction (action.rs):\n- fetch_issue_detail(conn, key, clock) -> Result: uses with_read_snapshot for snapshot consistency. Fetches metadata, discussion count, cross-refs in single transaction.\n- fetch_discussions(conn, key) -> Result, LoreError>: loads discussions for the issue, separate async call (Phase 2 of hydration)\n\nView (view/issue_detail.rs):\n- render_issue_detail(frame, state, area, theme): header (IID, title, state badge, labels), description (markdown rendered with sanitization), discussions (tree widget), cross-references section\n- Header: \"Issue #42 — Fix auth flow [opened]\" with colored state badge\n- Description: rendered markdown, scrollable\n- Discussions: loaded async, shown with spinner until ready\n- Cross-refs: closing MRs, related issues as navigable links\n\n## Acceptance Criteria\n- [ ] Metadata loads in Phase 1 (p95 < 75ms on M-tier)\n- [ ] Discussions load async in Phase 2 (spinner shown while loading)\n- [ ] All detail subqueries run inside single read transaction (snapshot consistency)\n- [ ] Description text sanitized via sanitize_for_terminal()\n- [ ] Discussion tree renders with expand/collapse\n- [ ] Cross-references navigable via Enter\n- [ ] Esc returns to Issue List with cursor position preserved\n- [ ] Open in browser (o) uses classify_safe_url before launching\n- [ ] Scoped timeline (t) navigates 
to Timeline filtered for this entity\n\n## Files\n- MODIFY: crates/lore-tui/src/state/issue_detail.rs (expand from stub)\n- MODIFY: crates/lore-tui/src/action.rs (add fetch_issue_detail, fetch_discussions)\n- CREATE: crates/lore-tui/src/view/issue_detail.rs\n\n## TDD Anchor\nRED: Write test_fetch_issue_detail_snapshot in action.rs that inserts an issue with 2 discussions, calls fetch_issue_detail, asserts metadata and discussion count are correct.\nGREEN: Implement fetch_issue_detail with read transaction.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_fetch_issue_detail\n\n## Edge Cases\n- Issue with no description: show placeholder \"[No description]\"\n- Issue with hundreds of discussions: paginate or lazy-load beyond first 50\n- Cross-refs to entities not in local DB: show as text-only (not navigable)\n- Issue description with embedded images: show [image] placeholder (no inline rendering)\n- Entity cache (future): near-instant reopen during Enter/Esc drill workflows\n\n## Dependency Context\nUses discussion tree and cross-ref widgets from \"Implement discussion tree + cross-reference widgets\" task.\nUses EntityKey, Msg from \"Implement core types\" task.\nUses with_read_snapshot from DbManager from \"Implement DbManager\" task.\nUses sanitize_for_terminal from \"Implement terminal safety module\" task.\nUses Clock for timestamps from \"Implement Clock trait\" 
task.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:59:10.081146Z","created_by":"tayloreernisse","updated_at":"2026-02-18T20:17:02.568850Z","closed_at":"2026-02-18T20:17:02.568729Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-8ab7","depends_on_id":"bd-1cl9","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8ab7","depends_on_id":"bd-1d6z","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8ab7","depends_on_id":"bd-3ei1","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} -{"id":"bd-8con","title":"lore related: semantic similarity discovery","description":"## Background\nGiven any entity or free text, find semantically related entities using vector embeddings. No other GitLab tool does this — glab, GitLab Advanced Search, and even paid tiers are keyword-only. This finds conceptual connections humans miss.\n\n## Current Infrastructure (Verified 2026-02-12)\n- sqlite-vec extension loaded via sqlite3_vec_init in src/core/db.rs:84\n- Embeddings stored in: embedding_metadata table (chunk info) + vec0 virtual table named `embeddings` (vectors)\n- Migration 009 creates embedding infrastructure\n- search_vector() at src/search/vector.rs:43 — works with sqlite-vec KNN queries\n- OllamaClient::embed_batch() at src/embedding/ollama.rs:103 — batch embedding\n- Model: nomic-embed-text, 768 dimensions, context_length=2048 tokens (~1500 bytes)\n- 61K documents in DB, embedding coverage TBD\n\n### sqlite-vec Distance Metric\nThe `embeddings` virtual table is `vec0(embedding float[768])`. sqlite-vec's MATCH query returns L2 (Euclidean) distance by default. Lower distance = more similar. The `search_vector()` function returns `VectorResult { document_id: i64, distance: f64 }`.\n\n## Approach\n\n### Entity Mode: lore related issues N\n1. 
Look up document for issue N:\n```sql\nSELECT d.id, d.content_text\nFROM documents d\nJOIN issues i ON d.source_type = 'issue' AND d.source_id = i.id\nWHERE i.iid = ?1 AND i.project_id = (SELECT id FROM projects WHERE ...)\n```\nNOTE: `documents.source_id` is the internal DB id from the source table (issues.id), NOT the GitLab IID. See migration 007 comment: `source_id INTEGER NOT NULL -- local DB id in the source table`.\n\n2. Get its embedding: Look up via embedding_metadata which maps document_id -> rowid in the vec0 table:\n```sql\nSELECT em.rowid\nFROM embedding_metadata em\nWHERE em.document_id = ?1\nLIMIT 1 -- use first chunk's embedding as representative\n```\nThen extract the embedding vector from the vec0 table to use as the KNN query.\n\nAlternatively, embed the document's content_text on-the-fly via OllamaClient (simpler, more robust):\n```rust\nlet embedding = client.embed_batch(&[&doc.content_text]).await?[0].clone();\n```\n\n3. Call search_vector(conn, &embedding, limit * 2) for KNN — multiply limit to have room after filtering self\n4. Exclude self (filter out source document_id from results)\n5. Hydrate results: join documents -> issues/mrs/discussions for title, url, labels, author\n6. Compute shared_labels: parse `documents.label_names` (JSON array string) for both source and each result, intersect\n7. Return ranked list\n\n### Query Mode: lore related 'free text'\n1. Embed query via OllamaClient::embed_batch(&[query_text])\n2. Call search_vector(conn, &query_embedding, limit)\n3. Hydrate and return (same as entity mode minus self-exclusion)\n\n### Key Design Decision\nThis is intentionally SIMPLER than hybrid search. No FTS, no RRF. Pure vector similarity. The point is conceptual relatedness, not keyword matching.\n\n### Distance to Similarity Score Conversion\nsqlite-vec returns L2 (Euclidean) distance. 
Convert to 0-1 similarity:\n```rust\n/// Convert L2 distance to a 0-1 similarity score.\n/// Uses inverse relationship: closer (lower distance) = higher similarity.\n/// The +1 prevents division by zero and ensures score is in (0, 1].\nfn distance_to_similarity(distance: f64) -> f64 {\n 1.0 / (1.0 + distance)\n}\n```\nFor normalized embeddings (which nomic-embed-text produces), L2 distance ranges roughly 0-2. This formula maps:\n- distance 0.0 -> similarity 1.0 (identical)\n- distance 1.0 -> similarity 0.5\n- distance 2.0 -> similarity 0.33\n\n### Label Extraction for shared_labels\n```rust\nfn parse_label_names(label_names_json: &Option) -> HashSet {\n label_names_json\n .as_deref()\n .and_then(|s| serde_json::from_str::>(s).ok())\n .unwrap_or_default()\n .into_iter()\n .collect()\n}\n\nlet source_labels = parse_label_names(&source_doc.label_names);\nlet result_labels = parse_label_names(&result_doc.label_names);\nlet shared: Vec = source_labels.intersection(&result_labels).cloned().collect();\n```\n\n## Function Signatures\n\n```rust\n// New: src/cli/commands/related.rs\npub struct RelatedArgs {\n pub entity_type: Option, // \"issues\" or \"mrs\"\n pub entity_iid: Option,\n pub query: Option, // free text mode\n pub project: Option,\n pub limit: Option,\n}\n\npub async fn run_related(\n config: &Config,\n args: RelatedArgs,\n) -> Result\n\n// Reuse from src/search/vector.rs:43\npub fn search_vector(\n conn: &Connection,\n query_embedding: &[f32],\n limit: usize,\n) -> Result>\n// VectorResult { document_id: i64, distance: f64 }\n\n// Reuse from src/embedding/ollama.rs:103\npub async fn embed_batch(&self, texts: &[&str]) -> Result>>\n```\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"source\": { \"type\": \"issue\", \"iid\": 3864, \"title\": \"...\" },\n \"query\": \"switch throw time...\",\n \"results\": [{\n \"source_type\": \"issue\",\n \"iid\": 3800,\n \"title\": \"Rail Break Card\",\n \"url\": \"...\",\n \"similarity_score\": 
0.87,\n \"shared_labels\": [\"customer:BNSF\"],\n \"shared_authors\": [],\n \"project_path\": \"vs/typescript-code\"\n }]\n },\n \"meta\": { \"elapsed_ms\": 42, \"mode\": \"entity\", \"embedding_dims\": 768, \"distance_metric\": \"l2\" }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nRelated {\n /// Entity type (\"issues\" or \"mrs\") or free text query\n query_or_type: String,\n /// Entity IID (when first arg is entity type)\n iid: Option,\n /// Maximum results\n #[arg(short = 'n', long, default_value = \"10\")]\n limit: usize,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/related.rs:\n- test_related_entity_excludes_self: insert doc + embedding for issue, query related, assert source doc not in results\n- test_related_shared_labels: insert 2 docs with overlapping labels (JSON in label_names), assert shared_labels computed correctly\n- test_related_empty_embeddings: no embeddings in DB, assert exit code 14 with helpful error\n- test_related_query_mode: embed free text via mock, assert results returned\n- test_related_similarity_score_range: all scores between 0.0 and 1.0\n- test_distance_to_similarity: unit test the conversion function (0.0->1.0, 1.0->0.5, large->~0.0)\n\nGREEN: Implement related command using search_vector + hydration\n\nVERIFY:\n```bash\ncargo test related:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J related issues 3864 -n 5 | jq '.data.results[0].similarity_score'\n```\n\n## Acceptance Criteria\n- [ ] lore related issues N returns top-K semantically similar entities\n- [ ] lore related mrs N works for merge requests\n- [ ] lore related 'free text' works as concept search (requires Ollama)\n- [ ] Results exclude the input entity itself\n- [ ] similarity_score is 0-1 range (higher = more similar), converted from L2 distance\n- [ ] Robot mode includes shared_labels (from documents.label_names JSON), 
shared_authors per result\n- [ ] Human mode shows ranked list with titles, scores, common labels\n- [ ] No embeddings in DB: exit code 14 with message \"Run 'lore embed' first\"\n- [ ] Ollama unavailable (query mode only): exit code 14 with suggestion\n- [ ] Performance: <1s for 61K documents\n- [ ] Command registered in main.rs and robot-docs\n\n## Edge Cases\n- Entity has no embedding (added after last lore embed): embed its content_text on-the-fly via OllamaClient, or exit 14 if Ollama unavailable\n- All results have very low similarity (<0.3): include warning \"No strongly related entities found\"\n- Entity is a discussion (not issue/MR): should still work (documents table has discussion docs)\n- Multiple documents per entity (discussion docs): use the entity-level document, not discussion subdocs\n- Free text query very short (1-2 words): may produce noisy results, add warning\n- Entity not found in DB: exit code 17 with suggestion to sync\n- Ambiguous project: exit code 18 with suggestion to use -p flag\n- documents.label_names may be NULL or invalid JSON — parse_label_names handles both gracefully\n\n## Dependency Context\n- **bd-1ksf (hybrid search)**: BLOCKER. Shares OllamaClient infrastructure. Also ensures async search.rs patterns are established. 
Related reuses the same vector search infrastructure.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/related.rs\n- src/cli/commands/mod.rs (add pub mod related; re-export)\n- src/main.rs (register Related subcommand in Commands enum, add handle_related fn)\n- Reuse: search_vector() from src/search/vector.rs, OllamaClient from src/embedding/ollama.rs","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:46:58.665923Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:31:35.489138Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence","search"],"dependencies":[{"issue_id":"bd-8con","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8con","depends_on_id":"bd-1ksf","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} +{"id":"bd-8con","title":"lore related: semantic similarity discovery","description":"## Background\nGiven any entity or free text, find semantically related entities using vector embeddings. No other GitLab tool does this — glab, GitLab Advanced Search, and even paid tiers are keyword-only. This finds conceptual connections humans miss.\n\n## Current Infrastructure (Verified 2026-02-12)\n- sqlite-vec extension loaded via sqlite3_vec_init in src/core/db.rs:84\n- Embeddings stored in: embedding_metadata table (chunk info) + vec0 virtual table named `embeddings` (vectors)\n- Migration 009 creates embedding infrastructure\n- search_vector() at src/search/vector.rs:43 — works with sqlite-vec KNN queries\n- OllamaClient::embed_batch() at src/embedding/ollama.rs:103 — batch embedding\n- Model: nomic-embed-text, 768 dimensions, context_length=2048 tokens (~1500 bytes)\n- 61K documents in DB, embedding coverage TBD\n\n### sqlite-vec Distance Metric\nThe `embeddings` virtual table is `vec0(embedding float[768])`. sqlite-vec's MATCH query returns L2 (Euclidean) distance by default. 
Lower distance = more similar. The `search_vector()` function returns `VectorResult { document_id: i64, distance: f64 }`.\n\n## Approach\n\n### Entity Mode: lore related issues N\n1. Look up document for issue N:\n```sql\nSELECT d.id, d.content_text\nFROM documents d\nJOIN issues i ON d.source_type = 'issue' AND d.source_id = i.id\nWHERE i.iid = ?1 AND i.project_id = (SELECT id FROM projects WHERE ...)\n```\nNOTE: `documents.source_id` is the internal DB id from the source table (issues.id), NOT the GitLab IID. See migration 007 comment: `source_id INTEGER NOT NULL -- local DB id in the source table`.\n\n2. Get its embedding: Look up via embedding_metadata which maps document_id -> rowid in the vec0 table:\n```sql\nSELECT em.rowid\nFROM embedding_metadata em\nWHERE em.document_id = ?1\nLIMIT 1 -- use first chunk's embedding as representative\n```\nThen extract the embedding vector from the vec0 table to use as the KNN query.\n\nAlternatively, embed the document's content_text on-the-fly via OllamaClient (simpler, more robust):\n```rust\nlet embedding = client.embed_batch(&[&doc.content_text]).await?[0].clone();\n```\n\n3. Call search_vector(conn, &embedding, limit * 2) for KNN — multiply limit to have room after filtering self\n4. Exclude self (filter out source document_id from results)\n5. Hydrate results: join documents -> issues/mrs/discussions for title, url, labels, author\n6. Compute shared_labels: parse `documents.label_names` (JSON array string) for both source and each result, intersect\n7. Return ranked list\n\n### Query Mode: lore related 'free text'\n1. Embed query via OllamaClient::embed_batch(&[query_text])\n2. Call search_vector(conn, &query_embedding, limit)\n3. Hydrate and return (same as entity mode minus self-exclusion)\n\n### Key Design Decision\nThis is intentionally SIMPLER than hybrid search. No FTS, no RRF. Pure vector similarity. 
The point is conceptual relatedness, not keyword matching.\n\n### Distance to Similarity Score Conversion\nsqlite-vec returns L2 (Euclidean) distance. Convert to 0-1 similarity:\n```rust\n/// Convert L2 distance to a 0-1 similarity score.\n/// Uses inverse relationship: closer (lower distance) = higher similarity.\n/// The +1 prevents division by zero and ensures score is in (0, 1].\nfn distance_to_similarity(distance: f64) -> f64 {\n 1.0 / (1.0 + distance)\n}\n```\nFor normalized embeddings (which nomic-embed-text produces), L2 distance ranges roughly 0-2. This formula maps:\n- distance 0.0 -> similarity 1.0 (identical)\n- distance 1.0 -> similarity 0.5\n- distance 2.0 -> similarity 0.33\n\n### Label Extraction for shared_labels\n```rust\nfn parse_label_names(label_names_json: &Option) -> HashSet {\n label_names_json\n .as_deref()\n .and_then(|s| serde_json::from_str::>(s).ok())\n .unwrap_or_default()\n .into_iter()\n .collect()\n}\n\nlet source_labels = parse_label_names(&source_doc.label_names);\nlet result_labels = parse_label_names(&result_doc.label_names);\nlet shared: Vec = source_labels.intersection(&result_labels).cloned().collect();\n```\n\n## Function Signatures\n\n```rust\n// New: src/cli/commands/related.rs\npub struct RelatedArgs {\n pub entity_type: Option, // \"issues\" or \"mrs\"\n pub entity_iid: Option,\n pub query: Option, // free text mode\n pub project: Option,\n pub limit: Option,\n}\n\npub async fn run_related(\n config: &Config,\n args: RelatedArgs,\n) -> Result\n\n// Reuse from src/search/vector.rs:43\npub fn search_vector(\n conn: &Connection,\n query_embedding: &[f32],\n limit: usize,\n) -> Result>\n// VectorResult { document_id: i64, distance: f64 }\n\n// Reuse from src/embedding/ollama.rs:103\npub async fn embed_batch(&self, texts: &[&str]) -> Result>>\n```\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"source\": { \"type\": \"issue\", \"iid\": 3864, \"title\": \"...\" },\n \"query\": \"switch throw 
time...\",\n \"results\": [{\n \"source_type\": \"issue\",\n \"iid\": 3800,\n \"title\": \"Rail Break Card\",\n \"url\": \"...\",\n \"similarity_score\": 0.87,\n \"shared_labels\": [\"customer:BNSF\"],\n \"shared_authors\": [],\n \"project_path\": \"vs/typescript-code\"\n }]\n },\n \"meta\": { \"elapsed_ms\": 42, \"mode\": \"entity\", \"embedding_dims\": 768, \"distance_metric\": \"l2\" }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nRelated {\n /// Entity type (\"issues\" or \"mrs\") or free text query\n query_or_type: String,\n /// Entity IID (when first arg is entity type)\n iid: Option,\n /// Maximum results\n #[arg(short = 'n', long, default_value = \"10\")]\n limit: usize,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/related.rs:\n- test_related_entity_excludes_self: insert doc + embedding for issue, query related, assert source doc not in results\n- test_related_shared_labels: insert 2 docs with overlapping labels (JSON in label_names), assert shared_labels computed correctly\n- test_related_empty_embeddings: no embeddings in DB, assert exit code 14 with helpful error\n- test_related_query_mode: embed free text via mock, assert results returned\n- test_related_similarity_score_range: all scores between 0.0 and 1.0\n- test_distance_to_similarity: unit test the conversion function (0.0->1.0, 1.0->0.5, large->~0.0)\n\nGREEN: Implement related command using search_vector + hydration\n\nVERIFY:\n```bash\ncargo test related:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J related issues 3864 -n 5 | jq '.data.results[0].similarity_score'\n```\n\n## Acceptance Criteria\n- [ ] lore related issues N returns top-K semantically similar entities\n- [ ] lore related mrs N works for merge requests\n- [ ] lore related 'free text' works as concept search (requires Ollama)\n- [ ] Results exclude the input entity itself\n- [ ] 
similarity_score is 0-1 range (higher = more similar), converted from L2 distance\n- [ ] Robot mode includes shared_labels (from documents.label_names JSON), shared_authors per result\n- [ ] Human mode shows ranked list with titles, scores, common labels\n- [ ] No embeddings in DB: exit code 14 with message \"Run 'lore embed' first\"\n- [ ] Ollama unavailable (query mode only): exit code 14 with suggestion\n- [ ] Performance: <1s for 61K documents\n- [ ] Command registered in main.rs and robot-docs\n\n## Edge Cases\n- Entity has no embedding (added after last lore embed): embed its content_text on-the-fly via OllamaClient, or exit 14 if Ollama unavailable\n- All results have very low similarity (<0.3): include warning \"No strongly related entities found\"\n- Entity is a discussion (not issue/MR): should still work (documents table has discussion docs)\n- Multiple documents per entity (discussion docs): use the entity-level document, not discussion subdocs\n- Free text query very short (1-2 words): may produce noisy results, add warning\n- Entity not found in DB: exit code 17 with suggestion to sync\n- Ambiguous project: exit code 18 with suggestion to use -p flag\n- documents.label_names may be NULL or invalid JSON — parse_label_names handles both gracefully\n\n## Dependency Context\n- **bd-1ksf (hybrid search)**: BLOCKER. Shares OllamaClient infrastructure. Also ensures async search.rs patterns are established. 
Related reuses the same vector search infrastructure.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/related.rs\n- src/cli/commands/mod.rs (add pub mod related; re-export)\n- src/main.rs (register Related subcommand in Commands enum, add handle_related fn)\n- Reuse: search_vector() from src/search/vector.rs, OllamaClient from src/embedding/ollama.rs","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:46:58.665923Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:56:03.704646Z","closed_at":"2026-02-19T13:56:03.704592Z","close_reason":"Implemented lore related command with entity and query modes, vector similarity search, hydration, human + robot output, 11 unit tests. All quality gates pass.","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence","search"],"dependencies":[{"issue_id":"bd-8con","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8con","depends_on_id":"bd-1ksf","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-8t4","title":"Extract cross-references from resource_state_events","description":"## Background\nresource_state_events includes source_merge_request (with iid) for 'closed by MR' events. 
After state events are stored (Gate 1), post-processing extracts these into entity_references for the cross-reference graph.\n\n## Approach\nCreate src/core/references.rs (new module) or add to events_db.rs:\n\n```rust\n/// Extract cross-references from stored state events and insert into entity_references.\n/// Looks for state events with source_merge_request_id IS NOT NULL (meaning \"closed by MR\").\n/// \n/// Directionality: source = MR (that caused the close), target = issue (that was closed)\npub fn extract_refs_from_state_events(\n conn: &Connection,\n project_id: i64,\n) -> Result // returns count of new references inserted\n```\n\nSQL logic:\n```sql\nINSERT OR IGNORE INTO entity_references (\n source_entity_type, source_entity_id,\n target_entity_type, target_entity_id,\n reference_type, source_method, created_at\n)\nSELECT\n 'merge_request',\n mr.id,\n 'issue',\n rse.issue_id,\n 'closes',\n 'api_state_event',\n rse.created_at\nFROM resource_state_events rse\nJOIN merge_requests mr ON mr.project_id = rse.project_id AND mr.iid = rse.source_merge_request_id\nWHERE rse.source_merge_request_id IS NOT NULL\n AND rse.issue_id IS NOT NULL\n AND rse.project_id = ?1;\n```\n\nKey: source_merge_request_id stores the MR iid, so we JOIN on merge_requests.iid to get the local DB id.\n\nRegister in src/core/mod.rs: `pub mod references;`\n\nCall this after drain_dependent_queue in the sync pipeline (after all state events are stored).\n\n## Acceptance Criteria\n- [ ] State events with source_merge_request_id produce 'closes' references\n- [ ] Source = MR (resolved by iid), target = issue\n- [ ] source_method = 'api_state_event'\n- [ ] INSERT OR IGNORE prevents duplicates with api_closes_issues data\n- [ ] Returns count of newly inserted references\n- [ ] No-op when no state events have source_merge_request_id\n\n## Files\n- src/core/references.rs (new)\n- src/core/mod.rs (add `pub mod references;`)\n- src/cli/commands/sync.rs (call after drain step)\n\n## TDD Loop\nRED: 
tests/references_tests.rs:\n- `test_extract_refs_from_state_events_basic` - seed a \"closed\" state event with source_merge_request_id, verify entity_reference created\n- `test_extract_refs_dedup_with_closes_issues` - insert ref from closes_issues API first, verify state event extraction doesn't duplicate\n- `test_extract_refs_no_source_mr` - state events without source_merge_request_id produce no refs\n\nSetup: create_test_db with migrations 001-011, seed project + issue + MR + state events.\n\nGREEN: Implement extract_refs_from_state_events\n\nVERIFY: `cargo test references -- --nocapture`\n\n## Edge Cases\n- source_merge_request_id may reference an MR not synced locally (cross-project close) — the JOIN will produce no match, which is correct behavior (ref simply not created)\n- Multiple state events can reference the same MR for the same issue (reopen + re-close) — INSERT OR IGNORE handles dedup\n- The merge_requests table might not have the MR yet if sync is still running — call this after all dependent fetches complete","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:32:33.619606Z","created_by":"tayloreernisse","updated_at":"2026-02-04T20:13:28.219791Z","closed_at":"2026-02-04T20:13:28.219633Z","compaction_level":0,"original_size":0,"labels":["extraction","gate-2","phase-b"],"dependencies":[{"issue_id":"bd-8t4","depends_on_id":"bd-1ep","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8t4","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-8t4","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-91j1","title":"Comprehensive robot-docs as agent bootstrap","description":"## Background\nAgents reach for glab because they already know it from training data. lore robot-docs exists but is not comprehensive enough to serve as a zero-training bootstrap. 
An agent encountering lore for the first time should be able to use any command correctly after reading robot-docs output alone.\n\n## Current State (Verified 2026-02-12)\n- `handle_robot_docs()` at src/main.rs:2069\n- Called at no-args in robot mode (main.rs:165) and via Commands::RobotDocs { brief } (main.rs:229)\n- Current output top-level keys: name, version, description, activation, commands, aliases, exit_codes, clap_error_codes, error_format, workflows\n- Missing: response_schema per command, example_output per command, quick_start section, glab equivalence table\n- --brief flag exists but returns shorter version of same structure\n- main.rs is 2579 lines total\n\n## Current robot-docs Output Structure\n```json\n{\n \"name\": \"lore\",\n \"version\": \"0.6.1\",\n \"description\": \"...\",\n \"activation\": { \"flags\": [\"--robot\", \"-J\"], \"env\": \"LORE_ROBOT=1\", \"auto_detect\": \"non-TTY\" },\n \"commands\": [{ \"name\": \"...\", \"description\": \"...\", \"flags\": [...], \"example\": \"...\" }],\n \"aliases\": { ... },\n \"exit_codes\": { ... },\n \"clap_error_codes\": { ... },\n \"error_format\": { ... },\n \"workflows\": { ... }\n}\n```\n\n## Approach\n\n### 1. 
Add quick_start section\nTop-level key with glab-to-lore translation and lore-exclusive feature summary:\n```json\n\"quick_start\": {\n \"glab_equivalents\": [\n { \"glab\": \"glab issue list\", \"lore\": \"lore -J issues -n 50\", \"note\": \"Richer: includes labels, status, closing MRs\" },\n { \"glab\": \"glab issue view 123\", \"lore\": \"lore -J issues 123\", \"note\": \"Includes discussions, work-item status\" },\n { \"glab\": \"glab mr list\", \"lore\": \"lore -J mrs\", \"note\": \"Includes draft status, reviewers\" },\n { \"glab\": \"glab mr view 456\", \"lore\": \"lore -J mrs 456\", \"note\": \"Includes discussions, file changes\" },\n { \"glab\": \"glab api '/projects/:id/issues'\", \"lore\": \"lore -J issues -p project\", \"note\": \"Fuzzy project matching\" }\n ],\n \"lore_exclusive\": [\n \"search: FTS5 + vector hybrid search across all entities\",\n \"who: Expert/workload/reviews analysis per file path or person\",\n \"timeline: Chronological event reconstruction across entities\",\n \"stats: Database statistics with document/note/discussion counts\",\n \"count: Entity counts with state breakdowns\"\n ]\n}\n```\n\n### 2. Add response_schema per command\nFor each command in the commands array, add a `response_schema` field showing the JSON shape:\n```json\n{\n \"name\": \"issues\",\n \"response_schema\": {\n \"ok\": \"boolean\",\n \"data\": { \"type\": \"array|object\", \"fields\": [\"iid\", \"title\", \"state\", \"...\"] },\n \"meta\": { \"elapsed_ms\": \"integer\" }\n }\n}\n```\nCommands with multiple output shapes (list vs detail) need both documented.\n\n### 3. Add example_output per command\nRealistic truncated JSON for each command. Keep each example under 500 bytes.\n\n### 4. Token budget enforcement\n- --brief mode: ONLY quick_start + command names + invocation syntax. Target <4000 tokens (~16000 bytes).\n- Full mode: everything. 
Target <12000 tokens (~48000 bytes).\n- Measure with: `cargo run --release -- --robot robot-docs --brief | wc -c`\n\n## TDD Loop\nRED: Tests in src/main.rs or new src/cli/commands/robot_docs.rs:\n- test_robot_docs_has_quick_start: parse output JSON, assert quick_start.glab_equivalents array has >= 5 entries\n- test_robot_docs_brief_size: --brief output < 16000 bytes\n- test_robot_docs_full_size: full output < 48000 bytes\n- test_robot_docs_has_response_schemas: every command entry has response_schema key\n- test_robot_docs_commands_complete: assert all registered commands appear (issues, mrs, search, who, timeline, count, stats, sync, embed, doctor, health, ingest, generate-docs, show)\n\nGREEN: Add quick_start, response_schema, example_output to robot-docs output\n\nVERIFY:\n```bash\ncargo test robot_docs && cargo clippy --all-targets -- -D warnings\ncargo run --release -- --robot robot-docs | jq '.quick_start.glab_equivalents | length'\n# Should return >= 5\ncargo run --release -- --robot robot-docs --brief | wc -c\n# Should be < 16000\n```\n\n## Acceptance Criteria\n- [ ] robot-docs JSON has quick_start.glab_equivalents array with >= 5 entries\n- [ ] robot-docs JSON has quick_start.lore_exclusive array\n- [ ] Every command entry has response_schema showing the JSON shape\n- [ ] Every command entry has example_output with realistic truncated data\n- [ ] --brief output is under 16000 bytes (~4000 tokens)\n- [ ] Full output is under 48000 bytes (~12000 tokens)\n- [ ] An agent reading ONLY robot-docs can correctly invoke any lore command\n- [ ] cargo test passes with new robot_docs tests\n\n## Edge Cases\n- Commands with multiple output shapes (e.g., issues list vs issues detail via iid) need both schemas documented\n- --fields flag changes output shape -- document the effect in the response_schema\n- robot-docs output must be stable across versions (agents may cache it)\n- Version field should match Cargo.toml version\n\n## Files to Modify\n- src/main.rs fn 
handle_robot_docs() (~line 2069) — add quick_start section, response_schema, example_output\n- Consider extracting to src/cli/commands/robot_docs.rs if the function exceeds 200 lines","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-12T15:44:40.495479Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:49:01.043915Z","closed_at":"2026-02-12T16:49:01.043832Z","close_reason":"Robot-docs enhanced with quick_start (glab equivalents, lore exclusives, read/write split) and example_output for issues/mrs/search/who","compaction_level":0,"original_size":0,"labels":["cli","cli-imp","robot-mode"],"dependencies":[{"issue_id":"bd-91j1","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-9av","title":"[CP1] gi sync-status enhancement","description":"Enhance sync-status from CP0 stub to show issue cursors.\n\n## Changes to src/cli/commands/sync_status.rs\n\nUpdate the existing stub to show:\n- Last run timestamp and duration\n- Cursor positions per project (issues resource_type)\n- Entity counts (issues, discussions, notes)\n\n## Output Format\nLast sync: 2026-01-25 10:30:00 (succeeded, 45s)\n\nCursors:\n group/project-one\n issues: 2026-01-25T10:25:00Z (gitlab_id: 12345678)\n\nCounts:\n Issues: 1,234\n Discussions: 5,678\n Notes: 23,456 (4,567 system)\n\nFiles: src/cli/commands/sync_status.rs\nDone when: Shows cursor positions and counts after ingestion","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:27.246825Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.968507Z","closed_at":"2026-01-25T17:02:01.968507Z","deleted_at":"2026-01-25T17:02:01.968503Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} -{"id":"bd-9dd","title":"Implement 'lore trace' command with human and robot output","description":"## Background\n\nThe trace command is Gate 
5's capstone CLI. It answers 'Why was this code introduced?' by building file -> MR -> issue -> discussion chains.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 5.3.\n\n## Codebase Context\n\n- CLI pattern: same as file-history (Commands enum, handler in main.rs)\n- trace.rs (bd-2n4): run_trace() returns TraceResult with chains\n- Path parsing: support 'src/foo.rs:45' syntax (line number for future Tier 2)\n- merge_requests.merged_at exists (migration 006) — use COALESCE(merged_at, updated_at) for ordering\n\n## Approach\n\n### 1. TraceArgs (`src/cli/mod.rs`):\n```rust\n#[derive(Parser)]\npub struct TraceArgs {\n pub path: String, // supports :line suffix\n #[arg(short = 'p', long)] pub project: Option,\n #[arg(long)] pub discussions: bool,\n #[arg(long = \"no-follow-renames\")] pub no_follow_renames: bool,\n #[arg(short = 'n', long = \"limit\", default_value = \"20\")] pub limit: usize,\n}\n```\n\n### 2. Path parsing:\n```rust\nfn parse_trace_path(input: &str) -> (String, Option) {\n if let Some((path, line)) = input.rsplit_once(':') {\n if let Ok(n) = line.parse::() { return (path.to_string(), Some(n)); }\n }\n (input.to_string(), None)\n}\n```\nIf line present: warn 'Line-level tracing requires Tier 2. Showing file-level results.'\n\n### 3. Human output shows chains with MR -> issue -> discussion context\n\n### 4. 
Robot JSON:\n```json\n{\"ok\": true, \"data\": {\"path\": \"...\", \"resolved_paths\": [...], \"trace_chains\": [...]}, \"meta\": {\"tier\": \"api_only\", \"line_requested\": null}}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore trace src/foo.rs` with human output\n- [ ] `lore --robot trace src/foo.rs` with JSON\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] -p, --discussions, --no-follow-renames, -n all work\n- [ ] Rename-aware via resolve_rename_chain\n- [ ] meta.tier = 'api_only'\n- [ ] Added to VALID_COMMANDS and robot-docs\n- [ ] `cargo check --all-targets` passes\n\n## Files\n\n- `src/cli/mod.rs` (TraceArgs + Commands::Trace)\n- `src/cli/commands/trace.rs` (NEW)\n- `src/cli/commands/mod.rs` (re-export)\n- `src/main.rs` (handler + VALID_COMMANDS + robot-docs)\n\n## TDD Loop\n\nRED:\n- `test_parse_trace_path_simple` - \"src/foo.rs\" -> (path, None)\n- `test_parse_trace_path_with_line` - \"src/foo.rs:42\" -> (path, Some(42))\n- `test_parse_trace_path_windows` - \"C:/foo.rs\" -> (path, None) — don't misparse drive letter\n\nGREEN: Implement CLI wiring and handlers.\n\nVERIFY: `cargo check --all-targets`\n\n## Edge Cases\n\n- Windows paths: don't misparse C: as line number\n- No MR data: friendly message with suggestion to sync\n- Very deep rename chain: bounded by resolve_rename_chain","status":"in_progress","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:32.788530Z","created_by":"tayloreernisse","updated_at":"2026-02-17T19:08:40.322237Z","compaction_level":0,"original_size":0,"labels":["cli","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-9dd","depends_on_id":"bd-1ht","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-9dd","depends_on_id":"bd-2n4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} +{"id":"bd-9dd","title":"Implement 'lore trace' command with human and robot output","description":"## Background\n\nThe trace command is Gate 5's capstone CLI. 
It answers 'Why was this code introduced?' by building file -> MR -> issue -> discussion chains.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 5.3.\n\n## Codebase Context\n\n- CLI pattern: same as file-history (Commands enum, handler in main.rs)\n- trace.rs (bd-2n4): run_trace() returns TraceResult with chains\n- Path parsing: support 'src/foo.rs:45' syntax (line number for future Tier 2)\n- merge_requests.merged_at exists (migration 006) — use COALESCE(merged_at, updated_at) for ordering\n\n## Approach\n\n### 1. TraceArgs (`src/cli/mod.rs`):\n```rust\n#[derive(Parser)]\npub struct TraceArgs {\n pub path: String, // supports :line suffix\n #[arg(short = 'p', long)] pub project: Option,\n #[arg(long)] pub discussions: bool,\n #[arg(long = \"no-follow-renames\")] pub no_follow_renames: bool,\n #[arg(short = 'n', long = \"limit\", default_value = \"20\")] pub limit: usize,\n}\n```\n\n### 2. Path parsing:\n```rust\nfn parse_trace_path(input: &str) -> (String, Option) {\n if let Some((path, line)) = input.rsplit_once(':') {\n if let Ok(n) = line.parse::() { return (path.to_string(), Some(n)); }\n }\n (input.to_string(), None)\n}\n```\nIf line present: warn 'Line-level tracing requires Tier 2. Showing file-level results.'\n\n### 3. Human output shows chains with MR -> issue -> discussion context\n\n### 4. 
Robot JSON:\n```json\n{\"ok\": true, \"data\": {\"path\": \"...\", \"resolved_paths\": [...], \"trace_chains\": [...]}, \"meta\": {\"tier\": \"api_only\", \"line_requested\": null}}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore trace src/foo.rs` with human output\n- [ ] `lore --robot trace src/foo.rs` with JSON\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] -p, --discussions, --no-follow-renames, -n all work\n- [ ] Rename-aware via resolve_rename_chain\n- [ ] meta.tier = 'api_only'\n- [ ] Added to VALID_COMMANDS and robot-docs\n- [ ] `cargo check --all-targets` passes\n\n## Files\n\n- `src/cli/mod.rs` (TraceArgs + Commands::Trace)\n- `src/cli/commands/trace.rs` (NEW)\n- `src/cli/commands/mod.rs` (re-export)\n- `src/main.rs` (handler + VALID_COMMANDS + robot-docs)\n\n## TDD Loop\n\nRED:\n- `test_parse_trace_path_simple` - \"src/foo.rs\" -> (path, None)\n- `test_parse_trace_path_with_line` - \"src/foo.rs:42\" -> (path, Some(42))\n- `test_parse_trace_path_windows` - \"C:/foo.rs\" -> (path, None) — don't misparse drive letter\n\nGREEN: Implement CLI wiring and handlers.\n\nVERIFY: `cargo check --all-targets`\n\n## Edge Cases\n\n- Windows paths: don't misparse C: as line number\n- No MR data: friendly message with suggestion to sync\n- Very deep rename chain: bounded by resolve_rename_chain","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:32.788530Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:46:36.973805Z","closed_at":"2026-02-19T13:46:36.973541Z","close_reason":"Implementation complete: trace CLI command with human + robot output, all tests passing","compaction_level":0,"original_size":0,"labels":["cli","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-9dd","depends_on_id":"bd-1ht","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-9dd","depends_on_id":"bd-2n4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} 
{"id":"bd-9lbr","title":"lore explain: auto-generate issue/MR narrative","description":"## Background\nGiven an issue or MR, auto-generate a structured narrative of what happened: who was involved, what decisions were made, what changed, and what is unresolved. Template-based v1 (no LLM dependency), deterministic and reproducible.\n\n## Current Infrastructure (Verified 2026-02-12)\n- show.rs: IssueDetail (line 69) and MrDetail (line 14) — entity detail with discussions\n- timeline.rs: 5-stage pipeline SHIPPED — chronological event reconstruction\n- notes table: 282K rows with body, author, created_at, is_system, discussion_id\n- discussions table: links notes to parent entity (noteable_type, noteable_id), has resolved flag\n- resource_state_events table: state changes with created_at, user_username (src/core/events_db.rs)\n- resource_label_events table: label add/remove with created_at, user_username\n- entity_references table (src/core/references.rs): cross-references between entities (closing MRs, related issues). Column names: `source_entity_type`, `source_entity_id`, `target_entity_type`, `target_entity_id`, `target_project_path`, `target_entity_iid`, `reference_type`, `source_method`\n\n## Approach\nNew command: `lore explain issues N` / `lore explain mrs N`\n\n### Data Assembly (reuse existing internals as library calls)\n1. Entity detail: reuse show.rs query logic for IssueDetail/MrDetail\n2. Timeline events: reuse timeline pipeline with entity-scoped seed\n3. Discussion notes:\n```sql\nSELECT n.id, n.body, n.author_username, n.created_at\nFROM notes n\nJOIN discussions d ON n.discussion_id = d.id\nWHERE d.noteable_type = ? AND d.noteable_id = ?\n AND n.is_system = 0\nORDER BY n.created_at\n```\n4. 
Cross-references:\n```sql\nSELECT target_entity_type, target_entity_id, target_project_path,\n target_entity_iid, reference_type, source_method\nFROM entity_references\nWHERE (source_entity_type = ?1 AND source_entity_id = ?2)\nUNION ALL\nSELECT source_entity_type, source_entity_id, NULL,\n NULL, reference_type, source_method\nFROM entity_references\nWHERE (target_entity_type = ?1 AND target_entity_id = ?2)\n```\n\n### Key Decisions Heuristic\nNotes from assignees/author that follow state or label changes within 1 hour:\n```rust\nstruct StateOrLabelEvent {\n created_at: i64, // ms epoch\n user: String,\n description: String, // e.g. \"state: opened -> closed\" or \"label: +bug\"\n}\n\nfn extract_key_decisions(\n state_events: &[ResourceStateEvent],\n label_events: &[ResourceLabelEvent],\n notes: &[Note],\n) -> Vec {\n // Merge both event types into a unified chronological list\n let mut events: Vec = Vec::new();\n for e in state_events {\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"state: {} -> {}\", e.from_state.as_deref().unwrap_or(\"?\"), e.to_state),\n });\n }\n for e in label_events {\n let action = if e.action == \"add\" { \"+\" } else { \"-\" };\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"label: {}{}\", action, e.label_name.as_deref().unwrap_or(\"?\")),\n });\n }\n events.sort_by_key(|e| e.created_at);\n\n let mut decisions = Vec::new();\n let one_hour_ms: i64 = 60 * 60 * 1000;\n\n for event in &events {\n // Find notes by same actor within 60 min after the event\n for note in notes {\n if note.author_username == event.user\n && note.created_at >= event.created_at\n && note.created_at <= event.created_at + one_hour_ms\n {\n decisions.push(KeyDecision {\n timestamp: event.created_at,\n actor: event.user.clone(),\n action: event.description.clone(),\n context_note: truncate(¬e.body, 500),\n });\n break; // one 
note per event\n }\n }\n }\n decisions.truncate(10); // Cap at 10 key decisions\n decisions\n}\n```\n\n### Narrative Sections\n1. **Header**: title, author, opened date, state, assignees, labels, status_name\n2. **Description excerpt**: first 500 chars of description (or full if shorter)\n3. **Key decisions**: notes correlated with state/label changes (heuristic above)\n4. **Activity summary**: counts of state changes, label changes, notes, time range\n5. **Open threads**: discussions WHERE resolved = false\n6. **Related entities**: closing MRs (with state), related issues from entity_references\n7. **Timeline excerpt**: first 20 events from timeline pipeline\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"entity\": {\n \"type\": \"issue\", \"iid\": 3864, \"title\": \"...\", \"state\": \"opened\",\n \"author\": \"teernisse\", \"assignees\": [\"teernisse\"],\n \"labels\": [\"customer:BNSF\"], \"created_at\": \"...\", \"updated_at\": \"...\",\n \"url\": \"...\", \"status_name\": \"In progress\"\n },\n \"description_excerpt\": \"First 500 chars of description...\",\n \"key_decisions\": [{\n \"timestamp\": \"2026-01-15T...\",\n \"actor\": \"teernisse\",\n \"action\": \"state: opened -> in_progress\",\n \"context_note\": \"Starting work on the BNSF throw time integration...\"\n }],\n \"activity\": {\n \"state_changes\": 3, \"label_changes\": 5, \"notes\": 42,\n \"first_event\": \"2026-01-10T...\", \"last_event\": \"2026-02-12T...\"\n },\n \"open_threads\": [{\n \"discussion_id\": \"abc123\",\n \"started_by\": \"cseiber\",\n \"started_at\": \"2026-02-01T...\",\n \"note_count\": 5,\n \"last_note_at\": \"2026-02-10T...\"\n }],\n \"related\": {\n \"closing_mrs\": [{ \"iid\": 200, \"title\": \"...\", \"state\": \"merged\" }],\n \"related_issues\": [{ \"iid\": 3800, \"title\": \"Rail Break Card\", \"relation\": \"related\" }]\n },\n \"timeline_excerpt\": [{ \"timestamp\": \"...\", \"event_type\": \"...\", \"actor\": \"...\", \"summary\": \"...\" 
}]\n },\n \"meta\": { \"elapsed_ms\": 350 }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nExplain {\n /// Entity type: \"issues\" or \"mrs\"\n entity_type: String,\n /// Entity IID\n iid: i64,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/explain.rs:\n- test_explain_issue_basic: insert issue + notes + state events, run explain, assert all sections present (entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt)\n- test_explain_key_decision_heuristic: insert state change event + note by same author within 30 min, assert note appears in key_decisions\n- test_explain_key_decision_ignores_unrelated_notes: insert note by different author, assert it does NOT appear in key_decisions\n- test_explain_open_threads: insert 2 discussions (1 resolved, 1 unresolved), assert only unresolved in open_threads\n- test_explain_no_notes: issue with zero notes produces header + description + empty sections\n- test_explain_mr: insert MR with merged_at, assert entity includes type=\"merge_request\"\n- test_explain_activity_counts: insert 3 state events + 2 label events + 10 notes, assert counts match\n\nGREEN: Implement explain command with section assembly\n\nVERIFY:\n```bash\ncargo test explain:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J explain issues 3864 | jq '.data | keys'\n# Should include: entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt\n```\n\n## Acceptance Criteria\n- [ ] lore explain issues N produces structured output for any synced issue\n- [ ] lore explain mrs N produces structured output for any synced MR\n- [ ] Robot mode returns all 7 sections\n- [ ] Human mode renders readable narrative with headers and indentation\n- [ ] Key decisions heuristic: captures notes within 60 min of state/label changes by same actor\n- [ ] Works fully offline 
(no API calls, no LLM)\n- [ ] Performance: <500ms for issue with 50 notes\n- [ ] Command registered in main.rs and robot-docs\n- [ ] key_decisions capped at 10, timeline_excerpt capped at 20 events\n\n## Edge Cases\n- Issue with empty description: description_excerpt = \"(no description)\"\n- Issue with 500+ notes: timeline_excerpt capped at 20, key_decisions capped at 10\n- Issue not found in local DB: exit code 17 with suggestion to sync\n- Ambiguous project: exit code 18 with suggestion to use -p flag\n- MR with no review activity: activity section shows zeros\n- Cross-project references: show as unresolved with project path hint\n- Notes that are pure code blocks: include in key_decisions if correlated with events (they may contain implementation decisions)\n- ResourceStateEvent/ResourceLabelEvent field names: check src/core/events_db.rs for exact struct definitions before implementing\n\n## Dependency Context\n- **bd-2g50 (data gaps)**: BLOCKER. Provides `closed_at` field on IssueDetail for the header section. 
Without it, explain can still show state=\"closed\" but won't have the exact close timestamp.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/explain.rs\n- src/cli/commands/mod.rs (add pub mod explain; re-export)\n- src/main.rs (register Explain subcommand in Commands enum, add handle_explain fn)\n- Reuse: show.rs queries, timeline pipeline, notes/discussions/resource_events queries from src/core/events_db.rs","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:46:41.386454Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:31:34.538422Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence"],"dependencies":[{"issue_id":"bd-9lbr","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-9lbr","depends_on_id":"bd-2g50","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-9wl5","title":"NOTE-2G: Parent metadata change propagation to note documents","description":"## Background\nNote documents inherit labels and title from parent issue/MR. When parent metadata changes, note documents become stale. The existing pipeline already marks discussion documents dirty on parent changes — note documents need the same treatment.\n\n## Approach\nFind where ingestion detects parent entity changes and marks discussion documents dirty. The dirty marking for discussions happens in:\n- src/ingestion/discussions.rs line 127: mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)\n- src/ingestion/mr_discussions.rs line 162 and 362: mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)\n\nThese fire when a discussion is upserted (which happens when parent entity is re-ingested). 
For note documents, we need to additionally mark all non-system notes of that discussion as dirty:\n\nAfter each mark_dirty_tx for Discussion, add:\n // Mark child note documents dirty (they inherit parent metadata)\n let note_ids: Vec = tx.prepare(\"SELECT id FROM notes WHERE discussion_id = ? AND is_system = 0\")?\n .query_map([local_discussion_id], |r| r.get(0))?\n .collect::, _>>()?;\n for note_id in note_ids {\n dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, note_id)?;\n }\n\nAlternative (more efficient, set-based):\n INSERT INTO dirty_sources (source_type, source_id, queued_at)\n SELECT 'note', n.id, ?1\n FROM notes n\n WHERE n.discussion_id = ?2 AND n.is_system = 0\n ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0\n\nUse the set-based approach for better performance with large discussions.\n\n## Files\n- MODIFY: src/ingestion/discussions.rs (add note dirty marking after line 127)\n- MODIFY: src/ingestion/mr_discussions.rs (add note dirty marking after lines 162 and 362)\n\n## TDD Anchor\nRED: test_parent_title_change_marks_notes_dirty — change issue title, re-ingest discussions, assert note documents appear in dirty_sources.\nGREEN: Add set-based INSERT INTO dirty_sources after discussion dirty marking.\nVERIFY: cargo test parent_title_change_marks_notes -- --nocapture\nTests: test_parent_label_change_marks_notes_dirty (modify issue labels, re-ingest, check dirty queue)\n\n## Acceptance Criteria\n- [ ] Discussion upsert for issue marks child non-system note documents dirty\n- [ ] Discussion upsert for MR marks child non-system note documents dirty (both call sites)\n- [ ] Only non-system notes marked dirty (is_system = 0 filter)\n- [ ] Set-based SQL (not per-note loop) for performance\n- [ ] Both tests pass\n\n## Dependency Context\n- Depends on NOTE-2D (bd-2ezb): dirty tracking infrastructure for notes must exist (dirty_sources accepts source_type='note', regenerator handles it)\n\n## Edge Cases\n- 
Discussion with 0 non-system notes: set-based INSERT is a no-op\n- Discussion with 100+ notes: set-based approach handles efficiently in one SQL statement\n- Concurrent discussion ingestion: ON CONFLICT DO UPDATE handles race safely","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:02:40.292874Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:15.717576Z","closed_at":"2026-02-12T18:13:15.717528Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"]} {"id":"bd-a6yb","title":"Implement responsive breakpoints for all TUI screens","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-19T04:52:55.561576Z","created_by":"tayloreernisse","updated_at":"2026-02-19T05:10:12.531731Z","closed_at":"2026-02-19T05:10:12.531557Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-a6yb","depends_on_id":"bd-3t6r","type":"blocks","created_at":"2026-02-19T04:53:02.566163Z","created_by":"tayloreernisse"}]} diff --git a/.beads/last-touched b/.beads/last-touched index 6a0b456..a646e0c 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-2o49 +bd-8con diff --git a/crates/lore-tui/src/app/tests.rs b/crates/lore-tui/src/app/tests.rs index 6fa8016..b75fb66 100644 --- a/crates/lore-tui/src/app/tests.rs +++ b/crates/lore-tui/src/app/tests.rs @@ -377,3 +377,120 @@ fn test_sync_completed_from_bootstrap_resets_navigation_and_state() { assert_eq!(app.navigation.depth(), 1); assert!(!app.state.bootstrap.sync_started); } + +#[test] +fn test_sync_completed_flushes_entity_caches() { + use crate::message::EntityKey; + use crate::state::issue_detail::{IssueDetailData, IssueMetadata}; + use crate::state::mr_detail::{MrDetailData, MrMetadata}; + use crate::state::{CachedIssuePayload, CachedMrPayload}; + use crate::view::common::cross_ref::CrossRef; + + let mut app = test_app(); + + // Populate caches with dummy data. 
+ let issue_key = EntityKey::issue(1, 42); + app.state.issue_cache.put( + issue_key, + CachedIssuePayload { + data: IssueDetailData { + metadata: IssueMetadata { + iid: 42, + project_path: "g/p".into(), + title: "Test".into(), + description: String::new(), + state: "opened".into(), + author: "alice".into(), + assignees: vec![], + labels: vec![], + milestone: None, + due_date: None, + created_at: 0, + updated_at: 0, + web_url: String::new(), + discussion_count: 0, + }, + cross_refs: Vec::::new(), + }, + discussions: vec![], + }, + ); + + let mr_key = EntityKey::mr(1, 99); + app.state.mr_cache.put( + mr_key, + CachedMrPayload { + data: MrDetailData { + metadata: MrMetadata { + iid: 99, + project_path: "g/p".into(), + title: "MR".into(), + description: String::new(), + state: "opened".into(), + draft: false, + author: "bob".into(), + assignees: vec![], + reviewers: vec![], + labels: vec![], + source_branch: "feat".into(), + target_branch: "main".into(), + merge_status: String::new(), + created_at: 0, + updated_at: 0, + merged_at: None, + web_url: String::new(), + discussion_count: 0, + file_change_count: 0, + }, + cross_refs: Vec::::new(), + file_changes: vec![], + }, + discussions: vec![], + }, + ); + + assert_eq!(app.state.issue_cache.len(), 1); + assert_eq!(app.state.mr_cache.len(), 1); + + // Sync completes — caches should be flushed. + app.update(Msg::SyncCompleted { elapsed_ms: 500 }); + + assert!( + app.state.issue_cache.is_empty(), + "issue cache should be flushed after sync" + ); + assert!( + app.state.mr_cache.is_empty(), + "MR cache should be flushed after sync" + ); +} + +#[test] +fn test_sync_completed_refreshes_current_detail_view() { + use crate::message::EntityKey; + use crate::state::LoadState; + + let mut app = test_app(); + + // Navigate to an issue detail screen. + let key = EntityKey::issue(1, 42); + app.update(Msg::NavigateTo(Screen::IssueDetail(key))); + + // Simulate load completion so LoadState goes to Idle. 
+ app.state.set_loading( + Screen::IssueDetail(EntityKey::issue(1, 42)), + LoadState::Idle, + ); + + // Sync completes while viewing the detail. + app.update(Msg::SyncCompleted { elapsed_ms: 300 }); + + // The detail screen should have been set to Refreshing. + assert_eq!( + *app.state + .load_state + .get(&Screen::IssueDetail(EntityKey::issue(1, 42))), + LoadState::Refreshing, + "detail view should refresh after sync" + ); +} diff --git a/crates/lore-tui/src/app/update.rs b/crates/lore-tui/src/app/update.rs index 2c553c6..d73eb02 100644 --- a/crates/lore-tui/src/app/update.rs +++ b/crates/lore-tui/src/app/update.rs @@ -542,6 +542,11 @@ impl LoreApp { Msg::SyncCompleted { elapsed_ms } => { self.state.sync.complete(elapsed_ms); + // Flush entity caches — sync may have updated any entity's + // metadata, discussions, or cross-refs in the DB. + self.state.issue_cache.clear(); + self.state.mr_cache.clear(); + // If we came from Bootstrap, replace nav history with Dashboard. if *self.navigation.current() == Screen::Bootstrap { self.state.bootstrap.sync_started = false; @@ -557,6 +562,19 @@ impl LoreApp { self.state.set_loading(dashboard.clone(), load_state); let _handle = self.supervisor.submit(TaskKey::LoadScreen(dashboard)); } + + // If currently on a detail view, refresh it so the user sees + // updated data without navigating away and back. 
+ let current = self.navigation.current().clone(); + match ¤t { + Screen::IssueDetail(_) | Screen::MrDetail(_) => { + self.state + .set_loading(current.clone(), LoadState::Refreshing); + let _handle = self.supervisor.submit(TaskKey::LoadScreen(current)); + } + _ => {} + } + Cmd::none() } Msg::SyncCancelled => { diff --git a/migrations/028_surgical_sync_runs.sql b/migrations/028_surgical_sync_runs.sql new file mode 100644 index 0000000..b195adc --- /dev/null +++ b/migrations/028_surgical_sync_runs.sql @@ -0,0 +1,20 @@ +-- Migration 028: Extend sync_runs for surgical sync observability +-- Adds mode/phase tracking and surgical-specific counters. + +ALTER TABLE sync_runs ADD COLUMN mode TEXT; +ALTER TABLE sync_runs ADD COLUMN phase TEXT; +ALTER TABLE sync_runs ADD COLUMN surgical_iids_json TEXT; +ALTER TABLE sync_runs ADD COLUMN issues_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN issues_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN skipped_stale INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_regenerated INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_embedded INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN warnings_count INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN cancelled_at INTEGER; + +CREATE INDEX IF NOT EXISTS idx_sync_runs_mode_started + ON sync_runs(mode, started_at DESC); +CREATE INDEX IF NOT EXISTS idx_sync_runs_status_phase_started + ON sync_runs(status, phase, started_at DESC); diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs index b851893..490d300 100644 --- a/src/cli/autocorrect.rs +++ b/src/cli/autocorrect.rs @@ -129,6 +129,10 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--no-dry-run", "--timings", "--tui", + "--issue", + "--mr", + "--project", + "--preflight-only", ], ), ( 
diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 7cfa18a..d5d7be7 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -8,11 +8,13 @@ pub mod generate_docs; pub mod ingest; pub mod init; pub mod list; +pub mod related; pub mod search; pub mod show; pub mod stats; pub mod sync; pub mod sync_status; +pub mod sync_surgical; pub mod timeline; pub mod trace; pub mod tui; @@ -39,6 +41,7 @@ pub use list::{ print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, query_issues, query_mrs, query_notes, run_list_issues, run_list_mrs, }; +pub use related::{print_related, print_related_json, run_related}; pub use search::{ SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search, }; @@ -49,6 +52,7 @@ pub use show::{ pub use stats::{print_stats, print_stats_json, run_stats}; pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync}; pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status}; +pub use sync_surgical::run_sync_surgical; pub use timeline::{TimelineParams, print_timeline, print_timeline_json_with_meta, run_timeline}; pub use trace::{parse_trace_path, print_trace, print_trace_json}; pub use tui::{TuiArgs, find_lore_tui, run_tui}; diff --git a/src/cli/commands/related.rs b/src/cli/commands/related.rs new file mode 100644 index 0000000..bd87c08 --- /dev/null +++ b/src/cli/commands/related.rs @@ -0,0 +1,692 @@ +use std::collections::HashSet; + +use serde::Serialize; + +use crate::cli::render::{Icons, Theme}; +use crate::core::config::Config; +use crate::core::db::create_connection; +use crate::core::error::{LoreError, Result}; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; +use crate::search::search_vector; + +// --------------------------------------------------------------------------- +// Public types +// 
--------------------------------------------------------------------------- + +#[derive(Debug, Serialize)] +pub struct RelatedSource { + pub source_type: String, + pub iid: Option, + pub title: Option, +} + +#[derive(Debug, Serialize)] +pub struct RelatedResult { + pub source_type: String, + pub iid: i64, + pub title: String, + pub url: Option, + pub similarity_score: f64, + pub shared_labels: Vec, + pub project_path: Option, +} + +#[derive(Debug, Serialize)] +pub struct RelatedResponse { + pub source: RelatedSource, + pub query: Option, + pub results: Vec, + pub mode: String, +} + +// --------------------------------------------------------------------------- +// Pure helpers (unit-testable) +// --------------------------------------------------------------------------- + +/// Convert L2 distance to a 0-1 similarity score. +/// +/// Inverse relationship: closer (lower distance) = higher similarity. +/// The +1 prevents division by zero and ensures score is in (0, 1]. +fn distance_to_similarity(distance: f64) -> f64 { + 1.0 / (1.0 + distance) +} + +/// Parse the JSON `label_names` column into a set of labels. 
+fn parse_label_names(label_names_json: &Option) -> HashSet { + label_names_json + .as_deref() + .and_then(|s| serde_json::from_str::>(s).ok()) + .unwrap_or_default() + .into_iter() + .collect() +} + +// --------------------------------------------------------------------------- +// Internal row types +// --------------------------------------------------------------------------- + +struct DocRow { + id: i64, + content_text: String, + label_names: Option, + title: Option, +} + +struct HydratedDoc { + source_type: String, + iid: i64, + title: String, + url: Option, + label_names: Option, + project_path: Option, +} + +/// (source_type, source_id, label_names, url, project_id) +type DocMetaRow = (String, i64, Option, Option, i64); + +// --------------------------------------------------------------------------- +// Main entry point +// --------------------------------------------------------------------------- + +pub async fn run_related( + config: &Config, + entity_type: Option<&str>, + entity_iid: Option, + query_text: Option<&str>, + project: Option<&str>, + limit: usize, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + // Check that embeddings exist at all. 
+ let embedding_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL", + [], + |row| row.get(0), + ) + .unwrap_or(0); + + if embedding_count == 0 { + return Err(LoreError::EmbeddingsNotBuilt); + } + + match (entity_type, entity_iid) { + (Some(etype), Some(iid)) => { + run_entity_mode(config, &conn, etype, iid, project, limit).await + } + _ => { + let text = query_text.unwrap_or(""); + if text.is_empty() { + return Err(LoreError::Other( + "Provide either an entity type + IID or a free-text query.".into(), + )); + } + run_query_mode(config, &conn, text, project, limit).await + } + } +} + +// --------------------------------------------------------------------------- +// Entity mode: find entities similar to a specific issue/MR +// --------------------------------------------------------------------------- + +async fn run_entity_mode( + config: &Config, + conn: &rusqlite::Connection, + entity_type: &str, + iid: i64, + project: Option<&str>, + limit: usize, +) -> Result { + let source_type = match entity_type { + "issues" | "issue" => "issue", + "mrs" | "mr" | "merge-requests" | "merge_request" => "merge_request", + other => { + return Err(LoreError::Other(format!( + "Unknown entity type '{other}'. Use 'issues' or 'mrs'." + ))); + } + }; + + // Resolve project (optional but needed for multi-project setups). + let project_id = match project { + Some(p) => Some(resolve_project(conn, p)?), + None => None, + }; + + // Find the source document. + let doc = find_entity_document(conn, source_type, iid, project_id)?; + + // Get or compute the embedding. + let embedding = get_or_compute_embedding(config, conn, &doc).await?; + + // KNN search (request extra to filter self). + let vector_results = search_vector(conn, &embedding, limit + 5)?; + + // Hydrate and filter. + let source_labels = parse_label_names(&doc.label_names); + let mut results = Vec::new(); + + for vr in vector_results { + // Exclude self. 
+ if vr.document_id == doc.id { + continue; + } + + if let Some(hydrated) = hydrate_document(conn, vr.document_id)? { + let result_labels = parse_label_names(&hydrated.label_names); + let shared: Vec = source_labels + .intersection(&result_labels) + .cloned() + .collect(); + + results.push(RelatedResult { + source_type: hydrated.source_type, + iid: hydrated.iid, + title: hydrated.title, + url: hydrated.url, + similarity_score: distance_to_similarity(vr.distance), + shared_labels: shared, + project_path: hydrated.project_path, + }); + } + + if results.len() >= limit { + break; + } + } + + Ok(RelatedResponse { + source: RelatedSource { + source_type: source_type.to_string(), + iid: Some(iid), + title: doc.title, + }, + query: None, + results, + mode: "entity".to_string(), + }) +} + +// --------------------------------------------------------------------------- +// Query mode: embed free text and find similar entities +// --------------------------------------------------------------------------- + +async fn run_query_mode( + config: &Config, + conn: &rusqlite::Connection, + text: &str, + project: Option<&str>, + limit: usize, +) -> Result { + let ollama = OllamaClient::new(OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + timeout_secs: 60, + }); + + let embeddings = ollama.embed_batch(&[text]).await?; + + let embedding = embeddings + .into_iter() + .next() + .ok_or_else(|| LoreError::Other("Ollama returned empty embedding result.".to_string()))?; + + let vector_results = search_vector(conn, &embedding, limit)?; + + let _project_id = match project { + Some(p) => Some(resolve_project(conn, p)?), + None => None, + }; + + let mut results = Vec::new(); + for vr in vector_results { + if let Some(hydrated) = hydrate_document(conn, vr.document_id)? 
{ + results.push(RelatedResult { + source_type: hydrated.source_type, + iid: hydrated.iid, + title: hydrated.title, + url: hydrated.url, + similarity_score: distance_to_similarity(vr.distance), + shared_labels: Vec::new(), // No source labels in query mode. + project_path: hydrated.project_path, + }); + } + + if results.len() >= limit { + break; + } + } + + Ok(RelatedResponse { + source: RelatedSource { + source_type: "query".to_string(), + iid: None, + title: None, + }, + query: Some(text.to_string()), + results, + mode: "query".to_string(), + }) +} + +// --------------------------------------------------------------------------- +// DB helpers +// --------------------------------------------------------------------------- + +fn find_entity_document( + conn: &rusqlite::Connection, + source_type: &str, + iid: i64, + project_id: Option, +) -> Result { + let (table, iid_col) = match source_type { + "issue" => ("issues", "iid"), + "merge_request" => ("merge_requests", "iid"), + _ => { + return Err(LoreError::Other(format!( + "Unknown source type: {source_type}" + ))); + } + }; + + // We build the query dynamically because the table name differs. 
+ let project_filter = if project_id.is_some() { + "AND e.project_id = ?3".to_string() + } else { + String::new() + }; + + let sql = format!( + "SELECT d.id, d.content_text, d.label_names, d.title \ + FROM documents d \ + JOIN {table} e ON d.source_type = ?1 AND d.source_id = e.id \ + WHERE e.{iid_col} = ?2 {project_filter} \ + LIMIT 1" + ); + + let mut stmt = conn.prepare(&sql)?; + + let params: Vec> = if let Some(pid) = project_id { + vec![ + Box::new(source_type.to_string()), + Box::new(iid), + Box::new(pid), + ] + } else { + vec![Box::new(source_type.to_string()), Box::new(iid)] + }; + + let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let doc = stmt + .query_row(param_refs.as_slice(), |row| { + Ok(DocRow { + id: row.get(0)?, + content_text: row.get(1)?, + label_names: row.get(2)?, + title: row.get(3)?, + }) + }) + .map_err(|_| { + LoreError::NotFound(format!( + "{source_type} #{iid} not found. Run 'lore sync' to fetch the latest data." + )) + })?; + + Ok(doc) +} + +/// Get the embedding for a document from the DB, or compute it on-the-fly via Ollama. +async fn get_or_compute_embedding( + config: &Config, + conn: &rusqlite::Connection, + doc: &DocRow, +) -> Result> { + // Try to find an existing embedding in the vec0 table. + use crate::embedding::chunk_ids::encode_rowid; + + let rowid = encode_rowid(doc.id, 0); + let result: Option> = conn + .query_row( + "SELECT embedding FROM embeddings WHERE rowid = ?1", + rusqlite::params![rowid], + |row| row.get(0), + ) + .ok(); + + if let Some(bytes) = result { + // Decode f32 vec from raw bytes. + let floats: Vec = bytes + .chunks_exact(4) + .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) + .collect(); + if !floats.is_empty() { + return Ok(floats); + } + } + + // Fallback: embed the content on-the-fly via Ollama. 
+ let ollama = OllamaClient::new(OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + timeout_secs: 60, + }); + + let embeddings = ollama.embed_batch(&[&doc.content_text]).await?; + + embeddings + .into_iter() + .next() + .ok_or_else(|| LoreError::Other("Ollama returned empty embedding result.".to_string())) +} + +/// Hydrate a document_id into a displayable result by joining back to the source entity. +fn hydrate_document(conn: &rusqlite::Connection, document_id: i64) -> Result> { + // First get the document metadata. + let doc_row: Option = conn + .query_row( + "SELECT d.source_type, d.source_id, d.label_names, d.url, d.project_id \ + FROM documents d WHERE d.id = ?1", + rusqlite::params![document_id], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }, + ) + .ok(); + + let Some((source_type, source_id, label_names, url, project_id)) = doc_row else { + return Ok(None); + }; + + // Get the project path. + let project_path: Option = conn + .query_row( + "SELECT path_with_namespace FROM projects WHERE id = ?1", + rusqlite::params![project_id], + |row| row.get(0), + ) + .ok(); + + // Get the entity IID and title from the source table. + let (iid, title) = match source_type.as_str() { + "issue" => { + let row: Option<(i64, String)> = conn + .query_row( + "SELECT iid, title FROM issues WHERE id = ?1", + rusqlite::params![source_id], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); + match row { + Some((iid, title)) => (iid, title), + None => return Ok(None), + } + } + "merge_request" => { + let row: Option<(i64, String)> = conn + .query_row( + "SELECT iid, title FROM merge_requests WHERE id = ?1", + rusqlite::params![source_id], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); + match row { + Some((iid, title)) => (iid, title), + None => return Ok(None), + } + } + // Discussion/note documents: use the document title or a placeholder. 
+ _ => return Ok(None), // Skip non-entity documents in results. + }; + + Ok(Some(HydratedDoc { + source_type, + iid, + title, + url, + label_names, + project_path, + })) +} + +// --------------------------------------------------------------------------- +// Human output +// --------------------------------------------------------------------------- + +pub fn print_related(response: &RelatedResponse) { + println!(); + + match &response.source.source_type.as_str() { + &"query" => { + println!( + "{}", + Theme::bold().render(&format!( + "Related to: \"{}\"", + response.query.as_deref().unwrap_or("") + )) + ); + } + _ => { + let entity_label = if response.source.source_type == "issue" { + format!("#{}", response.source.iid.unwrap_or(0)) + } else { + format!("!{}", response.source.iid.unwrap_or(0)) + }; + println!( + "{}", + Theme::bold().render(&format!( + "Related to {} {} {}", + response.source.source_type, + entity_label, + response + .source + .title + .as_deref() + .map(|t| format!("\"{}\"", t)) + .unwrap_or_default() + )) + ); + } + } + + if response.results.is_empty() { + println!( + "\n {} {}", + Icons::info(), + Theme::dim().render("No related entities found.") + ); + println!(); + return; + } + + println!(); + + for (i, r) in response.results.iter().enumerate() { + let icon = if r.source_type == "issue" { + Icons::issue_opened() + } else { + Icons::mr_opened() + }; + let prefix = if r.source_type == "issue" { "#" } else { "!" }; + + let score_pct = (r.similarity_score * 100.0) as u8; + let score_str = format!("{score_pct}%"); + + let labels_str = if r.shared_labels.is_empty() { + String::new() + } else { + format!(" [{}]", r.shared_labels.join(", ")) + }; + + let project_str = r + .project_path + .as_deref() + .map(|p| format!(" ({})", p)) + .unwrap_or_default(); + + println!( + " {:>2}. 
{} {}{:<5} {} {}{}{}", + i + 1, + icon, + prefix, + r.iid, + Theme::accent().render(&score_str), + r.title, + Theme::dim().render(&labels_str), + Theme::dim().render(&project_str), + ); + } + + println!(); +} + +// --------------------------------------------------------------------------- +// Robot (JSON) output +// --------------------------------------------------------------------------- + +pub fn print_related_json(response: &RelatedResponse, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": { + "source": response.source, + "query": response.query, + "mode": response.mode, + "results": response.results, + }, + "meta": { + "elapsed_ms": elapsed_ms, + "mode": response.mode, + "embedding_dims": 768, + "distance_metric": "l2", + } + }); + + println!("{}", serde_json::to_string(&output).unwrap_or_default()); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_distance_to_similarity_identical() { + let sim = distance_to_similarity(0.0); + assert!( + (sim - 1.0).abs() < f64::EPSILON, + "distance 0 should give similarity 1.0" + ); + } + + #[test] + fn test_distance_to_similarity_one() { + let sim = distance_to_similarity(1.0); + assert!( + (sim - 0.5).abs() < f64::EPSILON, + "distance 1 should give similarity 0.5" + ); + } + + #[test] + fn test_distance_to_similarity_large() { + let sim = distance_to_similarity(100.0); + assert!( + sim > 0.0 && sim < 0.02, + "large distance should give near-zero similarity" + ); + } + + #[test] + fn test_distance_to_similarity_range() { + for d in [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0] { + let sim = distance_to_similarity(d); + assert!( + (0.0..=1.0).contains(&sim), + "similarity {sim} out of [0, 1] range for distance {d}" + ); + } + } + + #[test] + fn test_distance_to_similarity_monotonic() { + let distances = 
[0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]; + for w in distances.windows(2) { + let s1 = distance_to_similarity(w[0]); + let s2 = distance_to_similarity(w[1]); + assert!( + s1 >= s2, + "similarity should decrease with distance: d={} s={} vs d={} s={}", + w[0], + s1, + w[1], + s2 + ); + } + } + + #[test] + fn test_parse_label_names_valid_json() { + let json = Some(r#"["bug","frontend","urgent"]"#.to_string()); + let labels = parse_label_names(&json); + assert_eq!(labels.len(), 3); + assert!(labels.contains("bug")); + assert!(labels.contains("frontend")); + assert!(labels.contains("urgent")); + } + + #[test] + fn test_parse_label_names_null() { + let labels = parse_label_names(&None); + assert!(labels.is_empty()); + } + + #[test] + fn test_parse_label_names_invalid_json() { + let json = Some("not valid json".to_string()); + let labels = parse_label_names(&json); + assert!(labels.is_empty()); + } + + #[test] + fn test_parse_label_names_empty_array() { + let json = Some("[]".to_string()); + let labels = parse_label_names(&json); + assert!(labels.is_empty()); + } + + #[test] + fn test_shared_labels_intersection() { + let a = Some(r#"["bug","frontend","urgent"]"#.to_string()); + let b = Some(r#"["bug","backend","urgent","perf"]"#.to_string()); + let labels_a = parse_label_names(&a); + let labels_b = parse_label_names(&b); + let shared: HashSet = labels_a.intersection(&labels_b).cloned().collect(); + assert_eq!(shared.len(), 2); + assert!(shared.contains("bug")); + assert!(shared.contains("urgent")); + } + + #[test] + fn test_shared_labels_no_overlap() { + let a = Some(r#"["bug"]"#.to_string()); + let b = Some(r#"["feature"]"#.to_string()); + let labels_a = parse_label_names(&a); + let labels_b = parse_label_names(&b); + let shared: HashSet = labels_a.intersection(&labels_b).cloned().collect(); + assert!(shared.is_empty()); + } +} diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs index 68de479..f87ab5c 100644 --- a/src/cli/commands/sync.rs +++ 
b/src/cli/commands/sync.rs @@ -26,6 +26,35 @@ pub struct SyncOptions { pub no_events: bool, pub robot_mode: bool, pub dry_run: bool, + pub issue_iids: Vec, + pub mr_iids: Vec, + pub project: Option, + pub preflight_only: bool, +} + +impl SyncOptions { + pub const MAX_SURGICAL_TARGETS: usize = 100; + + pub fn is_surgical(&self) -> bool { + !self.issue_iids.is_empty() || !self.mr_iids.is_empty() + } +} + +#[derive(Debug, Default, Serialize)] +pub struct SurgicalIids { + pub issues: Vec, + pub merge_requests: Vec, +} + +#[derive(Debug, Serialize)] +pub struct EntitySyncResult { + pub entity_type: String, + pub iid: u64, + pub outcome: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub toctou_reason: Option, } #[derive(Debug, Default, Serialize)] @@ -49,6 +78,14 @@ pub struct SyncResult { pub issue_projects: Vec, #[serde(skip)] pub mr_projects: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub surgical_mode: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub surgical_iids: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub entity_results: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub preflight_only: Option, } /// Apply semantic color to a stage-completion icon glyph. @@ -66,6 +103,11 @@ pub async fn run_sync( run_id: Option<&str>, signal: &ShutdownSignal, ) -> Result { + // Surgical dispatch: if any IIDs specified, route to the surgical pipeline. 
+ if options.is_surgical() { + return super::sync_surgical::run_sync_surgical(config, options, run_id, signal).await; + } + let generated_id; let run_id = match run_id { Some(id) => id, @@ -1029,4 +1071,93 @@ mod tests { assert!(rows[0].contains("0 statuses updated")); assert!(rows[0].contains("skipped (disabled)")); } + + #[test] + fn sync_result_default_omits_surgical_fields() { + let result = SyncResult::default(); + let json = serde_json::to_value(&result).unwrap(); + assert!(json.get("surgical_mode").is_none()); + assert!(json.get("surgical_iids").is_none()); + assert!(json.get("entity_results").is_none()); + assert!(json.get("preflight_only").is_none()); + } + + #[test] + fn sync_result_with_surgical_fields_serializes_correctly() { + let result = SyncResult { + surgical_mode: Some(true), + surgical_iids: Some(SurgicalIids { + issues: vec![7, 42], + merge_requests: vec![10], + }), + entity_results: Some(vec![ + EntitySyncResult { + entity_type: "issue".to_string(), + iid: 7, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }, + EntitySyncResult { + entity_type: "issue".to_string(), + iid: 42, + outcome: "skipped_toctou".to_string(), + error: None, + toctou_reason: Some("updated_at changed".to_string()), + }, + ]), + preflight_only: Some(false), + ..SyncResult::default() + }; + let json = serde_json::to_value(&result).unwrap(); + assert_eq!(json["surgical_mode"], true); + assert_eq!(json["surgical_iids"]["issues"], serde_json::json!([7, 42])); + assert_eq!(json["entity_results"].as_array().unwrap().len(), 2); + assert_eq!(json["entity_results"][1]["outcome"], "skipped_toctou"); + assert_eq!(json["preflight_only"], false); + } + + #[test] + fn entity_sync_result_omits_none_fields() { + let entity = EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: 10, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }; + let json = serde_json::to_value(&entity).unwrap(); + assert!(json.get("error").is_none()); + 
assert!(json.get("toctou_reason").is_none()); + assert!(json.get("entity_type").is_some()); + } + + #[test] + fn is_surgical_with_issues() { + let opts = SyncOptions { + issue_iids: vec![1], + ..SyncOptions::default() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn is_surgical_with_mrs() { + let opts = SyncOptions { + mr_iids: vec![10], + ..SyncOptions::default() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn is_surgical_empty() { + let opts = SyncOptions::default(); + assert!(!opts.is_surgical()); + } + + #[test] + fn max_surgical_targets_is_100() { + assert_eq!(SyncOptions::MAX_SURGICAL_TARGETS, 100); + } } diff --git a/src/cli/commands/sync_surgical.rs b/src/cli/commands/sync_surgical.rs new file mode 100644 index 0000000..3d439a8 --- /dev/null +++ b/src/cli/commands/sync_surgical.rs @@ -0,0 +1,462 @@ +//! Surgical (by-IID) sync orchestration. +//! +//! Coordinates the full pipeline for syncing specific issues/MRs by IID: +//! resolve project → preflight fetch → ingest with TOCTOU → enrichment → +//! scoped doc regeneration → embedding. 
+ +use std::time::Instant; + +use tracing::{debug, warn}; + +use crate::Config; +use crate::cli::commands::embed::run_embed; +use crate::core::db::create_connection; +use crate::core::error::{LoreError, Result}; +use crate::core::lock::{AppLock, LockOptions}; +use crate::core::metrics::StageTiming; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::core::shutdown::ShutdownSignal; +use crate::core::sync_run::SyncRunRecorder; +use crate::documents::{SourceType, regenerate_documents_for_sources}; +use crate::gitlab::GitLabClient; +use crate::ingestion::surgical::{ + SurgicalTarget, enrich_entity_resource_events, enrich_mr_closes_issues, enrich_mr_file_changes, + ingest_issue_by_iid, ingest_mr_by_iid, preflight_fetch, +}; + +use super::sync::{EntitySyncResult, SurgicalIids, SyncOptions, SyncResult}; + +fn timing(name: &str, elapsed_ms: u64, items: usize, errors: usize) -> StageTiming { + StageTiming { + name: name.to_string(), + project: None, + elapsed_ms, + items_processed: items, + items_skipped: 0, + errors, + rate_limit_hits: 0, + retries: 0, + sub_stages: vec![], + } +} + +/// Run the surgical sync pipeline for specific IIDs within a single project. +/// +/// Unlike [`super::sync::run_sync`], this targets specific issues/MRs by IID +/// rather than paginating all entities across all projects. 
+pub async fn run_sync_surgical( + config: &Config, + options: SyncOptions, + run_id: Option<&str>, + signal: &ShutdownSignal, +) -> Result { + // ── Validate inputs ── + if !options.is_surgical() { + return Ok(SyncResult::default()); + } + + let project_str = options.project.as_deref().ok_or_else(|| { + LoreError::Other("Surgical sync requires --project (-p) to identify the target".into()) + })?; + + // ── Run ID ── + let generated_id; + let run_id = match run_id { + Some(id) => id, + None => { + generated_id = uuid::Uuid::new_v4().simple().to_string(); + &generated_id[..8] + } + }; + + // ── DB connections ── + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + let recorder_conn = create_connection(&db_path)?; + let lock_conn = create_connection(&db_path)?; + + // ── Resolve project ── + let project_id = resolve_project(&conn, project_str)?; + let (gitlab_project_id, project_path): (i64, String) = conn.query_row( + "SELECT gitlab_project_id, path_with_namespace FROM projects WHERE id = ?1", + [project_id], + |row| Ok((row.get(0)?, row.get(1)?)), + )?; + + // ── Build surgical targets ── + let mut targets = Vec::new(); + for &iid in &options.issue_iids { + targets.push(SurgicalTarget::Issue { iid }); + } + for &iid in &options.mr_iids { + targets.push(SurgicalTarget::MergeRequest { iid }); + } + + // ── Prepare result ── + let mut result = SyncResult { + run_id: run_id.to_string(), + surgical_mode: Some(true), + surgical_iids: Some(SurgicalIids { + issues: options.issue_iids.clone(), + merge_requests: options.mr_iids.clone(), + }), + ..SyncResult::default() + }; + let mut entity_results: Vec = Vec::new(); + let mut stage_timings: Vec = Vec::new(); + + // ── Start recorder ── + let recorder = SyncRunRecorder::start(&recorder_conn, "surgical-sync", run_id)?; + let iids_json = serde_json::to_string(&result.surgical_iids).unwrap_or_default(); + recorder.set_surgical_metadata(&recorder_conn, "surgical", 
"preflight", &iids_json)?; + + // ── GitLab client ── + let token = + std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { + env_var: config.gitlab.token_env_var.clone(), + })?; + let client = GitLabClient::new( + &config.gitlab.base_url, + &token, + Some(config.sync.requests_per_second), + ); + + // ── Stage: Preflight fetch ── + let preflight_start = Instant::now(); + debug!(%run_id, "Surgical sync: preflight fetch"); + recorder.update_phase(&recorder_conn, "preflight")?; + + let preflight = preflight_fetch(&client, gitlab_project_id, &project_path, &targets).await?; + + for failure in &preflight.failures { + entity_results.push(EntitySyncResult { + entity_type: failure.target.entity_type().to_string(), + iid: failure.target.iid(), + outcome: "not_found".to_string(), + error: Some(failure.error.to_string()), + toctou_reason: None, + }); + } + + stage_timings.push(timing( + "preflight", + preflight_start.elapsed().as_millis() as u64, + preflight.issues.len() + preflight.merge_requests.len(), + preflight.failures.len(), + )); + + // ── Preflight-only mode ── + if options.preflight_only { + result.preflight_only = Some(true); + result.entity_results = Some(entity_results); + recorder.succeed(&recorder_conn, &stage_timings, 0, preflight.failures.len())?; + return Ok(result); + } + + // ── Cancellation check ── + if signal.is_cancelled() { + result.entity_results = Some(entity_results); + recorder.cancel(&recorder_conn, "Cancelled before ingest")?; + return Ok(result); + } + + // ── Acquire lock ── + let mut lock = AppLock::new( + lock_conn, + LockOptions { + name: "sync".to_string(), + stale_lock_minutes: config.sync.stale_lock_minutes, + heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds, + }, + ); + lock.acquire(options.force)?; + + // ── Stage: Ingest ── + let ingest_start = Instant::now(); + debug!(%run_id, "Surgical sync: ingesting entities"); + recorder.update_phase(&recorder_conn, "ingest")?; + + let mut 
dirty_sources: Vec<(SourceType, i64)> = Vec::new(); + + // Ingest issues + for issue in &preflight.issues { + match ingest_issue_by_iid(&conn, config, project_id, issue) { + Ok(ir) => { + if ir.skipped_stale { + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "skipped_stale".to_string(), + error: None, + toctou_reason: Some("DB has same or newer updated_at".to_string()), + }); + recorder.record_entity_result(&recorder_conn, "issue", "skipped_stale")?; + } else { + dirty_sources.extend(ir.dirty_source_keys); + result.issues_updated += 1; + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "ingested".to_string(), + error: None, + toctou_reason: None, + }); + recorder.record_entity_result(&recorder_conn, "issue", "ingested")?; + } + } + Err(e) => { + warn!(iid = issue.iid, error = %e, "Surgical issue ingest failed"); + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "error".to_string(), + error: Some(e.to_string()), + toctou_reason: None, + }); + } + } + } + + // Ingest MRs + for mr in &preflight.merge_requests { + match ingest_mr_by_iid(&conn, config, project_id, mr) { + Ok(mr_result) => { + if mr_result.skipped_stale { + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "skipped_stale".to_string(), + error: None, + toctou_reason: Some("DB has same or newer updated_at".to_string()), + }); + recorder.record_entity_result(&recorder_conn, "mr", "skipped_stale")?; + } else { + dirty_sources.extend(mr_result.dirty_source_keys); + result.mrs_updated += 1; + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "ingested".to_string(), + error: None, + toctou_reason: None, + }); + recorder.record_entity_result(&recorder_conn, "mr", "ingested")?; + } + } + Err(e) => 
{ + warn!(iid = mr.iid, error = %e, "Surgical MR ingest failed"); + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "error".to_string(), + error: Some(e.to_string()), + toctou_reason: None, + }); + } + } + } + + stage_timings.push(timing( + "ingest", + ingest_start.elapsed().as_millis() as u64, + result.issues_updated + result.mrs_updated, + 0, + )); + + // ── Stage: Enrichment ── + if signal.is_cancelled() { + result.entity_results = Some(entity_results); + lock.release(); + recorder.cancel(&recorder_conn, "Cancelled before enrichment")?; + return Ok(result); + } + + let enrich_start = Instant::now(); + debug!(%run_id, "Surgical sync: enriching dependents"); + recorder.update_phase(&recorder_conn, "enrichment")?; + + // Enrich issues: resource events + if !options.no_events { + for issue in &preflight.issues { + let local_id = match conn.query_row( + "SELECT id FROM issues WHERE project_id = ? AND iid = ?", + (project_id, issue.iid), + |row| row.get::<_, i64>(0), + ) { + Ok(id) => id, + Err(_) => continue, + }; + + if let Err(e) = enrich_entity_resource_events( + &client, + &conn, + project_id, + gitlab_project_id, + "issue", + issue.iid, + local_id, + ) + .await + { + warn!(iid = issue.iid, error = %e, "Failed to enrich issue resource events"); + result.resource_events_failed += 1; + } else { + result.resource_events_fetched += 1; + } + } + } + + // Enrich MRs: resource events, closes_issues, file changes + for mr in &preflight.merge_requests { + let local_mr_id = match conn.query_row( + "SELECT id FROM merge_requests WHERE project_id = ? 
AND iid = ?", + (project_id, mr.iid), + |row| row.get::<_, i64>(0), + ) { + Ok(id) => id, + Err(_) => continue, + }; + + if !options.no_events { + if let Err(e) = enrich_entity_resource_events( + &client, + &conn, + project_id, + gitlab_project_id, + "merge_request", + mr.iid, + local_mr_id, + ) + .await + { + warn!(iid = mr.iid, error = %e, "Failed to enrich MR resource events"); + result.resource_events_failed += 1; + } else { + result.resource_events_fetched += 1; + } + } + + if let Err(e) = enrich_mr_closes_issues( + &client, + &conn, + project_id, + gitlab_project_id, + mr.iid, + local_mr_id, + ) + .await + { + warn!(iid = mr.iid, error = %e, "Failed to enrich MR closes_issues"); + } + + if let Err(e) = enrich_mr_file_changes( + &client, + &conn, + project_id, + gitlab_project_id, + mr.iid, + local_mr_id, + ) + .await + { + warn!(iid = mr.iid, error = %e, "Failed to enrich MR file changes"); + result.mr_diffs_failed += 1; + } else { + result.mr_diffs_fetched += 1; + } + } + + stage_timings.push(timing( + "enrichment", + enrich_start.elapsed().as_millis() as u64, + result.resource_events_fetched + result.mr_diffs_fetched, + result.resource_events_failed + result.mr_diffs_failed, + )); + + // ── Stage: Scoped doc regeneration ── + if !options.no_docs && !dirty_sources.is_empty() { + if signal.is_cancelled() { + result.entity_results = Some(entity_results); + lock.release(); + recorder.cancel(&recorder_conn, "Cancelled before doc generation")?; + return Ok(result); + } + + let docs_start = Instant::now(); + debug!(%run_id, count = dirty_sources.len(), "Surgical sync: regenerating docs"); + recorder.update_phase(&recorder_conn, "docs")?; + + match regenerate_documents_for_sources(&conn, &dirty_sources) { + Ok(docs_result) => { + result.documents_regenerated = docs_result.regenerated; + result.documents_errored = docs_result.errored; + } + Err(e) => { + warn!(error = %e, "Surgical doc regeneration failed"); + } + } + + stage_timings.push(timing( + "docs", + 
docs_start.elapsed().as_millis() as u64, + result.documents_regenerated, + result.documents_errored, + )); + } + + // ── Stage: Embedding ── + if !options.no_embed { + if signal.is_cancelled() { + result.entity_results = Some(entity_results); + lock.release(); + recorder.cancel(&recorder_conn, "Cancelled before embedding")?; + return Ok(result); + } + + let embed_start = Instant::now(); + debug!(%run_id, "Surgical sync: embedding"); + recorder.update_phase(&recorder_conn, "embed")?; + + match run_embed(config, false, false, None, signal).await { + Ok(embed_result) => { + result.documents_embedded = embed_result.docs_embedded; + result.embedding_failed = embed_result.failed; + } + Err(e) => { + // Embedding failure is non-fatal (Ollama may be unavailable) + warn!(error = %e, "Surgical embedding failed (non-fatal)"); + } + } + + stage_timings.push(timing( + "embed", + embed_start.elapsed().as_millis() as u64, + result.documents_embedded, + result.embedding_failed, + )); + } + + // ── Finalize ── + lock.release(); + result.entity_results = Some(entity_results); + + let total_items = result.issues_updated + result.mrs_updated; + let total_errors = + result.resource_events_failed + result.mr_diffs_failed + result.documents_errored; + recorder.succeed(&recorder_conn, &stage_timings, total_items, total_errors)?; + + debug!( + %run_id, + issues = result.issues_updated, + mrs = result.mrs_updated, + docs = result.documents_regenerated, + "Surgical sync complete" + ); + + Ok(result) +} + +#[cfg(test)] +#[path = "sync_surgical_tests.rs"] +mod tests; diff --git a/src/cli/commands/sync_surgical_tests.rs b/src/cli/commands/sync_surgical_tests.rs new file mode 100644 index 0000000..fcbfef8 --- /dev/null +++ b/src/cli/commands/sync_surgical_tests.rs @@ -0,0 +1,323 @@ +//! Tests for `sync_surgical.rs` — surgical sync orchestration. 
+ +use std::path::Path; + +use wiremock::matchers::{method, path, path_regex}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +use crate::cli::commands::sync::SyncOptions; +use crate::cli::commands::sync_surgical::run_sync_surgical; +use crate::core::config::{Config, GitLabConfig, ProjectConfig}; +use crate::core::db::{create_connection, run_migrations}; +use crate::core::shutdown::ShutdownSignal; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn setup_temp_db() -> (tempfile::NamedTempFile, rusqlite::Connection) { + let tmp = tempfile::NamedTempFile::new().unwrap(); + let conn = create_connection(tmp.path()).unwrap(); + run_migrations(&conn).unwrap(); + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) + VALUES (1, 42, 'group/repo', 'https://gitlab.example.com/group/repo')", + [], + ) + .unwrap(); + (tmp, conn) +} + +fn test_config(base_url: &str, db_path: &Path) -> Config { + Config { + gitlab: GitLabConfig { + base_url: base_url.to_string(), + token_env_var: "LORE_TEST_TOKEN".to_string(), + }, + projects: vec![ProjectConfig { + path: "group/repo".to_string(), + }], + default_project: None, + sync: crate::core::config::SyncConfig { + requests_per_second: 1000.0, + stale_lock_minutes: 30, + heartbeat_interval_seconds: 10, + ..Default::default() + }, + storage: crate::core::config::StorageConfig { + db_path: Some(db_path.to_string_lossy().to_string()), + backup_dir: None, + compress_raw_payloads: false, + }, + embedding: Default::default(), + logging: Default::default(), + scoring: Default::default(), + } +} + +fn issue_json(iid: i64) -> serde_json::Value { + serde_json::json!({ + "id": 1000 + iid, + "iid": iid, + "project_id": 42, + "title": format!("Test issue #{iid}"), + "description": "desc", + "state": "opened", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": 
"2026-02-17T12:00:00.000+00:00", + "closed_at": null, + "author": { "id": 1, "username": "alice", "name": "Alice" }, + "assignees": [], + "labels": ["bug"], + "milestone": null, + "due_date": null, + "web_url": format!("https://gitlab.example.com/group/repo/-/issues/{iid}") + }) +} + +#[allow(dead_code)] // Used by MR integration tests added later +fn mr_json(iid: i64) -> serde_json::Value { + serde_json::json!({ + "id": 2000 + iid, + "iid": iid, + "project_id": 42, + "title": format!("Test MR !{iid}"), + "description": "desc", + "state": "opened", + "draft": false, + "work_in_progress": false, + "source_branch": "feat", + "target_branch": "main", + "sha": "abc123", + "references": { "short": format!("!{iid}"), "full": format!("group/repo!{iid}") }, + "detailed_merge_status": "mergeable", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "merged_at": null, + "closed_at": null, + "author": { "id": 2, "username": "bob", "name": "Bob" }, + "merge_user": null, + "merged_by": null, + "labels": [], + "assignees": [], + "reviewers": [], + "web_url": format!("https://gitlab.example.com/group/repo/-/merge_requests/{iid}"), + "merge_commit_sha": null, + "squash_commit_sha": null + }) +} + +/// Mount all enrichment endpoint mocks (resource events, closes_issues, diffs) as empty. 
+async fn mount_empty_enrichment_mocks(server: &MockServer) { + // Resource events for issues + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/issues/\d+/resource_state_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/issues/\d+/resource_label_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/issues/\d+/resource_milestone_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + + // Resource events for MRs + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/merge_requests/\d+/resource_state_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/merge_requests/\d+/resource_label_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/merge_requests/\d+/resource_milestone_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + + // Closes issues + Mock::given(method("GET")) + .and(path_regex( + r"/api/v4/projects/\d+/merge_requests/\d+/closes_issues", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; + + // Diffs + Mock::given(method("GET")) + .and(path_regex(r"/api/v4/projects/\d+/merge_requests/\d+/diffs")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(server) + .await; +} + +// 
--------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn ingest_one_issue_updates_result() { + let server = MockServer::start().await; + let (tmp, _conn) = setup_temp_db(); + + // Set token env var + // SAFETY: `set_var` mutates process-global state; this is sound only if no other + // thread reads or writes the environment concurrently. NOTE(review): cargo runs + // tests on parallel threads by default — confirm these tests cannot race on env. + unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") }; + + // Mock preflight issue fetch + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7))) + .mount(&server) + .await; + + mount_empty_enrichment_mocks(&server).await; + + let config = test_config(&server.uri(), tmp.path()); + let options = SyncOptions { + robot_mode: true, + issue_iids: vec![7], + project: Some("group/repo".to_string()), + no_embed: true, // skip embed (no Ollama in tests) + ..SyncOptions::default() + }; + let signal = ShutdownSignal::new(); + let result = run_sync_surgical(&config, options, Some("test01"), &signal) + .await + .unwrap(); + + assert_eq!(result.surgical_mode, Some(true)); + assert_eq!(result.issues_updated, 1); + assert!(result.entity_results.is_some()); + let entities = result.entity_results.unwrap(); + assert_eq!(entities.len(), 1); + assert_eq!(entities[0].outcome, "ingested"); +} + +#[tokio::test] +async fn preflight_only_returns_early() { + let server = MockServer::start().await; + let (tmp, _conn) = setup_temp_db(); + + // SAFETY: `set_var` mutates process-global state; sound only if no other thread + // touches the environment concurrently. NOTE(review): confirm serial execution.
+ unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") }; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7))) + .mount(&server) + .await; + + let config = test_config(&server.uri(), tmp.path()); + let options = SyncOptions { + robot_mode: true, + issue_iids: vec![7], + project: Some("group/repo".to_string()), + preflight_only: true, + ..SyncOptions::default() + }; + let signal = ShutdownSignal::new(); + let result = run_sync_surgical(&config, options, Some("test02"), &signal) + .await + .unwrap(); + + assert_eq!(result.preflight_only, Some(true)); + assert_eq!(result.issues_updated, 0); // No actual ingest +} + +#[tokio::test] +async fn cancellation_before_ingest_cancels_recorder() { + let server = MockServer::start().await; + let (tmp, _conn) = setup_temp_db(); + + // SAFETY: Tests are single-threaded within each test function. + unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") }; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7))) + .mount(&server) + .await; + + let config = test_config(&server.uri(), tmp.path()); + let options = SyncOptions { + robot_mode: true, + issue_iids: vec![7], + project: Some("group/repo".to_string()), + ..SyncOptions::default() + }; + let signal = ShutdownSignal::new(); + signal.cancel(); // Cancel before we start + let result = run_sync_surgical(&config, options, Some("test03"), &signal) + .await + .unwrap(); + + assert_eq!(result.issues_updated, 0); +} + +fn dummy_config() -> Config { + Config { + gitlab: GitLabConfig { + base_url: "https://unused.example.com".to_string(), + token_env_var: "LORE_TEST_TOKEN".to_string(), + }, + projects: vec![], + default_project: None, + sync: Default::default(), + storage: Default::default(), + embedding: Default::default(), + logging: Default::default(), + scoring: Default::default(), + } +} 
+ +#[tokio::test] +async fn missing_project_returns_error() { + let options = SyncOptions { + issue_iids: vec![7], + project: None, // Missing! + ..SyncOptions::default() + }; + let config = dummy_config(); + let signal = ShutdownSignal::new(); + let err = run_sync_surgical(&config, options, Some("test04"), &signal) + .await + .unwrap_err(); + + assert!(err.to_string().contains("--project")); +} + +#[tokio::test] +async fn empty_iids_returns_default_result() { + let config = dummy_config(); + let options = SyncOptions::default(); // No IIDs + let signal = ShutdownSignal::new(); + let result = run_sync_surgical(&config, options, None, &signal) + .await + .unwrap(); + + assert_eq!(result.issues_updated, 0); + assert_eq!(result.mrs_updated, 0); + assert!(result.surgical_mode.is_none()); // Not surgical mode +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index e51cb70..eaa3f47 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -246,6 +246,10 @@ pub enum Commands { /// Launch the interactive TUI dashboard Tui(TuiArgs), + /// Find semantically related entities via vector similarity + #[command(visible_alias = "similar")] + Related(RelatedArgs), + /// Detect discussion divergence from original intent Drift { /// Entity type (currently only "issues" supported) @@ -814,6 +818,22 @@ pub struct SyncArgs { /// Show sync progress in interactive TUI #[arg(long)] pub tui: bool, + + /// Surgically sync specific issues by IID (repeatable) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..))] + pub issue: Vec, + + /// Surgically sync specific merge requests by IID (repeatable) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..))] + pub mr: Vec, + + /// Scope to a single project (required for surgical sync if no defaultProject) + #[arg(short = 'p', long)] + pub project: Option, + + /// Run preflight validation only (no DB writes). Requires --issue or --mr. 
+ #[arg(long)] + pub preflight_only: bool, } #[derive(Parser)] @@ -1054,6 +1074,32 @@ pub struct TraceArgs { pub limit: usize, } +#[derive(Parser)] +#[command(after_help = "\x1b[1mExamples:\x1b[0m + lore related issues 42 # Find issues similar to #42 + lore related mrs 99 -p group/repo # MRs similar to !99 + lore related 'authentication timeout' # Concept search")] +pub struct RelatedArgs { + /// Entity type ('issues' or 'mrs') OR free-text query + pub query_or_type: String, + + /// Entity IID (when first arg is entity type) + pub iid: Option, + + /// Maximum results + #[arg( + short = 'n', + long = "limit", + default_value = "10", + help_heading = "Output" + )] + pub limit: usize, + + /// Scope to project (fuzzy match) + #[arg(short = 'p', long, help_heading = "Filters")] + pub project: Option, +} + #[derive(Parser)] pub struct CountArgs { /// Entity type to count (issues, mrs, discussions, notes, events) diff --git a/src/core/db.rs b/src/core/db.rs index d07ffe3..0a2f063 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -93,6 +93,10 @@ const MIGRATIONS: &[(&str, &str)] = &[ "027", include_str!("../../migrations/027_tui_list_indexes.sql"), ), + ( + "028", + include_str!("../../migrations/028_surgical_sync_runs.sql"), + ), ]; pub fn create_connection(db_path: &Path) -> Result { diff --git a/src/core/error.rs b/src/core/error.rs index 84ccad7..a1b7889 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -21,6 +21,7 @@ pub enum ErrorCode { EmbeddingFailed, NotFound, Ambiguous, + SurgicalPreflightFailed, } impl std::fmt::Display for ErrorCode { @@ -44,6 +45,7 @@ impl std::fmt::Display for ErrorCode { Self::EmbeddingFailed => "EMBEDDING_FAILED", Self::NotFound => "NOT_FOUND", Self::Ambiguous => "AMBIGUOUS", + Self::SurgicalPreflightFailed => "SURGICAL_PREFLIGHT_FAILED", }; write!(f, "{code}") } @@ -70,6 +72,7 @@ impl ErrorCode { Self::EmbeddingFailed => 16, Self::NotFound => 17, Self::Ambiguous => 18, + Self::SurgicalPreflightFailed => 6, } } } @@ -153,6 +156,14 
@@ pub enum LoreError { #[error("No embeddings found. Run: lore embed")] EmbeddingsNotBuilt, + + #[error("Surgical preflight failed for {entity_type} !{iid} in {project}: {reason}")] + SurgicalPreflightFailed { + entity_type: String, + iid: u64, + project: String, + reason: String, + }, } impl LoreError { @@ -179,6 +190,7 @@ impl LoreError { Self::OllamaModelNotFound { .. } => ErrorCode::OllamaModelNotFound, Self::EmbeddingFailed { .. } => ErrorCode::EmbeddingFailed, Self::EmbeddingsNotBuilt => ErrorCode::EmbeddingFailed, + Self::SurgicalPreflightFailed { .. } => ErrorCode::SurgicalPreflightFailed, } } @@ -227,6 +239,9 @@ impl LoreError { Some("Check Ollama logs or retry with 'lore embed --retry-failed'") } Self::EmbeddingsNotBuilt => Some("Generate embeddings first: lore embed"), + Self::SurgicalPreflightFailed { .. } => Some( + "Verify the IID exists and you have access to the project.\n\n Example:\n lore issues -p \n lore mrs -p ", + ), Self::Json(_) | Self::Io(_) | Self::Transform(_) | Self::Other(_) => None, } } @@ -254,6 +269,9 @@ impl LoreError { Self::EmbeddingFailed { .. } => vec!["lore embed --retry-failed"], Self::MigrationFailed { .. } => vec!["lore migrate"], Self::GitLabNetworkError { .. } => vec!["lore doctor"], + Self::SurgicalPreflightFailed { .. 
} => { + vec!["lore issues -p ", "lore mrs -p "] + } _ => vec![], } } @@ -293,3 +311,72 @@ impl From<&LoreError> for RobotErrorOutput { } pub type Result = std::result::Result; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn surgical_preflight_failed_display() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "issue".to_string(), + iid: 42, + project: "group/repo".to_string(), + reason: "not found on GitLab".to_string(), + }; + let msg = err.to_string(); + assert!(msg.contains("issue"), "missing entity_type: {msg}"); + assert!(msg.contains("42"), "missing iid: {msg}"); + assert!(msg.contains("group/repo"), "missing project: {msg}"); + assert!(msg.contains("not found on GitLab"), "missing reason: {msg}"); + } + + #[test] + fn surgical_preflight_failed_error_code() { + let code = ErrorCode::SurgicalPreflightFailed; + assert_eq!(code.exit_code(), 6); + } + + #[test] + fn surgical_preflight_failed_code_mapping() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "merge_request".to_string(), + iid: 99, + project: "ns/proj".to_string(), + reason: "404".to_string(), + }; + assert_eq!(err.code(), ErrorCode::SurgicalPreflightFailed); + } + + #[test] + fn surgical_preflight_failed_has_suggestion() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "issue".to_string(), + iid: 7, + project: "g/p".to_string(), + reason: "not found".to_string(), + }; + assert!(err.suggestion().is_some()); + } + + #[test] + fn surgical_preflight_failed_has_actions() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "issue".to_string(), + iid: 7, + project: "g/p".to_string(), + reason: "not found".to_string(), + }; + assert!(!err.actions().is_empty()); + } + + #[test] + fn surgical_preflight_failed_display_code_string() { + let code = 
ErrorCode::SurgicalPreflightFailed; + assert_eq!(code.to_string(), "SURGICAL_PREFLIGHT_FAILED"); + } +} diff --git a/src/core/sync_run.rs b/src/core/sync_run.rs index a07b250..8447c95 100644 --- a/src/core/sync_run.rs +++ b/src/core/sync_run.rs @@ -20,6 +20,67 @@ impl SyncRunRecorder { Ok(Self { row_id }) } + /// Returns the database row ID for this sync run. + pub fn row_id(&self) -> i64 { + self.row_id + } + + /// Set surgical-specific metadata after `start()`. + /// + /// Takes `&self` so the recorder can continue to be used for phase + /// updates and entity result recording before finalization. + pub fn set_surgical_metadata( + &self, + conn: &Connection, + mode: &str, + phase: &str, + iids_json: &str, + ) -> Result<()> { + conn.execute( + "UPDATE sync_runs SET mode = ?1, phase = ?2, surgical_iids_json = ?3 + WHERE id = ?4", + rusqlite::params![mode, phase, iids_json, self.row_id], + )?; + Ok(()) + } + + /// Update the pipeline phase and refresh the heartbeat timestamp. + pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> { + conn.execute( + "UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3", + rusqlite::params![phase, now_ms(), self.row_id], + )?; + Ok(()) + } + + /// Increment a surgical counter column for the given entity type and stage. + /// + /// Unknown `(entity_type, stage)` combinations are silently ignored. + /// Column names are derived from a hardcoded match — no SQL injection risk. 
+ pub fn record_entity_result( + &self, + conn: &Connection, + entity_type: &str, + stage: &str, + ) -> Result<()> { + let column = match (entity_type, stage) { + ("issue", "fetched") => "issues_fetched", + ("issue", "ingested") => "issues_ingested", + ("mr", "fetched") => "mrs_fetched", + ("mr", "ingested") => "mrs_ingested", + ("issue" | "mr", "skipped_stale") => "skipped_stale", + ("doc", "regenerated") => "docs_regenerated", + ("doc", "embedded") => "docs_embedded", + (_, "warning") => "warnings_count", + _ => return Ok(()), + }; + conn.execute( + &format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1"), + rusqlite::params![self.row_id], + )?; + Ok(()) + } + pub fn succeed( self, conn: &Connection, @@ -63,6 +124,18 @@ impl SyncRunRecorder { )?; Ok(()) } + + /// Finalize the run as cancelled. Consumes self to prevent further use. + pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> { + let now = now_ms(); + conn.execute( + "UPDATE sync_runs SET finished_at = ?1, cancelled_at = ?2, + status = 'cancelled', error = ?3 + WHERE id = ?4", + rusqlite::params![now, now, reason, self.row_id], + )?; + Ok(()) + } } #[cfg(test)] diff --git a/src/core/sync_run_tests.rs b/src/core/sync_run_tests.rs index b17c816..a130a6a 100644 --- a/src/core/sync_run_tests.rs +++ b/src/core/sync_run_tests.rs @@ -146,3 +146,247 @@ fn test_sync_run_recorder_fail_with_partial_metrics() { assert_eq!(parsed.len(), 1); assert_eq!(parsed[0].name, "ingest_issues"); } + +// --------------------------------------------------------------------------- +// Migration 028: Surgical sync columns +// --------------------------------------------------------------------------- + +#[test] +fn sync_run_surgical_columns_exist() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, phase, surgical_iids_json) + VALUES (1000, 1000, 'running', 'sync', 'surgical', 'preflight', '{\"issues\":[7],\"mrs\":[101]}')", + [], + 
) + .unwrap(); + let (mode, phase, iids_json): (String, String, String) = conn + .query_row( + "SELECT mode, phase, surgical_iids_json FROM sync_runs WHERE mode = 'surgical'", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(mode, "surgical"); + assert_eq!(phase, "preflight"); + assert!(iids_json.contains("7")); +} + +#[test] +fn sync_run_counter_defaults_are_zero() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command) + VALUES (2000, 2000, 'running', 'sync')", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); + let (issues_fetched, mrs_fetched, docs_regenerated, warnings_count): (i64, i64, i64, i64) = + conn.query_row( + "SELECT issues_fetched, mrs_fetched, docs_regenerated, warnings_count FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(issues_fetched, 0); + assert_eq!(mrs_fetched, 0); + assert_eq!(docs_regenerated, 0); + assert_eq!(warnings_count, 0); +} + +#[test] +fn sync_run_nullable_columns_default_to_null() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command) + VALUES (3000, 3000, 'running', 'sync')", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); + let (mode, phase, cancelled_at): (Option, Option, Option) = conn + .query_row( + "SELECT mode, phase, cancelled_at FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert!(mode.is_none()); + assert!(phase.is_none()); + assert!(cancelled_at.is_none()); +} + +#[test] +fn sync_run_counter_round_trip() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, issues_fetched, mrs_ingested, docs_embedded) + VALUES (4000, 4000, 'succeeded', 'sync', 'surgical', 3, 2, 5)", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); 
+ let (issues_fetched, mrs_ingested, docs_embedded): (i64, i64, i64) = conn + .query_row( + "SELECT issues_fetched, mrs_ingested, docs_embedded FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(issues_fetched, 3); + assert_eq!(mrs_ingested, 2); + assert_eq!(docs_embedded, 5); +} + +// --------------------------------------------------------------------------- +// bd-arka: SyncRunRecorder surgical lifecycle methods +// --------------------------------------------------------------------------- + +#[test] +fn surgical_lifecycle_start_metadata_succeed() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "surg001").unwrap(); + let row_id = recorder.row_id(); + + recorder + .set_surgical_metadata( + &conn, + "surgical", + "preflight", + r#"{"issues":[7,8],"mrs":[101]}"#, + ) + .unwrap(); + + recorder.update_phase(&conn, "ingest").unwrap(); + recorder + .record_entity_result(&conn, "issue", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "ingested") + .unwrap(); + recorder + .record_entity_result(&conn, "mr", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "mr", "ingested") + .unwrap(); + + recorder.succeed(&conn, &[], 3, 0).unwrap(); + + let (mode, phase, iids, issues_fetched, mrs_fetched, issues_ingested, mrs_ingested, status): ( + String, + String, + String, + i64, + i64, + i64, + i64, + String, + ) = conn + .query_row( + "SELECT mode, phase, surgical_iids_json, issues_fetched, mrs_fetched, + issues_ingested, mrs_ingested, status + FROM sync_runs WHERE id = ?1", + [row_id], + |r| { + Ok(( + r.get(0)?, + r.get(1)?, + r.get(2)?, + r.get(3)?, + r.get(4)?, + r.get(5)?, + r.get(6)?, + r.get(7)?, + )) + }, + ) + .unwrap(); + + assert_eq!(mode, "surgical"); + assert_eq!(phase, "ingest"); // Last phase set before succeed + assert!(iids.contains("101")); + 
assert_eq!(issues_fetched, 2); + assert_eq!(mrs_fetched, 1); + assert_eq!(issues_ingested, 1); + assert_eq!(mrs_ingested, 1); + assert_eq!(status, "succeeded"); +} + +#[test] +fn surgical_lifecycle_cancel() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "cancel01").unwrap(); + let row_id = recorder.row_id(); + + recorder + .set_surgical_metadata(&conn, "surgical", "preflight", "{}") + .unwrap(); + recorder + .cancel(&conn, "User requested cancellation") + .unwrap(); + + let (status, error, cancelled_at, finished_at): ( + String, + Option, + Option, + Option, + ) = conn + .query_row( + "SELECT status, error, cancelled_at, finished_at FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + + assert_eq!(status, "cancelled"); + assert_eq!(error.as_deref(), Some("User requested cancellation")); + assert!(cancelled_at.is_some()); + assert!(finished_at.is_some()); +} + +#[test] +fn record_entity_result_ignores_unknown() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "unk001").unwrap(); + // Should not panic or error on unknown combinations + recorder + .record_entity_result(&conn, "widget", "exploded") + .unwrap(); +} + +#[test] +fn record_entity_result_doc_counters() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "cnt001").unwrap(); + let row_id = recorder.row_id(); + + recorder + .record_entity_result(&conn, "doc", "regenerated") + .unwrap(); + recorder + .record_entity_result(&conn, "doc", "regenerated") + .unwrap(); + recorder + .record_entity_result(&conn, "doc", "embedded") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "skipped_stale") + .unwrap(); + + let (docs_regen, docs_embed, skipped): (i64, i64, i64) = conn + .query_row( + "SELECT docs_regenerated, docs_embedded, skipped_stale FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), 
+ ) + .unwrap(); + + assert_eq!(docs_regen, 2); + assert_eq!(docs_embed, 1); + assert_eq!(skipped, 1); +} diff --git a/src/documents/mod.rs b/src/documents/mod.rs index 3681cb8..a14b7ee 100644 --- a/src/documents/mod.rs +++ b/src/documents/mod.rs @@ -7,7 +7,10 @@ pub use extractor::{ extract_discussion_document, extract_issue_document, extract_mr_document, extract_note_document, extract_note_document_cached, }; -pub use regenerator::{RegenerateResult, regenerate_dirty_documents}; +pub use regenerator::{ + RegenerateForSourcesResult, RegenerateResult, regenerate_dirty_documents, + regenerate_documents_for_sources, +}; pub use truncation::{ MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, TruncationReason, TruncationResult, truncate_discussion, truncate_hard_cap, truncate_utf8, diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs index baaadb3..504eb88 100644 --- a/src/documents/regenerator.rs +++ b/src/documents/regenerator.rs @@ -268,6 +268,75 @@ fn get_document_id(conn: &Connection, source_type: SourceType, source_id: i64) - Ok(id) } +// --------------------------------------------------------------------------- +// Scoped regeneration for surgical sync +// --------------------------------------------------------------------------- + +/// Result of regenerating documents for specific source keys. +#[derive(Debug, Default)] +pub struct RegenerateForSourcesResult { + pub regenerated: usize, + pub unchanged: usize, + pub errored: usize, + /// IDs of documents that were regenerated or confirmed unchanged, + /// for downstream scoped embedding. + pub document_ids: Vec, +} + +/// Regenerate documents for specific source keys only. +/// +/// Unlike [`regenerate_dirty_documents`], this does NOT read from the +/// `dirty_sources` table. It processes exactly the provided keys and +/// returns the document IDs for scoped embedding. 
+pub fn regenerate_documents_for_sources( + conn: &Connection, + source_keys: &[(SourceType, i64)], +) -> Result { + let mut result = RegenerateForSourcesResult::default(); + let mut cache = ParentMetadataCache::new(); + + for (source_type, source_id) in source_keys { + match regenerate_one(conn, *source_type, *source_id, &mut cache) { + Ok(changed) => { + if changed { + result.regenerated += 1; + } else { + result.unchanged += 1; + } + clear_dirty(conn, *source_type, *source_id)?; + + // Collect document_id for scoped embedding + match get_document_id(conn, *source_type, *source_id) { + Ok(doc_id) => result.document_ids.push(doc_id), + Err(_) => { + // Document was deleted (source no longer exists) — no ID to return + } + } + } + Err(e) => { + warn!( + source_type = %source_type, + source_id, + error = %e, + "Scoped regeneration failed" + ); + record_dirty_error(conn, *source_type, *source_id, &e.to_string())?; + result.errored += 1; + } + } + } + + debug!( + regenerated = result.regenerated, + unchanged = result.unchanged, + errored = result.errored, + document_ids = result.document_ids.len(), + "Scoped document regeneration complete" + ); + + Ok(result) +} + #[cfg(test)] #[path = "regenerator_tests.rs"] mod tests; diff --git a/src/documents/regenerator_tests.rs b/src/documents/regenerator_tests.rs index 04bde8c..7eb820b 100644 --- a/src/documents/regenerator_tests.rs +++ b/src/documents/regenerator_tests.rs @@ -518,3 +518,65 @@ fn test_note_regeneration_cache_invalidates_across_parents() { assert!(beta_content.contains("parent_iid: 99")); assert!(beta_content.contains("parent_title: Issue Beta")); } + +// --------------------------------------------------------------------------- +// Scoped regeneration (bd-hs6j) +// --------------------------------------------------------------------------- + +#[test] +fn scoped_regen_only_processes_specified_sources() { + let conn = setup_db(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, 
state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Issue A', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (2, 20, 1, 43, 'Issue B', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + mark_dirty(&conn, SourceType::Issue, 1).unwrap(); + mark_dirty(&conn, SourceType::Issue, 2).unwrap(); + + // Regenerate only issue 1 + let result = regenerate_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap(); + + assert_eq!(result.regenerated, 1); + assert_eq!(result.document_ids.len(), 1); + + // Issue 1 dirty cleared, issue 2 still dirty + let remaining = get_dirty_sources(&conn).unwrap(); + assert_eq!(remaining.len(), 1); + assert_eq!(remaining[0], (SourceType::Issue, 2)); +} + +#[test] +fn scoped_regen_returns_document_ids() { + let conn = setup_db(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + mark_dirty(&conn, SourceType::Issue, 1).unwrap(); + + let result = regenerate_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap(); + + assert!(!result.document_ids.is_empty()); + let exists: bool = conn + .query_row( + "SELECT EXISTS(SELECT 1 FROM documents WHERE id = ?1)", + [result.document_ids[0]], + |r| r.get(0), + ) + .unwrap(); + assert!(exists); +} + +#[test] +fn scoped_regen_handles_missing_source() { + let conn = setup_db(); + // Source key 9999 doesn't exist in issues table + let result = regenerate_documents_for_sources(&conn, &[(SourceType::Issue, 9999)]).unwrap(); + + // regenerate_one returns Ok(true) for deletions, but no doc_id to return + assert_eq!(result.document_ids.len(), 0); +} diff --git a/src/gitlab/client.rs b/src/gitlab/client.rs index 2c69f97..4e664ac 100644 --- a/src/gitlab/client.rs +++ b/src/gitlab/client.rs @@ -112,6 +112,20 
@@ impl GitLabClient { self.request("/api/v4/version").await } + pub async fn get_issue_by_iid(&self, gitlab_project_id: i64, iid: i64) -> Result { + let path = format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}"); + self.request(&path).await + } + + pub async fn get_mr_by_iid( + &self, + gitlab_project_id: i64, + iid: i64, + ) -> Result { + let path = format!("/api/v4/projects/{gitlab_project_id}/merge_requests/{iid}"); + self.request(&path).await + } + const MAX_RETRIES: u32 = 3; async fn request(&self, path: &str) -> Result { @@ -848,4 +862,143 @@ mod tests { let result = parse_link_header_next(&headers); assert!(result.is_none()); } + + // ───────────────────────────────────────────────────────────────── + // get_issue_by_iid / get_mr_by_iid + // ───────────────────────────────────────────────────────────────── + + use wiremock::matchers::{header, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + fn mock_issue_json(iid: i64) -> serde_json::Value { + serde_json::json!({ + "id": 1000 + iid, + "iid": iid, + "project_id": 42, + "title": format!("Issue #{iid}"), + "description": null, + "state": "opened", + "created_at": "2024-01-15T10:00:00.000Z", + "updated_at": "2024-01-16T12:00:00.000Z", + "closed_at": null, + "author": { "id": 1, "username": "alice", "name": "Alice", "avatar_url": null }, + "assignees": [], + "labels": ["bug"], + "milestone": null, + "due_date": null, + "web_url": format!("https://gitlab.example.com/g/p/-/issues/{iid}") + }) + } + + fn mock_mr_json(iid: i64) -> serde_json::Value { + serde_json::json!({ + "id": 2000 + iid, + "iid": iid, + "project_id": 42, + "title": format!("MR !{iid}"), + "description": null, + "state": "opened", + "draft": false, + "work_in_progress": false, + "source_branch": "feat", + "target_branch": "main", + "sha": "abc123", + "references": { "short": format!("!{iid}"), "full": format!("g/p!{iid}") }, + "detailed_merge_status": "mergeable", + "created_at": "2024-02-01T08:00:00.000Z", + 
"updated_at": "2024-02-02T09:00:00.000Z", + "merged_at": null, + "closed_at": null, + "author": { "id": 2, "username": "bob", "name": "Bob", "avatar_url": null }, + "merge_user": null, + "merged_by": null, + "labels": [], + "assignees": [], + "reviewers": [], + "web_url": format!("https://gitlab.example.com/g/p/-/merge_requests/{iid}"), + "merge_commit_sha": null, + "squash_commit_sha": null + }) + } + + fn test_client(base_url: &str) -> GitLabClient { + GitLabClient::new(base_url, "test-token", Some(1000.0)) + } + + #[tokio::test] + async fn get_issue_by_iid_success() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/7")) + .and(header("PRIVATE-TOKEN", "test-token")) + .respond_with(ResponseTemplate::new(200).set_body_json(mock_issue_json(7))) + .mount(&server) + .await; + + let client = test_client(&server.uri()); + let issue = client.get_issue_by_iid(42, 7).await.unwrap(); + + assert_eq!(issue.iid, 7); + assert_eq!(issue.title, "Issue #7"); + assert_eq!(issue.state, "opened"); + } + + #[tokio::test] + async fn get_issue_by_iid_not_found() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/999")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server) + .await; + + let client = test_client(&server.uri()); + let err = client.get_issue_by_iid(42, 999).await.unwrap_err(); + + assert!( + matches!(err, LoreError::GitLabNotFound { .. 
}), + "Expected GitLabNotFound, got: {err:?}" + ); + } + + #[tokio::test] + async fn get_mr_by_iid_success() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/99")) + .and(header("PRIVATE-TOKEN", "test-token")) + .respond_with(ResponseTemplate::new(200).set_body_json(mock_mr_json(99))) + .mount(&server) + .await; + + let client = test_client(&server.uri()); + let mr = client.get_mr_by_iid(42, 99).await.unwrap(); + + assert_eq!(mr.iid, 99); + assert_eq!(mr.title, "MR !99"); + assert_eq!(mr.source_branch, "feat"); + assert_eq!(mr.target_branch, "main"); + } + + #[tokio::test] + async fn get_mr_by_iid_not_found() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/999")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server) + .await; + + let client = test_client(&server.uri()); + let err = client.get_mr_by_iid(42, 999).await.unwrap_err(); + + assert!( + matches!(err, LoreError::GitLabNotFound { .. 
}), + "Expected GitLabNotFound, got: {err:?}" + ); + } } diff --git a/src/ingestion/issues.rs b/src/ingestion/issues.rs index cd912c9..48da301 100644 --- a/src/ingestion/issues.rs +++ b/src/ingestion/issues.rs @@ -140,7 +140,7 @@ fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCurs true } -fn process_single_issue( +pub(crate) fn process_single_issue( conn: &Connection, config: &Config, project_id: i64, diff --git a/src/ingestion/merge_requests.rs b/src/ingestion/merge_requests.rs index 0d1453e..baa5763 100644 --- a/src/ingestion/merge_requests.rs +++ b/src/ingestion/merge_requests.rs @@ -135,13 +135,13 @@ pub async fn ingest_merge_requests( Ok(result) } -struct ProcessMrResult { - labels_created: usize, - assignees_linked: usize, - reviewers_linked: usize, +pub(crate) struct ProcessMrResult { + pub(crate) labels_created: usize, + pub(crate) assignees_linked: usize, + pub(crate) reviewers_linked: usize, } -fn process_single_mr( +pub(crate) fn process_single_mr( conn: &Connection, config: &Config, project_id: i64, diff --git a/src/ingestion/mod.rs b/src/ingestion/mod.rs index aa64675..8d5f3cb 100644 --- a/src/ingestion/mod.rs +++ b/src/ingestion/mod.rs @@ -6,6 +6,7 @@ pub mod merge_requests; pub mod mr_diffs; pub mod mr_discussions; pub mod orchestrator; +pub(crate) mod surgical; pub use discussions::{IngestDiscussionsResult, ingest_issue_discussions}; pub use issues::{IngestIssuesResult, IssueForDiscussionSync, ingest_issues}; diff --git a/src/ingestion/orchestrator.rs b/src/ingestion/orchestrator.rs index cab8650..113c5d7 100644 --- a/src/ingestion/orchestrator.rs +++ b/src/ingestion/orchestrator.rs @@ -1097,7 +1097,7 @@ async fn drain_resource_events( } /// Store resource events using the provided connection (caller manages the transaction). 
-fn store_resource_events( +pub(crate) fn store_resource_events( conn: &Connection, project_id: i64, entity_type: &str, @@ -1406,7 +1406,7 @@ async fn drain_mr_closes_issues( Ok(result) } -fn store_closes_issues_refs( +pub(crate) fn store_closes_issues_refs( conn: &Connection, project_id: i64, mr_local_id: i64, diff --git a/src/ingestion/surgical.rs b/src/ingestion/surgical.rs new file mode 100644 index 0000000..bcebff4 --- /dev/null +++ b/src/ingestion/surgical.rs @@ -0,0 +1,462 @@ +//! Surgical (by-IID) sync pipeline. +//! +//! Provides targeted fetch and ingest for individual issues and merge requests, +//! as opposed to the bulk pagination paths in `issues.rs` / `merge_requests.rs`. +//! +//! Consumed by the orchestration layer (bd-1i4i) and dispatch wiring (bd-3bec). +#![allow(dead_code)] // Public API consumed by downstream beads not yet wired. + +use rusqlite::Connection; +use tracing::debug; + +use crate::Config; +use crate::core::error::{LoreError, Result}; +use crate::documents::SourceType; +use crate::gitlab::GitLabClient; +use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest}; +use crate::ingestion::dirty_tracker; +use crate::ingestion::issues::process_single_issue; +use crate::ingestion::merge_requests::{ProcessMrResult, process_single_mr}; +use crate::ingestion::mr_diffs::upsert_mr_file_changes; +use crate::ingestion::orchestrator::{store_closes_issues_refs, store_resource_events}; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/// A single entity to fetch surgically by IID. +#[derive(Debug, Clone)] +pub enum SurgicalTarget { + Issue { iid: u64 }, + MergeRequest { iid: u64 }, +} + +impl SurgicalTarget { + pub fn entity_type(&self) -> &'static str { + match self { + Self::Issue { .. } => "issue", + Self::MergeRequest { .. 
} => "merge_request", + } + } + + pub fn iid(&self) -> u64 { + match self { + Self::Issue { iid } | Self::MergeRequest { iid } => *iid, + } + } +} + +/// Outcome of a failed preflight fetch for one target. +#[derive(Debug)] +pub struct PreflightFailure { + pub target: SurgicalTarget, + pub error: LoreError, +} + +/// Collected results from preflight fetching multiple targets. +#[derive(Debug, Default)] +pub struct PreflightResult { + pub issues: Vec, + pub merge_requests: Vec, + pub failures: Vec, +} + +/// Result of ingesting a single issue by IID. +#[derive(Debug)] +pub struct IngestIssueResult { + pub upserted: bool, + pub labels_created: usize, + pub skipped_stale: bool, + pub dirty_source_keys: Vec<(SourceType, i64)>, +} + +/// Result of ingesting a single MR by IID. +#[derive(Debug)] +pub struct IngestMrResult { + pub upserted: bool, + pub labels_created: usize, + pub assignees_linked: usize, + pub reviewers_linked: usize, + pub skipped_stale: bool, + pub dirty_source_keys: Vec<(SourceType, i64)>, +} + +// --------------------------------------------------------------------------- +// TOCTOU guard +// --------------------------------------------------------------------------- + +/// Returns `true` if the payload is stale (same age or older than the DB row). +/// +/// `payload_updated_at` is an ISO 8601 string from the GitLab API. +/// `db_updated_at_ms` is the ms-epoch value from the local DB, or `None` if +/// the entity has never been ingested. +pub fn is_stale(payload_updated_at: &str, db_updated_at_ms: Option) -> Result { + let Some(db_ms) = db_updated_at_ms else { + return Ok(false); // First-ever ingest — not stale. 
+ }; + +    let payload_ms = chrono::DateTime::parse_from_rfc3339(payload_updated_at) + .map(|dt| dt.timestamp_millis()) + .map_err(|e| { + LoreError::Other(format!( + "Failed to parse timestamp '{payload_updated_at}': {e}" + )) + })?; + + Ok(payload_ms <= db_ms) +} + +// --------------------------------------------------------------------------- +// Preflight fetch +// --------------------------------------------------------------------------- + +/// Fetch one or more entities by IID from GitLab, collecting successes and failures. +/// +/// A 404 for any individual target is recorded as a [`PreflightFailure`] with +/// a [`LoreError::SurgicalPreflightFailed`] error; other targets proceed. +/// Permanent API errors (e.g. auth) propagate immediately; other errors (e.g. transient network failures) are recorded as failures. +pub async fn preflight_fetch( + client: &GitLabClient, + gitlab_project_id: i64, + project_path: &str, + targets: &[SurgicalTarget], +) -> Result { + let mut result = PreflightResult::default(); + + for target in targets { + match target { + SurgicalTarget::Issue { iid } => { + match client + .get_issue_by_iid(gitlab_project_id, *iid as i64) + .await + { + Ok(issue) => result.issues.push(issue), + Err(LoreError::GitLabNotFound { .. }) => { + result.failures.push(PreflightFailure { + target: target.clone(), + error: LoreError::SurgicalPreflightFailed { + entity_type: "issue".to_string(), + iid: *iid, + project: project_path.to_string(), + reason: "not found on GitLab".to_string(), + }, + }); + } + Err(e) if e.is_permanent_api_error() => { + return Err(e); + } + Err(e) => { + result.failures.push(PreflightFailure { + target: target.clone(), + error: e, + }); + } + } + } + SurgicalTarget::MergeRequest { iid } => { + match client.get_mr_by_iid(gitlab_project_id, *iid as i64).await { + Ok(mr) => result.merge_requests.push(mr), + Err(LoreError::GitLabNotFound { ..
}) => { + result.failures.push(PreflightFailure { + target: target.clone(), + error: LoreError::SurgicalPreflightFailed { + entity_type: "merge_request".to_string(), + iid: *iid, + project: project_path.to_string(), + reason: "not found on GitLab".to_string(), + }, + }); + } + Err(e) if e.is_permanent_api_error() => { + return Err(e); + } + Err(e) => { + result.failures.push(PreflightFailure { + target: target.clone(), + error: e, + }); + } + } + } + } + } + + Ok(result) +} + +// --------------------------------------------------------------------------- +// Ingest single issue by IID +// --------------------------------------------------------------------------- + +/// Ingest a single pre-fetched issue into the local DB. +/// +/// Applies a TOCTOU guard: if the DB already has a row with the same or newer +/// `updated_at`, the ingest is skipped and `skipped_stale` is set to `true`. +pub fn ingest_issue_by_iid( + conn: &Connection, + config: &Config, + project_id: i64, + issue: &GitLabIssue, +) -> Result { + let db_updated_at = get_issue_updated_at(conn, project_id, issue.iid)?; + + if is_stale(&issue.updated_at, db_updated_at)? { + debug!( + iid = issue.iid, + "Surgical issue ingest: skipping stale payload" + ); + return Ok(IngestIssueResult { + upserted: false, + labels_created: 0, + skipped_stale: true, + dirty_source_keys: vec![], + }); + } + + let labels_created = process_single_issue(conn, config, project_id, issue)?; + + let local_issue_id: i64 = conn.query_row( + "SELECT id FROM issues WHERE project_id = ? AND iid = ?", + (project_id, issue.iid), + |row| row.get(0), + )?; + + // Mark dirty for downstream scoped doc regeneration. 
+ dirty_tracker::mark_dirty(conn, SourceType::Issue, local_issue_id)?; + + debug!( + iid = issue.iid, + local_id = local_issue_id, + labels_created, + "Surgical issue ingest: upserted" + ); + + Ok(IngestIssueResult { + upserted: true, + labels_created, + skipped_stale: false, + dirty_source_keys: vec![(SourceType::Issue, local_issue_id)], + }) +} + +// --------------------------------------------------------------------------- +// Ingest single MR by IID +// --------------------------------------------------------------------------- + +/// Ingest a single pre-fetched merge request into the local DB. +/// +/// Same TOCTOU guard as [`ingest_issue_by_iid`]. +pub fn ingest_mr_by_iid( + conn: &Connection, + config: &Config, + project_id: i64, + mr: &GitLabMergeRequest, +) -> Result { + let db_updated_at = get_mr_updated_at(conn, project_id, mr.iid)?; + + if is_stale(&mr.updated_at, db_updated_at)? { + debug!(iid = mr.iid, "Surgical MR ingest: skipping stale payload"); + return Ok(IngestMrResult { + upserted: false, + labels_created: 0, + assignees_linked: 0, + reviewers_linked: 0, + skipped_stale: true, + dirty_source_keys: vec![], + }); + } + + let ProcessMrResult { + labels_created, + assignees_linked, + reviewers_linked, + } = process_single_mr(conn, config, project_id, mr)?; + + let local_mr_id: i64 = conn.query_row( + "SELECT id FROM merge_requests WHERE project_id = ? 
AND iid = ?", + (project_id, mr.iid), + |row| row.get(0), + )?; + + dirty_tracker::mark_dirty(conn, SourceType::MergeRequest, local_mr_id)?; + + debug!( + iid = mr.iid, + local_id = local_mr_id, + labels_created, + assignees_linked, + reviewers_linked, + "Surgical MR ingest: upserted" + ); + + Ok(IngestMrResult { + upserted: true, + labels_created, + assignees_linked, + reviewers_linked, + skipped_stale: false, + dirty_source_keys: vec![(SourceType::MergeRequest, local_mr_id)], + }) +} + +// --------------------------------------------------------------------------- +// Per-entity dependent enrichment (bd-kanh) +// --------------------------------------------------------------------------- + +/// Fetch and store resource events (state, label, milestone) for a single entity. +/// +/// Updates the `resource_events_synced_for_updated_at` watermark so the bulk +/// pipeline will not redundantly re-fetch these events. +pub async fn enrich_entity_resource_events( + client: &GitLabClient, + conn: &Connection, + project_id: i64, + gitlab_project_id: i64, + entity_type: &str, + iid: i64, + local_id: i64, +) -> Result<()> { + let (state_events, label_events, milestone_events) = client + .fetch_all_resource_events(gitlab_project_id, entity_type, iid) + .await?; + + store_resource_events( + conn, + project_id, + entity_type, + local_id, + &state_events, + &label_events, + &milestone_events, + )?; + + // Update watermark. + let sql = match entity_type { + "issue" => { + "UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?" + } + "merge_request" => { + "UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?" 
+ } + other => { + debug!( + entity_type = other, + "Unknown entity type for resource events watermark" + ); + return Ok(()); + } + }; + conn.execute(sql, [local_id])?; + + debug!( + entity_type, + iid, + local_id, + state = state_events.len(), + label = label_events.len(), + milestone = milestone_events.len(), + "Surgical: enriched resource events" + ); + + Ok(()) +} + +/// Fetch and store closes-issues references for a single merge request. +/// +/// Updates the `closes_issues_synced_for_updated_at` watermark. +pub async fn enrich_mr_closes_issues( + client: &GitLabClient, + conn: &Connection, + project_id: i64, + gitlab_project_id: i64, + iid: i64, + local_mr_id: i64, +) -> Result<()> { + let refs = client + .fetch_mr_closes_issues(gitlab_project_id, iid) + .await?; + + store_closes_issues_refs(conn, project_id, local_mr_id, &refs)?; + + conn.execute( + "UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?", + [local_mr_id], + )?; + + debug!( + iid, + local_mr_id, + refs = refs.len(), + "Surgical: enriched closes-issues refs" + ); + + Ok(()) +} + +/// Fetch and store MR file-change diffs for a single merge request. +/// +/// Updates the `diffs_synced_for_updated_at` watermark. 
+pub async fn enrich_mr_file_changes( + client: &GitLabClient, + conn: &Connection, + project_id: i64, + gitlab_project_id: i64, + iid: i64, + local_mr_id: i64, +) -> Result<()> { + let diffs = client.fetch_mr_diffs(gitlab_project_id, iid).await?; + + upsert_mr_file_changes(conn, local_mr_id, project_id, &diffs)?; + + conn.execute( + "UPDATE merge_requests SET diffs_synced_for_updated_at = updated_at WHERE id = ?", + [local_mr_id], + )?; + + debug!( + iid, + local_mr_id, + diffs = diffs.len(), + "Surgical: enriched MR file changes" + ); + + Ok(()) +} + +// --------------------------------------------------------------------------- +// DB helpers +// --------------------------------------------------------------------------- + +fn get_issue_updated_at(conn: &Connection, project_id: i64, iid: i64) -> Result> { + let result = conn.query_row( + "SELECT updated_at FROM issues WHERE project_id = ? AND iid = ?", + (project_id, iid), + |row| row.get(0), + ); + match result { + Ok(ts) => Ok(Some(ts)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(e.into()), + } +} + +fn get_mr_updated_at(conn: &Connection, project_id: i64, iid: i64) -> Result> { + let result = conn.query_row( + "SELECT updated_at FROM merge_requests WHERE project_id = ? AND iid = ?", + (project_id, iid), + |row| row.get(0), + ); + match result { + Ok(ts) => Ok(Some(ts)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(e.into()), + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +#[path = "surgical_tests.rs"] +mod tests; diff --git a/src/ingestion/surgical_tests.rs b/src/ingestion/surgical_tests.rs new file mode 100644 index 0000000..7b5ac6b --- /dev/null +++ b/src/ingestion/surgical_tests.rs @@ -0,0 +1,913 @@ +//! Tests for `surgical.rs` — surgical (by-IID) sync pipeline. 
+ +use std::path::Path; + +use crate::core::config::{Config, GitLabConfig, ProjectConfig}; +use crate::core::db::{create_connection, run_migrations}; +use crate::gitlab::types::{ + GitLabAuthor, GitLabIssue, GitLabMergeRequest, GitLabReferences, GitLabReviewer, +}; +use crate::ingestion::surgical::{SurgicalTarget, ingest_issue_by_iid, ingest_mr_by_iid, is_stale}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn setup_db() -> rusqlite::Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + seed_project(&conn); + conn +} + +fn seed_project(conn: &rusqlite::Connection) { + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) + VALUES (1, 42, 'group/repo', 'https://gitlab.example.com/group/repo')", + [], + ) + .unwrap(); +} + +fn test_config() -> Config { + Config { + gitlab: GitLabConfig { + base_url: "https://gitlab.example.com".to_string(), + token_env_var: "GITLAB_TOKEN".to_string(), + }, + projects: vec![ProjectConfig { + path: "group/repo".to_string(), + }], + default_project: None, + sync: Default::default(), + storage: Default::default(), + embedding: Default::default(), + logging: Default::default(), + scoring: Default::default(), + } +} + +fn make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue { + GitLabIssue { + id: 1000 + iid, + iid, + project_id: 42, + title: format!("Test issue #{iid}"), + description: Some("Test description".to_string()), + state: "opened".to_string(), + created_at: "2024-01-01T00:00:00.000+00:00".to_string(), + updated_at: updated_at.to_string(), + closed_at: None, + author: GitLabAuthor { + id: 1, + username: "alice".to_string(), + name: "Alice".to_string(), + }, + assignees: vec![], + labels: vec!["bug".to_string()], + milestone: None, + due_date: None, + web_url: 
format!("https://gitlab.example.com/group/repo/-/issues/{iid}"), + } +} + +fn make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest { + GitLabMergeRequest { + id: 2000 + iid, + iid, + project_id: 42, + title: format!("Test MR !{iid}"), + description: Some("MR description".to_string()), + state: "opened".to_string(), + draft: false, + work_in_progress: false, + source_branch: "feat".to_string(), + target_branch: "main".to_string(), + sha: Some("abc123def456".to_string()), + references: Some(GitLabReferences { + short: format!("!{iid}"), + full: format!("group/repo!{iid}"), + }), + detailed_merge_status: Some("mergeable".to_string()), + merge_status_legacy: None, + created_at: "2024-01-01T00:00:00.000+00:00".to_string(), + updated_at: updated_at.to_string(), + merged_at: None, + closed_at: None, + author: GitLabAuthor { + id: 2, + username: "bob".to_string(), + name: "Bob".to_string(), + }, + merge_user: None, + merged_by: None, + labels: vec![], + assignees: vec![], + reviewers: vec![GitLabReviewer { + id: 3, + username: "carol".to_string(), + name: "Carol".to_string(), + }], + web_url: format!("https://gitlab.example.com/group/repo/-/merge_requests/{iid}"), + merge_commit_sha: None, + squash_commit_sha: None, + } +} + +fn get_dirty_keys(conn: &rusqlite::Connection) -> Vec<(String, i64)> { + let mut stmt = conn + .prepare("SELECT source_type, source_id FROM dirty_sources ORDER BY source_type, source_id") + .unwrap(); + stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .collect::, _>>() + .unwrap() +} + +// --------------------------------------------------------------------------- +// is_stale — TOCTOU guard +// --------------------------------------------------------------------------- + +#[test] +fn test_is_stale_parses_iso8601() { + // 2024-01-15T10:00:00.000Z → 1_705_312_800_000 ms + let payload_ts = "2024-01-15T10:00:00.000Z"; + let db_ts = Some(1_705_312_800_000i64); + // Same timestamp → stale (payload is NOT newer). 
+ assert!(is_stale(payload_ts, db_ts).unwrap()); +} + +#[test] +fn test_is_stale_handles_none_db_value() { + // First-ever ingest: no row in DB → not stale. + let payload_ts = "2024-01-15T10:00:00.000Z"; + assert!(!is_stale(payload_ts, None).unwrap()); +} + +#[test] +fn test_is_stale_newer_payload_is_not_stale() { + // DB has T1, payload has T2 (1 second later) → not stale. + let payload_ts = "2024-01-15T10:00:01.000Z"; + let db_ts = Some(1_705_312_800_000i64); + assert!(!is_stale(payload_ts, db_ts).unwrap()); +} + +#[test] +fn test_is_stale_older_payload_is_stale() { + // DB has T2, payload has T1 (1 second earlier) → stale. + let payload_ts = "2024-01-15T09:59:59.000Z"; + let db_ts = Some(1_705_312_800_000i64); + assert!(is_stale(payload_ts, db_ts).unwrap()); +} + +#[test] +fn test_is_stale_parses_timezone_offset() { + // GitLab sometimes returns +00:00 instead of Z. + let payload_ts = "2024-01-15T10:00:00.000+00:00"; + let db_ts = Some(1_705_312_800_000i64); + assert!(is_stale(payload_ts, db_ts).unwrap()); +} + +#[test] +fn test_is_stale_with_z_suffix() { + // Z suffix (no ms) also parses correctly. 
+ let payload_ts = "2024-01-15T10:00:00Z"; + let db_ts = Some(1_705_312_800_000i64); + assert!(is_stale(payload_ts, db_ts).unwrap()); +} + +#[test] +fn test_is_stale_invalid_timestamp_returns_error() { + let result = is_stale("not-a-timestamp", Some(0)); + assert!(result.is_err()); +} + +// --------------------------------------------------------------------------- +// SurgicalTarget +// --------------------------------------------------------------------------- + +#[test] +fn test_surgical_target_display_issue() { + let target = SurgicalTarget::Issue { iid: 42 }; + assert_eq!(target.entity_type(), "issue"); + assert_eq!(target.iid(), 42); +} + +#[test] +fn test_surgical_target_display_mr() { + let target = SurgicalTarget::MergeRequest { iid: 99 }; + assert_eq!(target.entity_type(), "merge_request"); + assert_eq!(target.iid(), 99); +} + +// --------------------------------------------------------------------------- +// ingest_issue_by_iid — full DB integration +// --------------------------------------------------------------------------- + +#[test] +fn test_ingest_issue_by_iid_upserts_and_marks_dirty() { + let conn = setup_db(); + let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + + assert!(result.upserted); + assert!(!result.skipped_stale); + assert_eq!(result.labels_created, 1); // "bug" label + + // Verify dirty marking. 
+ let dirty = get_dirty_keys(&conn); + assert!( + dirty.iter().any(|(t, _)| t == "issue"), + "Expected dirty issue entry, got: {dirty:?}" + ); +} + +#[test] +fn test_ingest_issue_returns_dirty_source_keys() { + let conn = setup_db(); + let issue = make_test_issue(7, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + + assert_eq!(result.dirty_source_keys.len(), 1); + let (source_type, source_id) = &result.dirty_source_keys[0]; + assert_eq!(source_type.to_string(), "issue"); + assert!(*source_id > 0); +} + +#[test] +fn test_toctou_skips_stale_issue() { + let conn = setup_db(); + let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + // First ingest succeeds. + let first = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + assert!(first.upserted); + + // Same timestamp again → stale. + let second = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + assert!(second.skipped_stale); + assert!(!second.upserted); +} + +#[test] +fn test_toctou_allows_newer_issue() { + let conn = setup_db(); + let config = test_config(); + + // First ingest at T1. + let issue_t1 = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + let first = ingest_issue_by_iid(&conn, &config, 1, &issue_t1).unwrap(); + assert!(first.upserted); + + // Second ingest at T2 (1 minute later) → not stale. + let issue_t2 = make_test_issue(42, "2026-02-17T12:01:00.000+00:00"); + let second = ingest_issue_by_iid(&conn, &config, 1, &issue_t2).unwrap(); + assert!(second.upserted); + assert!(!second.skipped_stale); +} + +#[test] +fn test_ingest_issue_updates_existing() { + let conn = setup_db(); + let config = test_config(); + + // First ingest. + let mut issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + + // Update title and timestamp. 
+ issue.title = "Updated title".to_string(); + issue.updated_at = "2026-02-17T13:00:00.000+00:00".to_string(); + ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + + // Verify the title was updated in DB. + let title: String = conn + .query_row( + "SELECT title FROM issues WHERE project_id = 1 AND iid = 42", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(title, "Updated title"); +} + +// --------------------------------------------------------------------------- +// ingest_mr_by_iid — full DB integration +// --------------------------------------------------------------------------- + +#[test] +fn test_ingest_mr_by_iid_upserts_and_marks_dirty() { + let conn = setup_db(); + let mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + let result = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + assert!(result.upserted); + assert!(!result.skipped_stale); + assert_eq!(result.reviewers_linked, 1); // "carol" + + let dirty = get_dirty_keys(&conn); + assert!( + dirty.iter().any(|(t, _)| t == "merge_request"), + "Expected dirty MR entry, got: {dirty:?}" + ); +} + +#[test] +fn test_ingest_mr_returns_dirty_source_keys() { + let conn = setup_db(); + let mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + let result = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + assert_eq!(result.dirty_source_keys.len(), 1); + let (source_type, source_id) = &result.dirty_source_keys[0]; + assert_eq!(source_type.to_string(), "merge_request"); + assert!(*source_id > 0); +} + +#[test] +fn test_toctou_skips_stale_mr() { + let conn = setup_db(); + let mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + let config = test_config(); + + let first = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + assert!(first.upserted); + + let second = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + assert!(second.skipped_stale); + assert!(!second.upserted); +} + +#[test] +fn 
test_toctou_allows_newer_mr() { + let conn = setup_db(); + let config = test_config(); + + let mr_t1 = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + let first = ingest_mr_by_iid(&conn, &config, 1, &mr_t1).unwrap(); + assert!(first.upserted); + + let mr_t2 = make_test_mr(99, "2026-02-17T12:01:00.000+00:00"); + let second = ingest_mr_by_iid(&conn, &config, 1, &mr_t2).unwrap(); + assert!(second.upserted); + assert!(!second.skipped_stale); +} + +#[test] +fn test_ingest_mr_updates_existing() { + let conn = setup_db(); + let config = test_config(); + + let mut mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + mr.title = "Updated MR title".to_string(); + mr.updated_at = "2026-02-17T13:00:00.000+00:00".to_string(); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + let title: String = conn + .query_row( + "SELECT title FROM merge_requests WHERE project_id = 1 AND iid = 99", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(title, "Updated MR title"); +} + +// --------------------------------------------------------------------------- +// preflight_fetch — wiremock (async) +// --------------------------------------------------------------------------- + +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +use crate::gitlab::GitLabClient; +use crate::ingestion::surgical::preflight_fetch; + +#[tokio::test] +async fn test_preflight_fetch_returns_issues_and_mrs() { + let server = MockServer::start().await; + + let issue_json = serde_json::json!({ + "id": 1042, + "iid": 42, + "project_id": 100, + "title": "Fetched issue", + "description": null, + "state": "opened", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "closed_at": null, + "author": { "id": 1, "username": "alice", "name": "Alice" }, + "assignees": [], + "labels": [], + "milestone": null, + "due_date": null, + "web_url": 
"https://gitlab.example.com/g/p/-/issues/42" + }); + + let mr_json = serde_json::json!({ + "id": 2099, + "iid": 99, + "project_id": 100, + "title": "Fetched MR", + "description": null, + "state": "opened", + "draft": false, + "work_in_progress": false, + "source_branch": "feat", + "target_branch": "main", + "sha": "abc", + "references": { "short": "!99", "full": "g/p!99" }, + "detailed_merge_status": "mergeable", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "merged_at": null, + "closed_at": null, + "author": { "id": 2, "username": "bob", "name": "Bob" }, + "merge_user": null, + "merged_by": null, + "labels": [], + "assignees": [], + "reviewers": [], + "web_url": "https://gitlab.example.com/g/p/-/merge_requests/99", + "merge_commit_sha": null, + "squash_commit_sha": null + }); + + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/42")) + .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json)) + .mount(&server) + .await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/99")) + .respond_with(ResponseTemplate::new(200).set_body_json(&mr_json)) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + let targets = vec![ + SurgicalTarget::Issue { iid: 42 }, + SurgicalTarget::MergeRequest { iid: 99 }, + ]; + + let result = preflight_fetch(&client, 100, "g/p", &targets) + .await + .unwrap(); + + assert_eq!(result.issues.len(), 1); + assert_eq!(result.issues[0].iid, 42); + assert_eq!(result.merge_requests.len(), 1); + assert_eq!(result.merge_requests[0].iid, 99); + assert!(result.failures.is_empty()); +} + +#[tokio::test] +async fn test_preflight_fetch_collects_failures() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/999")) + .respond_with(ResponseTemplate::new(404)) + .mount(&server) + .await; + + let client = 
GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + let targets = vec![SurgicalTarget::Issue { iid: 999 }]; + + let result = preflight_fetch(&client, 100, "g/p", &targets) + .await + .unwrap(); + + assert!(result.issues.is_empty()); + assert_eq!(result.failures.len(), 1); + assert_eq!(result.failures[0].target.iid(), 999); +} + +// --------------------------------------------------------------------------- +// Per-entity dependent helpers (bd-kanh) +// --------------------------------------------------------------------------- + +use crate::ingestion::surgical::{ + enrich_entity_resource_events, enrich_mr_closes_issues, enrich_mr_file_changes, +}; + +#[tokio::test] +async fn test_enrich_resource_events_stores_and_watermarks() { + let conn = setup_db(); + let config = test_config(); + let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + + let local_id: i64 = conn + .query_row( + "SELECT id FROM issues WHERE project_id = 1 AND iid = 42", + [], + |r| r.get(0), + ) + .unwrap(); + + let server = MockServer::start().await; + + // Mock all 3 resource event endpoints returning empty arrays + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/42/resource_state_events")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/42/resource_label_events")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/42/issues/42/resource_milestone_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + enrich_entity_resource_events(&client, &conn, 1, 42, "issue", 42, local_id) + .await + .unwrap(); + + // 
Verify watermark was set + let watermark: Option = conn + .query_row( + "SELECT resource_events_synced_for_updated_at FROM issues WHERE id = ?", + [local_id], + |r| r.get(0), + ) + .unwrap(); + assert!(watermark.is_some()); +} + +#[tokio::test] +async fn test_enrich_mr_closes_issues_stores_refs() { + let conn = setup_db(); + let config = test_config(); + let mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + let local_mr_id: i64 = conn + .query_row( + "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 99", + [], + |r| r.get(0), + ) + .unwrap(); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/99/closes_issues")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([ + { + "id": 1042, + "iid": 42, + "project_id": 42, + "title": "Closed issue", + "state": "closed", + "web_url": "https://gitlab.example.com/group/repo/-/issues/42" + } + ]))) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + enrich_mr_closes_issues(&client, &conn, 1, 42, 99, local_mr_id) + .await + .unwrap(); + + // Verify entity_reference was created + let ref_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM entity_references + WHERE source_entity_type = 'merge_request' AND source_entity_id = ? 
+ AND reference_type = 'closes'", + [local_mr_id], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(ref_count, 1); + + // Verify watermark + let watermark: Option = conn + .query_row( + "SELECT closes_issues_synced_for_updated_at FROM merge_requests WHERE id = ?", + [local_mr_id], + |r| r.get(0), + ) + .unwrap(); + assert!(watermark.is_some()); +} + +#[tokio::test] +async fn test_enrich_mr_file_changes_stores_diffs() { + let conn = setup_db(); + let config = test_config(); + let mr = make_test_mr(99, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + + let local_mr_id: i64 = conn + .query_row( + "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 99", + [], + |r| r.get(0), + ) + .unwrap(); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/99/diffs")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([ + { + "old_path": "src/main.rs", + "new_path": "src/main.rs", + "new_file": false, + "renamed_file": false, + "deleted_file": false + } + ]))) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + enrich_mr_file_changes(&client, &conn, 1, 42, 99, local_mr_id) + .await + .unwrap(); + + // Verify file change was stored + let fc_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM mr_file_changes WHERE merge_request_id = ?", + [local_mr_id], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(fc_count, 1); + + // Verify watermark + let watermark: Option = conn + .query_row( + "SELECT diffs_synced_for_updated_at FROM merge_requests WHERE id = ?", + [local_mr_id], + |r| r.get(0), + ) + .unwrap(); + assert!(watermark.is_some()); +} + +// --------------------------------------------------------------------------- +// Integration tests (bd-3jqx) +// --------------------------------------------------------------------------- + +/// Preflight fetch with a mix of success and 404 — verify 
partial results. +#[tokio::test] +async fn test_surgical_cancellation_during_preflight() { + // Test that preflight handles partial failures gracefully: one issue exists, + // another returns 404. The existing issue should succeed, the missing one + // should be recorded as a failure — not abort the entire preflight. + let server = MockServer::start().await; + + // Issue 7 exists + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": 1007, "iid": 7, "project_id": 42, + "title": "Existing issue", "description": "desc", + "state": "opened", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "closed_at": null, + "author": {"id": 1, "username": "alice", "name": "Alice"}, + "assignees": [], "labels": [], "milestone": null, "due_date": null, + "web_url": "https://gitlab.example.com/group/repo/-/issues/7" + }))) + .mount(&server) + .await; + + // Issue 999 does not exist + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/issues/999")) + .respond_with(ResponseTemplate::new(404).set_body_json(serde_json::json!({ + "message": "404 Not Found" + }))) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + let targets = vec![ + SurgicalTarget::Issue { iid: 7 }, + SurgicalTarget::Issue { iid: 999 }, + ]; + + let result = preflight_fetch(&client, 42, "group/repo", &targets) + .await + .unwrap(); + + assert_eq!(result.issues.len(), 1, "One issue should succeed"); + assert_eq!(result.issues[0].iid, 7); + assert_eq!(result.failures.len(), 1, "One issue should fail"); + assert_eq!(result.failures[0].target.iid(), 999); +} + +/// Preflight fetch for MRs: one succeeds, one gets 404. +#[tokio::test] +async fn test_surgical_timeout_during_fetch() { + // Tests mixed MR preflight: one MR found, one returns 404. 
+ // The found MR proceeds; the missing MR is recorded as a failure. + let server = MockServer::start().await; + + // MR 10 exists + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/10")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": 2010, "iid": 10, "project_id": 42, + "title": "Test MR !10", "description": "desc", + "state": "opened", "draft": false, "work_in_progress": false, + "source_branch": "feat", "target_branch": "main", + "sha": "abc123", + "references": {"short": "!10", "full": "group/repo!10"}, + "detailed_merge_status": "mergeable", + "created_at": "2026-02-17T10:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "merged_at": null, "closed_at": null, + "author": {"id": 2, "username": "bob", "name": "Bob"}, + "merge_user": null, "merged_by": null, + "labels": [], "assignees": [], "reviewers": [], + "web_url": "https://gitlab.example.com/group/repo/-/merge_requests/10", + "merge_commit_sha": null, "squash_commit_sha": null + }))) + .mount(&server) + .await; + + // MR 888 does not exist + Mock::given(method("GET")) + .and(path("/api/v4/projects/42/merge_requests/888")) + .respond_with(ResponseTemplate::new(404).set_body_json(serde_json::json!({ + "message": "404 Not Found" + }))) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(1000.0)); + let targets = vec![ + SurgicalTarget::MergeRequest { iid: 10 }, + SurgicalTarget::MergeRequest { iid: 888 }, + ]; + + let result = preflight_fetch(&client, 42, "group/repo", &targets) + .await + .unwrap(); + + assert_eq!(result.merge_requests.len(), 1, "One MR should succeed"); + assert_eq!(result.merge_requests[0].iid, 10); + assert_eq!(result.failures.len(), 1, "One MR should fail"); + assert_eq!(result.failures[0].target.iid(), 888); +} + +/// Verify that only the surgically ingested entity gets dirty-tracked. 
+#[tokio::test] +async fn test_surgical_embed_isolation() { + let conn = setup_db(); + let config = test_config(); + + // Pre-seed a second issue that should NOT be dirty-tracked + let existing_issue = make_test_issue(1, "2024-06-01T00:00:00.000+00:00"); + ingest_issue_by_iid(&conn, &config, 1, &existing_issue).unwrap(); + + // Clear any dirty entries from the pre-seed + conn.execute("DELETE FROM dirty_sources", []).unwrap(); + + // Now surgically ingest issue #42 + let new_issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + let result = ingest_issue_by_iid(&conn, &config, 1, &new_issue).unwrap(); + + assert!(result.upserted); + assert!(!result.skipped_stale); + + // Only issue 42 should be dirty-tracked + let dirty = get_dirty_keys(&conn); + assert_eq!( + dirty.len(), + 1, + "Only the surgically ingested issue should be dirty" + ); + assert_eq!(dirty[0].0, "issue"); + + // Verify the dirty source points to the correct local issue id + let local_id: i64 = conn + .query_row( + "SELECT id FROM issues WHERE project_id = 1 AND iid = 42", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(dirty[0].1, local_id); +} + +/// Verify that ingested data in the DB matches the GitLab payload fields exactly. 
+#[tokio::test] +async fn test_surgical_payload_integrity() { + let conn = setup_db(); + let config = test_config(); + + let issue = GitLabIssue { + id: 5555, + iid: 77, + project_id: 42, + title: "Payload integrity test".to_string(), + description: Some("Detailed description with **markdown**".to_string()), + state: "closed".to_string(), + created_at: "2025-03-10T08:30:00.000+00:00".to_string(), + updated_at: "2026-01-20T14:45:00.000+00:00".to_string(), + closed_at: Some("2026-01-20T14:45:00.000+00:00".to_string()), + author: GitLabAuthor { + id: 99, + username: "integrity_user".to_string(), + name: "Integrity Tester".to_string(), + }, + assignees: vec![GitLabAuthor { + id: 100, + username: "assignee1".to_string(), + name: "Assignee One".to_string(), + }], + labels: vec!["priority::high".to_string(), "type::bug".to_string()], + milestone: None, + due_date: Some("2026-02-01".to_string()), + web_url: "https://gitlab.example.com/group/repo/-/issues/77".to_string(), + }; + + let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + assert!(result.upserted); + + // Verify core fields in DB match the payload + let (db_title, db_state, db_description, db_author, db_web_url, db_iid, db_gitlab_id): ( + String, + String, + Option, + String, + String, + i64, + i64, + ) = conn + .query_row( + "SELECT title, state, description, author_username, web_url, iid, gitlab_id + FROM issues + WHERE project_id = 1 AND iid = 77", + [], + |r| { + Ok(( + r.get(0)?, + r.get(1)?, + r.get(2)?, + r.get(3)?, + r.get(4)?, + r.get(5)?, + r.get(6)?, + )) + }, + ) + .unwrap(); + + assert_eq!(db_title, "Payload integrity test"); + assert_eq!(db_state, "closed"); + assert_eq!( + db_description.as_deref(), + Some("Detailed description with **markdown**") + ); + assert_eq!(db_author, "integrity_user"); + assert_eq!( + db_web_url, + "https://gitlab.example.com/group/repo/-/issues/77" + ); + assert_eq!(db_iid, 77); + assert_eq!(db_gitlab_id, 5555); + + // Verify labels were created and 
linked + let label_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM issue_labels il + JOIN labels l ON il.label_id = l.id + JOIN issues i ON il.issue_id = i.id + WHERE i.iid = 77 AND i.project_id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(label_count, 2, "Both labels should be linked"); +} diff --git a/src/main.rs b/src/main.rs index afe1704..5274a77 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,21 +17,22 @@ use lore::cli::commands::{ print_event_count_json, print_file_history, print_file_history_json, print_generate_docs, print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json, print_list_notes, - print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_search_results, - print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, - print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json, - print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta, - print_trace, print_trace_json, print_who_human, print_who_json, query_notes, run_auth_test, - run_count, run_count_events, run_doctor, run_drift, run_embed, run_file_history, - run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, - run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, - run_tui, run_who, + print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_related, + print_related_json, print_search_results, print_search_results_json, print_show_issue, + print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json, + print_sync, print_sync_json, print_sync_status, print_sync_status_json, print_timeline, + print_timeline_json_with_meta, print_trace, print_trace_json, print_who_human, print_who_json, + query_notes, run_auth_test, run_count, run_count_events, run_doctor, 
run_drift, run_embed, + run_file_history, run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, + run_list_mrs, run_related, run_search, run_show_issue, run_show_mr, run_stats, run_sync, + run_sync_status, run_timeline, run_tui, run_who, }; use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme}; use lore::cli::robot::{RobotMeta, strip_schemas}; use lore::cli::{ Cli, Commands, CountArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, - MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, TraceArgs, WhoArgs, + MrsArgs, NotesArgs, RelatedArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, TraceArgs, + WhoArgs, }; use lore::core::db::{ LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations, @@ -204,6 +205,9 @@ async fn main() { handle_file_history(cli.config.as_deref(), args, robot_mode) } Some(Commands::Trace(args)) => handle_trace(cli.config.as_deref(), args, robot_mode), + Some(Commands::Related(args)) => { + handle_related(cli.config.as_deref(), args, robot_mode).await + } Some(Commands::Tui(args)) => run_tui(&args, robot_mode), Some(Commands::Drift { entity_type, @@ -732,6 +736,8 @@ fn suggest_similar_command(invalid: &str) -> String { ("drift", "drift"), ("file-history", "file-history"), ("trace", "trace"), + ("related", "related"), + ("similar", "related"), ]; let invalid_lower = invalid.to_lowercase(); @@ -2214,6 +2220,14 @@ async fn handle_sync_cmd( if args.no_status { config.sync.fetch_work_item_status = false; } + // Dedup surgical IIDs + let mut issue_iids = args.issue; + let mut mr_iids = args.mr; + issue_iids.sort_unstable(); + issue_iids.dedup(); + mr_iids.sort_unstable(); + mr_iids.dedup(); + let options = SyncOptions { full: args.full && !args.no_full, force: args.force && !args.no_force, @@ -2222,8 +2236,46 @@ async fn handle_sync_cmd( no_events: args.no_events, robot_mode, dry_run, + issue_iids, + mr_iids, + project: args.project, + preflight_only: 
args.preflight_only, }; + // Surgical sync validation + if options.is_surgical() { + let total = options.issue_iids.len() + options.mr_iids.len(); + if total > SyncOptions::MAX_SURGICAL_TARGETS { + return Err(format!( + "Too many surgical targets ({total}). Maximum is {}.", + SyncOptions::MAX_SURGICAL_TARGETS + ) + .into()); + } + if options.full { + return Err("--full is incompatible with surgical sync (--issue / --mr).".into()); + } + if options.no_docs && !options.no_embed { + return Err( + "--no-docs without --no-embed in surgical mode would leave stale embeddings. \ + Add --no-embed or remove --no-docs." + .into(), + ); + } + if config + .effective_project(options.project.as_deref()) + .is_none() + { + return Err( + "Surgical sync requires a project. Use -p or set defaultProject in config." + .into(), + ); + } + } + if options.preflight_only && !options.is_surgical() { + return Err("--preflight-only requires --issue or --mr.".into()); + } + // For dry run, skip recording and just show the preview if dry_run { let signal = ShutdownSignal::new(); @@ -2231,6 +2283,31 @@ async fn handle_sync_cmd( return Ok(()); } + // Surgical sync manages its own recorder, lock, and signal internally. + // Dispatch early to avoid creating a redundant outer recorder. + if options.is_surgical() { + let signal = ShutdownSignal::new(); + let signal_for_handler = signal.clone(); + tokio::spawn(async move { + let _ = tokio::signal::ctrl_c().await; + eprintln!("\nInterrupted, finishing current batch... 
(Ctrl+C again to force quit)"); + signal_for_handler.cancel(); + let _ = tokio::signal::ctrl_c().await; + std::process::exit(130); + }); + + let start = std::time::Instant::now(); + let result = run_sync(&config, options, None, &signal).await?; + let elapsed = start.elapsed(); + + if robot_mode { + print_sync_json(&result, elapsed.as_millis() as u64, Some(metrics)); + } else { + print_sync(&result, elapsed, Some(metrics), args.timings); + } + return Ok(()); + } + let db_path = get_db_path(config.storage.db_path.as_deref()); let recorder_conn = create_connection(&db_path)?; let run_id = uuid::Uuid::new_v4().simple().to_string(); @@ -2504,13 +2581,24 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box generate-docs -> embed", - "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run"], + "description": "Full sync pipeline: ingest -> generate-docs -> embed. Supports surgical per-IID sync with --issue/--mr.", + "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run", "--issue ", "--mr ", "-p/--project ", "--preflight-only"], "example": "lore --robot sync", + "notes": { + "surgical_sync": "Pass --issue and/or --mr (repeatable) with -p to sync specific entities instead of a full pipeline. Incompatible with --full.", + "preflight_only": "--preflight-only validates that entities exist on GitLab without writing to the DB. Requires --issue or --mr." 
+ }, "response_schema": { - "ok": "bool", - "data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"}, - "meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"} + "bulk": { + "ok": "bool", + "data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"}, + "meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"} + }, + "surgical": { + "ok": "bool", + "data": {"surgical_mode": "true", "surgical_iids": "{issues:[int], merge_requests:[int]}", "issues_updated": "int", "mrs_updated": "int", "entity_results": "[{entity_type:string, iid:int, outcome:string, error:string?, toctou_reason:string?}]", "preflight_only": "bool?", "documents_regenerated": "int", "documents_embedded": "int"}, + "meta": {"elapsed_ms": "int"} + } } }, "issues": { @@ -2967,6 +3055,63 @@ async fn handle_drift( Ok(()) } +async fn handle_related( + config_override: Option<&str>, + args: RelatedArgs, + robot_mode: bool, +) -> Result<(), Box> { + let start = std::time::Instant::now(); + let config = Config::load(config_override)?; + + // Determine mode: if first arg is a known entity type AND iid is provided, use entity mode. + // Otherwise treat the first arg as free-text query. + let is_entity_type = matches!( + args.query_or_type.as_str(), + "issues" | "issue" | "mrs" | "mr" | "merge-requests" + ); + + let effective_project = config + .effective_project(args.project.as_deref()) + .map(String::from); + + let response = if is_entity_type && args.iid.is_some() { + run_related( + &config, + Some(args.query_or_type.as_str()), + args.iid, + None, + effective_project.as_deref(), + args.limit, + ) + .await? 
+ } else if is_entity_type && args.iid.is_none() { + return Err(format!( + "Entity type '{}' requires an IID. Usage: lore related {} ", + args.query_or_type, args.query_or_type + ) + .into()); + } else { + run_related( + &config, + None, + None, + Some(args.query_or_type.as_str()), + effective_project.as_deref(), + args.limit, + ) + .await? + }; + + let elapsed_ms = start.elapsed().as_millis() as u64; + + if robot_mode { + print_related_json(&response, elapsed_ms); + } else { + print_related(&response); + } + Ok(()) +} + #[allow(clippy::too_many_arguments)] async fn handle_list_compat( config_override: Option<&str>,