diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index e45a420..2589879 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -40,7 +40,7 @@ {"id":"bd-1hoq","title":"Restructure expert SQL with CTE-based dual-path matching","description":"## Background\nThe current query_expert() at who.rs:641 uses a 4-signal UNION ALL that only matches position_new_path and new_path, with flat COUNT-based scoring computed entirely in SQL. The new model needs dual-path matching, 5 signal types, state-aware timestamps, and returns per-signal rows for Rust-side decay computation (bd-13q8).\n\n## Approach\n**Important**: This bead builds the new SQL as a separate function WITHOUT modifying query_expert() yet. bd-13q8 wires it into query_expert(). This keeps this bead independently testable.\n\nAdd a new function:\n```rust\n/// Build the CTE-based expert scoring SQL for a given path query mode.\n/// Returns SQL string. Params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\nfn build_expert_sql(path_op: &str) -> String {\n // ... format the SQL with {path_op} inlined, all config values as bound params\n}\n```\n\n### SQL structure (8 CTEs + final SELECT):\n1. **matched_notes_raw**: UNION ALL on position_new_path + position_old_path\n2. **matched_notes**: DISTINCT dedup by id\n3. **matched_file_changes_raw**: UNION ALL on new_path + old_path\n4. **matched_file_changes**: DISTINCT dedup by (merge_request_id, project_id)\n5. **mr_activity**: Centralized state-aware timestamps AND state_mult. Joins merge_requests via matched_file_changes. Computes:\n - activity_ts: CASE WHEN state='merged' THEN COALESCE(merged_at, created_at) WHEN state='closed' THEN COALESCE(closed_at, created_at) ELSE COALESCE(updated_at, created_at) END\n - state_mult: CASE WHEN state='closed' THEN ?5 ELSE 1.0 END\n6. **reviewer_participation**: substantive DiffNotes WHERE LENGTH(TRIM(body)) >= ?6\n7. 
**raw**: 5 signals (diffnote_reviewer, diffnote_author, file_author, file_reviewer_participated, file_reviewer_assigned). Signals 1-2 compute state_mult inline. Signals 3-4a-4b reference mr_activity.\n8. **aggregated**: MR-level GROUP BY + note_group with COUNT\n\n### Returns 6 columns: (username TEXT, signal TEXT, mr_id INTEGER, qty INTEGER, ts INTEGER, state_mult REAL)\n\nSee plans/time-decay-expert-scoring.md section 3 for the full SQL template.\n\n## TDD Loop\n\n### RED (write first):\n```rust\n#[test]\nfn test_expert_sql_returns_expected_signal_rows() {\n let conn = setup_test_db();\n insert_project(&conn, 1, \"team/backend\");\n insert_mr(&conn, 1, 1, 100, \"alice\", \"merged\");\n insert_file_change(&conn, 1, 1, \"src/app.rs\", \"modified\");\n insert_reviewer(&conn, 1, \"bob\");\n insert_reviewer(&conn, 1, \"carol\");\n insert_discussion(&conn, 1, 1, Some(1), None, true, false);\n insert_diffnote(&conn, 1, 1, 1, \"carol\", \"src/app.rs\", \"This needs error handling for the edge case\");\n\n let sql = build_expert_sql(\"= ?1\");\n let mut stmt = conn.prepare(&sql).unwrap();\n let rows: Vec<(String, String, i64, i64, i64, f64)> = stmt\n .query_map(\n rusqlite::params![\"src/app.rs\", 0_i64, Option::::None, now_ms() + 1000, 0.5_f64, 20_i64],\n |row| Ok((\n row.get(0).unwrap(), row.get(1).unwrap(), row.get(2).unwrap(),\n row.get(3).unwrap(), row.get(4).unwrap(), row.get(5).unwrap(),\n ))\n ).unwrap().filter_map(|r| r.ok()).collect();\n\n // alice: file_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"file_author\"));\n // carol: file_reviewer_participated (left substantive DiffNote)\n assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"file_reviewer_participated\"));\n // bob: file_reviewer_assigned (no DiffNotes)\n assert!(rows.iter().any(|(u, s, ..)| u == \"bob\" && s == \"file_reviewer_assigned\"));\n // carol: note_group\n assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"note_group\"));\n // alice: 
diffnote_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"diffnote_author\"));\n // All merged rows have state_mult = 1.0\n assert!(rows.iter().all(|(.., sm)| (sm - 1.0).abs() < f64::EPSILON));\n}\n```\n\n### GREEN: Implement build_expert_sql() with the 8 CTEs.\n### VERIFY: cargo test -p lore -- test_expert_sql_returns_expected_signal_rows\n\n## Acceptance Criteria\n- [ ] test_expert_sql_returns_expected_signal_rows passes (all 5 signal types correct)\n- [ ] SQL compiles against :memory: DB with indexes from bd-2ao4 (migration 026)\n- [ ] 6 columns returned: username, signal, mr_id, qty, ts, state_mult (REAL, not TEXT)\n- [ ] 6 SQL params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\n- [ ] mr_activity CTE centralizes timestamp + state_mult (not repeated)\n- [ ] reviewer_participation uses ?6 not inlined literal\n- [ ] Existing query_expert() and all existing tests UNTOUCHED\n- [ ] build_expert_sql() is a pure function (no Connection param)\n\n## Files\n- MODIFY: src/cli/commands/who.rs (new build_expert_sql function + test, placed near query_expert at line ~641)\n\n## Edge Cases\n- ?5 (closed_mr_multiplier) bound as f64 — rusqlite handles this\n- ?6 (reviewer_min_note_chars) bound as i64 — SQLite LENGTH returns integer\n- Signals 1-2 compute state_mult inline (join through discussions, not mr_activity)\n- COALESCE fallback to created_at for NULL merged_at/closed_at/updated_at\n- Dedup in matched_notes/matched_file_changes prevents double-counting","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T16:59:44.665314Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.410514Z","closed_at":"2026-02-12T20:43:04.410470Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates 
green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1hoq","depends_on_id":"bd-1soz","type":"blocks","created_at":"2026-02-09T17:01:11.108727Z","created_by":"tayloreernisse"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2ao4","type":"blocks","created_at":"2026-02-09T17:01:11.053353Z","created_by":"tayloreernisse"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2w1p","type":"blocks","created_at":"2026-02-09T17:01:10.996731Z","created_by":"tayloreernisse"}]} {"id":"bd-1ht","title":"Epic: Gate 5 - Code Trace (lore trace)","description":"## Background\n\nGate 5 implements 'lore trace' — answers 'Why was this code introduced?' by tracing from a file path through the MR that modified it, to the issue that motivated the MR, to the discussions with decision rationale. Capstone of Phase B.\n\nGate 5 ships Tier 1 only (API-only, no local git). Tier 2 (git blame via git2-rs) deferred to Phase C.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 5 (Sections 5.1-5.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Gate 4 (bd-14q): provides mr_file_changes table + resolve_rename_chain algorithm\n- entity_references source_method: 'api' | 'note_parse' | 'description_parse'\n- discussions/notes tables for DiffNote content\n- merge_requests.merged_at exists (migration 006). Use COALESCE(merged_at, updated_at) for ordering.\n\n## Architecture\n\n- **No new tables.** Trace queries combine mr_file_changes, entity_references, discussions/notes\n- **Query flow:** file -> mr_file_changes -> MRs -> entity_references (closes/related) -> issues -> discussions with DiffNote context\n- **Tier 1:** File-level granularity only. 
Cannot trace a specific line to its introducing commit.\n- **Path parsing:** Supports 'src/foo.rs:45' syntax — line number parsed but deferred with Tier 2 warning.\n- **Rename aware:** Reuses file_history::resolve_rename_chain for multi-path matching.\n\n## Children (Execution Order)\n\n1. **bd-2n4** — Trace query logic: file -> MR -> issue -> discussion chain (src/core/trace.rs)\n2. **bd-9dd** — CLI command with human + robot output (src/cli/commands/trace.rs)\n\n## Gate Completion Criteria\n\n- [ ] `lore trace ` shows MRs with linked issues + discussion context\n- [ ] Output includes MR -> issue -> discussion chain\n- [ ] DiffNote snippets show content on the traced file\n- [ ] Cross-references from entity_references used for MR->issue linking\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] Robot mode JSON with tier: 'api_only'\n- [ ] Graceful handling when no MR data found (suggest sync with fetchMrFileChanges)\n","status":"open","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.141053Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:57:12.357740Z","compaction_level":0,"original_size":0,"labels":["epic","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-1ht","depends_on_id":"bd-14q","type":"blocks","created_at":"2026-02-02T21:34:38.033428Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ht","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-02T21:34:37.987232Z","created_by":"tayloreernisse"}]} {"id":"bd-1i2","title":"Integrate mark_dirty_tx into ingestion modules","description":"## Background\nThis bead integrates dirty source tracking into the existing ingestion pipelines. Every entity upserted during ingestion must be marked dirty so the document regenerator knows to update the corresponding search document. 
The critical constraint: mark_dirty_tx() must be called INSIDE the same transaction that upserts the entity — not after commit.\n\n**Key PRD clarification:** Mark ALL upserted entities dirty (not just changed ones). The regenerator's hash comparison handles \"unchanged\" detection cheaply — this avoids needing change detection in ingestion.\n\n## Approach\nModify 4 existing ingestion files to add mark_dirty_tx() calls inside existing transaction blocks per PRD Section 6.1.\n\n**1. src/ingestion/issues.rs:**\nInside the issue upsert loop, after each successful INSERT/UPDATE:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Issue, issue_row.id)?;\n```\n\n**2. src/ingestion/merge_requests.rs:**\nInside the MR upsert loop:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::MergeRequest, mr_row.id)?;\n```\n\n**3. src/ingestion/discussions.rs:**\nInside discussion insert (issue discussions, full-refresh transaction):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**4. src/ingestion/mr_discussions.rs:**\nInside discussion upsert (write phase):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**Discussion Sweep Cleanup (PRD Section 6.1 — CRITICAL):**\nWhen the MR discussion sweep deletes stale discussions (`last_seen_at < run_start_time`), **delete the corresponding document rows directly** — do NOT use the dirty queue for cleanup. The `ON DELETE CASCADE` on `document_labels`/`document_paths` and the `documents_embeddings_ad` trigger handle all downstream cleanup.\n\n**PRD-exact CTE pattern:**\n```sql\n-- In src/ingestion/mr_discussions.rs, during sweep phase.\n-- Uses a CTE to capture stale IDs atomically before cascading deletes.\n-- This is more defensive than two separate statements because the CTE\n-- guarantees the ID set is captured before any row is deleted.\nWITH stale AS (\n SELECT id FROM discussions\n WHERE merge_request_id = ? 
AND last_seen_at < ?\n)\n-- Step 1: delete orphaned documents (must happen while source_id still resolves)\nDELETE FROM documents\n WHERE source_type = 'discussion' AND source_id IN (SELECT id FROM stale);\n-- Step 2: delete the stale discussions themselves\nDELETE FROM discussions\n WHERE id IN (SELECT id FROM stale);\n```\n\n**NOTE:** If SQLite version doesn't support CTE-based multi-statement, execute as two sequential statements capturing IDs in Rust first:\n```rust\nlet stale_ids: Vec = conn.prepare(\n \"SELECT id FROM discussions WHERE merge_request_id = ? AND last_seen_at < ?\"\n)?.query_map(params![mr_id, run_start], |r| r.get(0))?\n .collect::, _>>()?;\n\nif !stale_ids.is_empty() {\n // Delete documents FIRST (while source_id still resolves)\n conn.execute(\n \"DELETE FROM documents WHERE source_type = 'discussion' AND source_id IN (...)\",\n ...\n )?;\n // Then delete the discussions\n conn.execute(\n \"DELETE FROM discussions WHERE id IN (...)\",\n ...\n )?;\n}\n```\n\n**IMPORTANT difference from dirty queue pattern:** The sweep deletes documents DIRECTLY (not via dirty_sources queue). This is because the source entity is being deleted — there's nothing for the regenerator to regenerate from. 
The cascade handles FTS, labels, paths, and embeddings cleanup.\n\n## Acceptance Criteria\n- [ ] Every upserted issue is marked dirty inside the same transaction\n- [ ] Every upserted MR is marked dirty inside the same transaction\n- [ ] Every upserted discussion (issue + MR) is marked dirty inside the same transaction\n- [ ] ALL upserted entities marked dirty (not just changed ones) — regenerator handles skip\n- [ ] mark_dirty_tx called with &Transaction (not &Connection)\n- [ ] mark_dirty_tx uses upsert with ON CONFLICT to reset backoff state (not INSERT OR IGNORE)\n- [ ] Discussion sweep deletes documents DIRECTLY (not via dirty queue)\n- [ ] Discussion sweep uses CTE (or Rust-side ID capture) to capture stale IDs before cascading deletes\n- [ ] Documents deleted BEFORE discussions (while source_id still resolves)\n- [ ] ON DELETE CASCADE handles document_labels, document_paths cleanup\n- [ ] documents_embeddings_ad trigger handles embedding cleanup\n- [ ] `cargo build` succeeds\n- [ ] Existing ingestion tests still pass\n\n## Files\n- `src/ingestion/issues.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/merge_requests.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/discussions.rs` — add mark_dirty_tx calls in insert loop\n- `src/ingestion/mr_discussions.rs` — add mark_dirty_tx calls + direct document deletion in sweep\n\n## TDD Loop\nRED: Existing tests should still pass (regression); new tests:\n- `test_issue_upsert_marks_dirty` — after issue ingest, dirty_sources has entry\n- `test_mr_upsert_marks_dirty` — after MR ingest, dirty_sources has entry\n- `test_discussion_upsert_marks_dirty` — after discussion ingest, dirty_sources has entry\n- `test_discussion_sweep_deletes_documents` — stale discussion documents deleted directly\n- `test_sweep_cascade_cleans_labels_paths` — ON DELETE CASCADE works\nGREEN: Add mark_dirty_tx calls in all 4 files, implement sweep with CTE\nVERIFY: `cargo test ingestion && cargo build`\n\n## Edge 
Cases\n- Upsert that doesn't change data: still marks dirty (regenerator hash check handles skip)\n- Transaction rollback: dirty mark also rolled back (atomic, inside same txn)\n- Discussion sweep with zero stale IDs: CTE returns empty, no DELETE executed\n- Large batch of upserts: each mark_dirty_tx is O(1) INSERT with ON CONFLICT\n- Sweep deletes document before discussion: order matters for source_id resolution","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.540279Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:39:17.241433Z","closed_at":"2026-01-30T17:39:17.241390Z","close_reason":"Added mark_dirty_tx calls in issues.rs, merge_requests.rs, discussions.rs, mr_discussions.rs (2 paths)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1i2","depends_on_id":"bd-38q","type":"blocks","created_at":"2026-01-30T15:29:35.105551Z","created_by":"tayloreernisse"}]} -{"id":"bd-1j1","title":"Integration test: full Phase B sync pipeline","description":"## Background\n\nThis integration test proves the full Phase B sync pipeline works end-to-end. Since Gates 1 and 2 are already implemented and closed, this test validates that the complete pipeline — including Gate 4 mr_diffs draining — works together.\n\n## Codebase Context\n\n- **Gates 1-2 FULLY IMPLEMENTED (CLOSED):** resource events fetch, closes_issues API, system note parsing (note_parser.rs), entity_references extraction (references.rs)\n- **Gate 4 in progress:** migration 015 (mr_file_changes), fetch_mr_diffs, drain_mr_diffs — this test validates the full chain\n- Migrations 001-014 exist. Migration 015 (bd-1oo) adds mr_file_changes + commit SHAs.\n- Orchestrator has drain_resource_events() and drain_mr_closes_issues(). 
Gate 4 adds drain_mr_diffs().\n- wiremock crate used in existing tests (check dev-dependencies in Cargo.toml)\n- src/core/dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- IngestProjectResult and IngestMrProjectResult track counts for all drain phases\n\n## Approach\n\nCreate tests/phase_b_integration.rs:\n\n### Test Setup\n\n1. In-memory SQLite DB with all migrations (001-015)\n2. wiremock mock server with:\n - /api/v4/projects/:id/issues — 2 test issues\n - /api/v4/projects/:id/merge_requests — 1 test MR\n - /api/v4/projects/:id/issues/:iid/resource_state_events — state events\n - /api/v4/projects/:id/issues/:iid/resource_label_events — label events\n - /api/v4/projects/:id/merge_requests/:iid/resource_state_events — merge event with source_merge_request_iid\n - /api/v4/projects/:id/merge_requests/:iid/closes_issues — linked issues\n - /api/v4/projects/:id/merge_requests/:iid/diffs — file changes\n - /api/v4/projects/:id/issues/:iid/discussions — discussion with system note \"mentioned in !1\"\n3. Config with fetch_resource_events=true and fetch_mr_file_changes=true (bd-jec)\n4. Use dependent_concurrency=1 to avoid timing issues\n\n### Test Flow\n\n```rust\n#[tokio::test]\nasync fn test_full_phase_b_pipeline() {\n // 1. Set up mock server + DB with migrations 001-015\n // 2. Run ingest issues + MRs (orchestrator functions)\n // 3. Verify pending_dependent_fetches enqueued: resource_events, mr_closes_issues, mr_diffs\n // 4. Drain all dependent fetch queues\n // 5. Assert: resource_state_events populated (count > 0)\n // 6. Assert: resource_label_events populated (count > 0)\n // 7. Assert: entity_references has closes ref with source_method='api'\n // 8. Assert: entity_references has mentioned ref with source_method='note_parse'\n // 9. Assert: mr_file_changes populated from diffs API\n // 10. 
Assert: pending_dependent_fetches fully drained (no stuck locks)\n}\n```\n\n### Assertions (SQL)\n\n```sql\nSELECT COUNT(*) FROM resource_state_events -- > 0\nSELECT COUNT(*) FROM resource_label_events -- > 0\nSELECT COUNT(*) FROM entity_references WHERE reference_type = 'closes' AND source_method = 'api' -- >= 1\nSELECT COUNT(*) FROM entity_references WHERE source_method = 'note_parse' -- >= 1\nSELECT COUNT(*) FROM mr_file_changes -- > 0\nSELECT COUNT(*) FROM pending_dependent_fetches WHERE locked_at IS NOT NULL -- = 0\n```\n\n## Acceptance Criteria\n\n- [ ] Test creates DB with migrations 001-015, mocks, and runs full pipeline\n- [ ] resource_state_events and resource_label_events populated\n- [ ] entity_references has closes ref (source_method='api') and mentioned ref (source_method='note_parse')\n- [ ] mr_file_changes populated from diffs mock\n- [ ] pending_dependent_fetches fully drained (no stuck locks, no retryable jobs)\n- [ ] Test runs in < 10 seconds\n- [ ] `cargo test --test phase_b_integration` passes\n\n## Files\n\n- tests/phase_b_integration.rs (NEW)\n\n## TDD Loop\n\nRED: Write test with all assertions — may fail if Gate 4 draining not yet wired.\n\nGREEN: Fix pipeline wiring (drain_mr_diffs in orchestrator).\n\nVERIFY: cargo test --test phase_b_integration -- --nocapture\n\n## Edge Cases\n\n- Paginated mock responses: include Link header for multi-page responses\n- Empty pages: verify graceful handling\n- Use dependent_concurrency=1 to avoid timing issues in test environment\n- Stale lock reclaim: test that locks older than stale_lock_minutes are 
reclaimed","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:26.355071Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:16:55.266005Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1j1","depends_on_id":"bd-1ji","type":"blocks","created_at":"2026-02-02T22:43:27.941002Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T22:43:40.577709Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T22:43:28.048311Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-8t4","type":"blocks","created_at":"2026-02-02T22:43:27.996061Z","created_by":"tayloreernisse"}]} +{"id":"bd-1j1","title":"Integration test: full Phase B sync pipeline","description":"## Background\n\nThis integration test proves the full Phase B sync pipeline works end-to-end. Since Gates 1 and 2 are already implemented and closed, this test validates that the complete pipeline — including Gate 4 mr_diffs draining — works together.\n\n## Codebase Context\n\n- **Gates 1-2 FULLY IMPLEMENTED (CLOSED):** resource events fetch, closes_issues API, system note parsing (note_parser.rs), entity_references extraction (references.rs)\n- **Gate 4 in progress:** migration 016 (mr_file_changes), fetch_mr_diffs, drain_mr_diffs — already wired in orchestrator (lines 708-726, 1514+)\n- **26 migrations exist** (001-026). LATEST_SCHEMA_VERSION = 26. 
In-memory DB must run all 26.\n- Orchestrator has drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), and drain_mr_diffs() (line 1514).\n- wiremock crate used in existing tests (check dev-dependencies in Cargo.toml)\n- src/core/dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- IngestProjectResult and IngestMrProjectResult track counts for all drain phases\n\n## Approach\n\nCreate tests/phase_b_integration.rs:\n\n### Test Setup\n\n1. In-memory SQLite DB with all 26 migrations (001-026)\n2. wiremock mock server with:\n - /api/v4/projects/:id/issues — 2 test issues\n - /api/v4/projects/:id/merge_requests — 1 test MR\n - /api/v4/projects/:id/issues/:iid/resource_state_events — state events\n - /api/v4/projects/:id/issues/:iid/resource_label_events — label events\n - /api/v4/projects/:id/merge_requests/:iid/resource_state_events — merge event with source_merge_request_iid\n - /api/v4/projects/:id/merge_requests/:iid/closes_issues — linked issues\n - /api/v4/projects/:id/merge_requests/:iid/diffs — file changes\n - /api/v4/projects/:id/issues/:iid/discussions — discussion with system note \"mentioned in !1\"\n3. Config with fetch_resource_events=true and fetch_mr_file_changes=true\n4. Use dependent_concurrency=1 to avoid timing issues\n\n### Test Flow\n\n```rust\n#[tokio::test]\nasync fn test_full_phase_b_pipeline() {\n // 1. Set up mock server + DB with all 26 migrations\n // 2. Run ingest issues + MRs (orchestrator functions)\n // 3. Verify pending_dependent_fetches enqueued: resource_events, mr_closes_issues, mr_diffs\n // 4. Drain all dependent fetch queues\n // 5. Assert: resource_state_events populated (count > 0)\n // 6. Assert: resource_label_events populated (count > 0)\n // 7. Assert: entity_references has closes ref with source_method='api'\n // 8. Assert: entity_references has mentioned ref with source_method='note_parse'\n // 9. 
Assert: mr_file_changes populated from diffs API\n // 10. Assert: pending_dependent_fetches fully drained (no stuck locks)\n}\n```\n\n### Assertions (SQL)\n\n```sql\nSELECT COUNT(*) FROM resource_state_events -- > 0\nSELECT COUNT(*) FROM resource_label_events -- > 0\nSELECT COUNT(*) FROM entity_references WHERE reference_type = 'closes' AND source_method = 'api' -- >= 1\nSELECT COUNT(*) FROM entity_references WHERE source_method = 'note_parse' -- >= 1\nSELECT COUNT(*) FROM mr_file_changes -- > 0\nSELECT COUNT(*) FROM pending_dependent_fetches WHERE locked_at IS NOT NULL -- = 0\n```\n\n## Acceptance Criteria\n\n- [ ] Test creates DB with all 26 migrations, mocks, and runs full pipeline\n- [ ] resource_state_events and resource_label_events populated\n- [ ] entity_references has closes ref (source_method='api') and mentioned ref (source_method='note_parse')\n- [ ] mr_file_changes populated from diffs mock\n- [ ] pending_dependent_fetches fully drained (no stuck locks, no retryable jobs)\n- [ ] Test runs in < 10 seconds\n- [ ] `cargo test --test phase_b_integration` passes\n\n## Files\n\n- CREATE: tests/phase_b_integration.rs\n\n## TDD Anchor\n\nRED: Write test with all assertions — should pass if all Gates are wired correctly.\n\nGREEN: If anything fails, it indicates a missing orchestrator connection — fix the wiring.\n\nVERIFY: cargo test --test phase_b_integration -- --nocapture\n\n## Edge Cases\n\n- Paginated mock responses: include Link header for multi-page responses\n- Empty pages: verify graceful handling\n- Use dependent_concurrency=1 to avoid timing issues in test environment\n- Stale lock reclaim: test that locks older than stale_lock_minutes are reclaimed\n- If Gate 4 drain_mr_diffs is not fully wired yet, the mr_file_changes assertion will fail — this is the intended RED signal\n\n## Dependency Context\n\n- **bd-8t4 (resource_state_events extraction)**: CLOSED. 
Provides drain_resource_events() which populates resource_state_events and resource_label_events tables.\n- **bd-3ia (closes_issues)**: CLOSED. Provides drain_mr_closes_issues() which populates entity_references with reference_type='closes', source_method='api'.\n- **bd-1ji (note parsing)**: CLOSED. Provides note_parser.rs which extracts \"mentioned in !N\" patterns and stores as entity_references with source_method='note_parse'.\n- **dependent_queue.rs**: Provides the claim/complete/fail lifecycle. All three drain functions use this.\n- **orchestrator.rs**: Contains all drain functions. drain_mr_diffs() at line 1514+ populates mr_file_changes.","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:26.355071Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:52:30.970742Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1j1","depends_on_id":"bd-1ji","type":"blocks","created_at":"2026-02-02T22:43:27.941002Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T22:43:40.577709Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T22:43:28.048311Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-8t4","type":"blocks","created_at":"2026-02-02T22:43:27.996061Z","created_by":"tayloreernisse"}]} {"id":"bd-1j5o","title":"Verification: quality gates, query plan check, real-world validation","description":"## Background\n\nPost-implementation verification checkpoint. Runs after all code beads complete to validate the full scoring model works correctly against real data, not just test fixtures.\n\n## Approach\n\nExecute 8 verification steps in order. 
Each step has a binary pass/fail outcome.\n\n### Step 1: Compiler check\n```bash\ncargo check --all-targets\n```\nPass: exit 0\n\n### Step 2: Clippy\n```bash\ncargo clippy --all-targets -- -D warnings\n```\nPass: exit 0\n\n### Step 3: Formatting\n```bash\ncargo fmt --check\n```\nPass: exit 0\n\n### Step 4: Test suite\n```bash\ncargo test -p lore\n```\nPass: all tests green, including 31 new decay/scoring tests\n\n### Step 5: UBS scan\n```bash\nubs src/cli/commands/who.rs src/core/config.rs src/core/db.rs\n```\nPass: exit 0\n\n### Step 6: Query plan verification (manual)\nRun against real database:\n```bash\ncargo run --release -- who --path MeasurementQualityDialog.tsx -vvv 2>&1 | grep -i \"query plan\"\n```\nOr use sqlite3 CLI with EXPLAIN QUERY PLAN on the expert SQL (both exact and prefix modes).\n\nPass criteria (6 checks):\n- matched_notes_raw branch 1 uses existing new_path index\n- matched_notes_raw branch 2 uses idx_notes_old_path_author\n- matched_file_changes_raw uses idx_mfc_new_path_project_mr and idx_mfc_old_path_project_mr\n- reviewer_participation uses idx_notes_diffnote_discussion_author\n- mr_activity CTE joins merge_requests via primary key from matched_file_changes\n- Path resolution probes (old_path leg) use idx_notes_old_path_project_created\nDocument observed plan as SQL comment near the CTE.\n\n### Step 7: Performance baseline (manual)\n```bash\ntime cargo run --release -- who --path MeasurementQualityDialog.tsx\ntime cargo run --release -- who --path src/\ntime cargo run --release -- who --path Dialog.tsx\n```\nPass criteria (soft SLOs):\n- Exact path: p95 < 200ms\n- Prefix: p95 < 300ms\n- Suffix: p95 < 500ms\nRecord timings as SQL comment for future regression reference.\n\n### Step 8: Real-world validation\n```bash\ncargo run --release -- who --path MeasurementQualityDialog.tsx\ncargo run --release -- who --path MeasurementQualityDialog.tsx --explain-score\ncargo run --release -- who --path MeasurementQualityDialog.tsx --as-of 
2025-06-01\ncargo run --release -- who --path MeasurementQualityDialog.tsx --all-history\n```\nPass criteria:\n- [ ] Recency discounting visible (recent authors rank above old reviewers)\n- [ ] --explain-score components sum to total (within f64 tolerance)\n- [ ] --as-of produces identical results on repeated runs\n- [ ] Assigned-only reviewers rank below participated reviewers on same MR\n- [ ] Known renamed file path resolves and credits old expertise\n- [ ] LGTM-only reviewers classified as assigned-only\n- [ ] Closed MRs at ~50% contribution visible via --explain-score\n\n## Acceptance Criteria\n- [ ] Steps 1-5 pass (exit 0)\n- [ ] Step 6: query plan documented with all 6 index usage points confirmed\n- [ ] Step 7: timing baselines recorded\n- [ ] Step 8: all 7 real-world checks pass\n\n## Files\n- All files modified by child beads (read-only verification)\n- Add SQL comments near CTE with observed EXPLAIN QUERY PLAN output\n\n## Edge Cases\n- SQLite planner may choose different plans across versions — document version\n- Timing varies by hardware — record machine specs alongside baselines\n- Real DB may have NULL merged_at on old MRs — state-aware fallback handles this","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-09T17:00:59.287720Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.415816Z","closed_at":"2026-02-12T20:43:04.415772Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1j5o","depends_on_id":"bd-1b50","type":"blocks","created_at":"2026-02-09T17:01:11.693095Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j5o","depends_on_id":"bd-1vti","type":"blocks","created_at":"2026-02-09T17:01:11.600519Z","created_by":"tayloreernisse"}]} {"id":"bd-1je","title":"Implement pending discussion queue","description":"## Background\nThe pending discussion queue tracks 
discussions that need to be fetched from GitLab. When an issue or MR is updated, its discussions may need re-fetching. This queue is separate from dirty_sources (which tracks entities needing document regeneration) — it tracks entities needing API calls to GitLab. The queue uses the same backoff pattern as dirty_sources for consistency.\n\n## Approach\nCreate `src/ingestion/discussion_queue.rs`:\n\n```rust\nuse crate::core::backoff::compute_next_attempt_at;\n\n/// Noteable type for discussion queue.\n#[derive(Debug, Clone, Copy)]\npub enum NoteableType {\n Issue,\n MergeRequest,\n}\n\nimpl NoteableType {\n pub fn as_str(&self) -> &'static str {\n match self {\n Self::Issue => \"Issue\",\n Self::MergeRequest => \"MergeRequest\",\n }\n }\n}\n\npub struct PendingFetch {\n pub project_id: i64,\n pub noteable_type: NoteableType,\n pub noteable_iid: i64,\n pub attempt_count: i32,\n}\n\n/// Queue a discussion fetch. ON CONFLICT DO UPDATE resets backoff (consistent with dirty_sources).\npub fn queue_discussion_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Get next batch of pending fetches (WHERE next_attempt_at IS NULL OR <= now).\npub fn get_pending_fetches(conn: &Connection, limit: usize) -> Result>;\n\n/// Mark fetch complete (remove from queue).\npub fn complete_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Record fetch error with backoff.\npub fn record_fetch_error(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n error: &str,\n) -> Result<()>;\n```\n\n## Acceptance Criteria\n- [ ] queue_discussion_fetch uses ON CONFLICT DO UPDATE (consistent with dirty_sources pattern)\n- [ ] Re-queuing resets: attempt_count=0, next_attempt_at=NULL, last_error=NULL\n- [ ] get_pending_fetches respects next_attempt_at backoff\n- [ ] get_pending_fetches returns entries ordered by queued_at 
ASC\n- [ ] complete_fetch removes entry from queue\n- [ ] record_fetch_error increments attempt_count, computes next_attempt_at via shared backoff\n- [ ] NoteableType.as_str() returns \"Issue\" or \"MergeRequest\" (matches DB CHECK constraint)\n- [ ] `cargo test discussion_queue` passes\n\n## Files\n- `src/ingestion/discussion_queue.rs` — new file\n- `src/ingestion/mod.rs` — add `pub mod discussion_queue;`\n\n## TDD Loop\nRED: Tests in `#[cfg(test)] mod tests`:\n- `test_queue_and_get` — queue entry, get returns it\n- `test_requeue_resets_backoff` — queue, error, re-queue -> attempt_count=0\n- `test_backoff_respected` — entry with future next_attempt_at not returned\n- `test_complete_removes` — complete_fetch removes entry\n- `test_error_increments_attempts` — error -> attempt_count=1, next_attempt_at set\nGREEN: Implement all functions\nVERIFY: `cargo test discussion_queue`\n\n## Edge Cases\n- Queue same (project_id, noteable_type, noteable_iid) twice: ON CONFLICT resets state\n- NoteableType must match DB CHECK constraint exactly (\"Issue\", \"MergeRequest\" — capitalized)\n- Empty queue: get_pending_fetches returns empty Vec","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.505548Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:31:35.496454Z","closed_at":"2026-01-30T17:31:35.496405Z","close_reason":"Implemented discussion_queue with queue/get/complete/record_error + 6 tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1je","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-01-30T15:29:35.034753Z","created_by":"tayloreernisse"},{"issue_id":"bd-1je","depends_on_id":"bd-mem","type":"blocks","created_at":"2026-01-30T15:29:35.071573Z","created_by":"tayloreernisse"}]} {"id":"bd-1ji","title":"Parse system notes for cross-reference patterns","description":"## Background\nSystem notes contain cross-reference patterns like 'mentioned in !{iid}', 'closed by !{iid}', etc. 
This is best-effort, English-only extraction that supplements the structured API data from bd-3ia and bd-8t4. Runs as a local post-processing step (no API calls).\n\n## Approach\nCreate src/core/note_parser.rs:\n\n```rust\nuse regex::Regex;\nuse lazy_static::lazy_static;\n\n/// A parsed cross-reference from a system note.\npub struct ParsedCrossRef {\n pub reference_type: String, // \"mentioned\" | \"closes\"\n pub target_entity_type: String, // \"issue\" | \"merge_request\" \n pub target_iid: i64,\n pub target_project_path: Option, // None = same project\n}\n\nlazy_static! {\n static ref MENTIONED_RE: Regex = Regex::new(\n r\"mentioned in (?:(?P[\\w\\-]+/[\\w\\-]+))?(?P[#!])(?P\\d+)\"\n ).unwrap();\n static ref CLOSED_BY_RE: Regex = Regex::new(\n r\"closed by (?:(?P[\\w\\-]+/[\\w\\-]+))?(?P[#!])(?P\\d+)\"\n ).unwrap();\n}\n\n/// Parse a system note body for cross-references.\npub fn parse_cross_refs(body: &str) -> Vec\n\n/// Extract cross-references from all system notes and insert into entity_references.\n/// Queries notes WHERE is_system = 1, parses body text, resolves to entity_references.\npub fn extract_refs_from_system_notes(\n conn: &Connection,\n project_id: i64,\n) -> Result\n\npub struct ExtractResult {\n pub inserted: usize,\n pub skipped_unresolvable: usize,\n pub parse_failures: usize, // logged at debug level\n}\n```\n\nSigil mapping: `#` = issue, `!` = merge_request\n\nResolution logic:\n1. If target_project_path is None (same project): look up entity by iid in local DB → set target_entity_id\n2. 
If target_project_path is Some: check if project is synced locally\n - If yes: resolve to local entity id\n - If no: store as unresolved (target_entity_id=NULL, target_project_path=path, target_entity_iid=iid)\n\nInsert with source_method='system_note_parse', INSERT OR IGNORE for dedup.\n\nCall after drain_dependent_queue and extract_refs_from_state_events in the sync pipeline.\n\n## Acceptance Criteria\n- [ ] 'mentioned in !123' → mentioned ref, target=MR iid 123\n- [ ] 'mentioned in #456' → mentioned ref, target=issue iid 456\n- [ ] 'mentioned in group/project!789' → cross-project mentioned ref\n- [ ] 'closed by !123' → closes ref\n- [ ] Cross-project refs stored as unresolved when target project not synced\n- [ ] source_method = 'system_note_parse'\n- [ ] Parse failures logged at debug level (not errors)\n- [ ] Idempotent (INSERT OR IGNORE)\n- [ ] Only processes is_system=1 notes\n\n## Files\n- src/core/note_parser.rs (new)\n- src/core/mod.rs (add `pub mod note_parser;`)\n- src/cli/commands/sync.rs (call after other ref extraction steps)\n\n## TDD Loop\nRED: tests/note_parser_tests.rs:\n- `test_parse_mentioned_in_mr` - \"mentioned in !567\" → ParsedCrossRef { mentioned, merge_request, 567 }\n- `test_parse_mentioned_in_issue` - \"mentioned in #234\" → ParsedCrossRef { mentioned, issue, 234 }\n- `test_parse_mentioned_cross_project` - \"mentioned in group/repo!789\" → with project path\n- `test_parse_closed_by_mr` - \"closed by !567\" → ParsedCrossRef { closes, merge_request, 567 }\n- `test_parse_multiple_refs` - note with two mentions → two refs\n- `test_parse_no_refs` - \"Updated the description\" → empty vec\n- `test_extract_refs_from_system_notes_integration` - seed DB with system notes, verify entity_references created\n\nGREEN: Implement regex patterns and extraction logic\n\nVERIFY: `cargo test note_parser -- --nocapture`\n\n## Edge Cases\n- Non-English GitLab instances: \"ajouté l'étiquette ~bug\" won't match — this is accepted limitation, logged at 
debug\n- Multi-level group paths: \"mentioned in top/sub/project#123\" — regex needs to handle arbitrary depth ([\\w\\-]+(?:/[\\w\\-]+)+)\n- Note body may contain markdown links that look like refs: \"[#123](url)\" — the regex should handle this correctly since the prefix \"mentioned in\" is required\n- Same ref mentioned multiple times in same note — dedup via INSERT OR IGNORE\n- Note may reference itself (e.g., system note on issue #123 says \"mentioned in #123\") — technically valid, store it","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-02T21:32:33.663304Z","created_by":"tayloreernisse","updated_at":"2026-02-04T20:13:33.398960Z","closed_at":"2026-02-04T20:13:33.398868Z","close_reason":"Completed: parse_cross_refs regex parser, extract_refs_from_system_notes DB function, wired into orchestrator. 17 tests passing.","compaction_level":0,"original_size":0,"labels":["gate-2","parsing","phase-b"],"dependencies":[{"issue_id":"bd-1ji","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T21:32:33.665218Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ji","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T22:41:50.672947Z","created_by":"tayloreernisse"}]} @@ -105,7 +105,7 @@ {"id":"bd-247","title":"Implement issue document extraction","description":"## Background\nIssue documents are the simplest document type — a structured header + description text. The extractor queries the existing issues and issue_labels tables (populated by ingestion) and assembles a DocumentData struct. 
This is one of three entity-specific extractors (issue, MR, discussion) that feed the document regeneration pipeline.\n\n## Approach\nImplement `extract_issue_document()` in `src/documents/extractor.rs`:\n\n```rust\n/// Extract a searchable document from an issue.\n/// Returns None if the issue has been deleted from the DB.\npub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result>\n```\n\n**SQL queries (from PRD Section 2.2):**\n```sql\n-- Main entity\nSELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,\n i.created_at, i.updated_at, i.web_url,\n p.path_with_namespace, p.id AS project_id\nFROM issues i\nJOIN projects p ON p.id = i.project_id\nWHERE i.id = ?\n\n-- Labels\nSELECT l.name FROM issue_labels il\nJOIN labels l ON l.id = il.label_id\nWHERE il.issue_id = ?\nORDER BY l.name\n```\n\n**Document format:**\n```\n[[Issue]] #234: Authentication redesign\nProject: group/project-one\nURL: https://gitlab.example.com/group/project-one/-/issues/234\nLabels: [\"bug\", \"auth\"]\nState: opened\nAuthor: @johndoe\n\n--- Description ---\n\nWe need to modernize our authentication system...\n```\n\n**Implementation steps:**\n1. Query issue row — if not found, return Ok(None)\n2. Query labels via junction table\n3. Format header with [[Issue]] prefix\n4. Compute content_hash via compute_content_hash()\n5. Compute labels_hash via compute_list_hash()\n6. paths is always empty for issues (paths are only for DiffNote discussions)\n7. 
Return DocumentData with all fields populated\n\n## Acceptance Criteria\n- [ ] Deleted issue (not in DB) returns Ok(None)\n- [ ] Issue with no description: content_text has header only (no \"--- Description ---\" section)\n- [ ] Issue with no labels: Labels line shows \"[]\"\n- [ ] Issue with labels: Labels line shows sorted JSON array\n- [ ] content_hash is SHA-256 of the full content_text\n- [ ] labels_hash is SHA-256 of sorted label names joined by newline\n- [ ] paths_hash is empty string hash (issues have no paths)\n- [ ] project_id comes from the JOIN with projects table\n- [ ] `cargo test extract_issue` passes\n\n## Files\n- `src/documents/extractor.rs` — implement `extract_issue_document()`\n\n## TDD Loop\nRED: Test in `#[cfg(test)] mod tests`:\n- `test_issue_document_format` — verify header format matches PRD template\n- `test_issue_not_found` — returns Ok(None) for nonexistent issue_id\n- `test_issue_no_description` — no description section when description is NULL\n- `test_issue_labels_sorted` — labels appear in alphabetical order\n- `test_issue_hash_deterministic` — same issue produces same content_hash\nGREEN: Implement extract_issue_document with SQL queries\nVERIFY: `cargo test extract_issue`\n\n## Edge Cases\n- Issue with NULL description: skip \"--- Description ---\" section entirely\n- Issue with empty string description: include section but with empty body\n- Issue with very long description: no truncation here (hard cap applied by caller)\n- Labels with special characters (quotes, commas): JSON array handles escaping","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-30T15:25:45.490145Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:28:13.974948Z","closed_at":"2026-01-30T17:28:13.974891Z","close_reason":"Implemented extract_issue_document() with SQL queries, PRD-compliant format, and 7 
tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-247","depends_on_id":"bd-36p","type":"blocks","created_at":"2026-01-30T15:29:15.677223Z","created_by":"tayloreernisse"},{"issue_id":"bd-247","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-01-30T15:29:15.712739Z","created_by":"tayloreernisse"}]} {"id":"bd-24j1","title":"OBSERV: Add #[instrument] spans to ingestion stages","description":"## Background\nTracing spans on each sync stage create the hierarchy that (1) makes log lines filterable by stage, (2) Phase 3's MetricsLayer reads to build StageTiming trees, and (3) gives meaningful context in -vv stderr output.\n\n## Approach\nAdd #[instrument] attributes or manual spans to these functions:\n\n### src/ingestion/orchestrator.rs\n1. ingest_project_issues_with_progress() (line ~110):\n```rust\n#[instrument(skip_all, fields(stage = \"ingest_issues\", project = %project_path))]\npub async fn ingest_project_issues_with_progress(...) -> Result {\n```\n\n2. The MR equivalent (ingest_project_mrs_with_progress or similar):\n```rust\n#[instrument(skip_all, fields(stage = \"ingest_mrs\", project = %project_path))]\n```\n\n3. Inside the issue ingest function, add child spans for sub-stages:\n```rust\nlet _fetch_span = tracing::info_span!(\"fetch_pages\", project = %project_path).entered();\n// ... fetch logic\ndrop(_fetch_span);\n\nlet _disc_span = tracing::info_span!(\"sync_discussions\", project = %project_path).entered();\n// ... discussion sync logic\ndrop(_disc_span);\n```\n\n4. drain_resource_events() (line ~566):\n```rust\nlet _span = tracing::info_span!(\"fetch_resource_events\", project = %project_path).entered();\n```\n\n### src/documents/regenerator.rs\n5. regenerate_dirty_documents() (line ~24):\n```rust\n#[instrument(skip_all, fields(stage = \"generate_docs\"))]\npub fn regenerate_dirty_documents(conn: &Connection) -> Result {\n```\n\n### src/embedding/pipeline.rs\n6. 
embed_documents() (line ~36):\n```rust\n#[instrument(skip_all, fields(stage = \"embed\"))]\npub async fn embed_documents(...) -> Result {\n```\n\n### Important: field declarations for Phase 3\nThe #[instrument] fields should include empty recording fields that Phase 3 (bd-16m8) will populate:\n```rust\n#[instrument(skip_all, fields(\n stage = \"ingest_issues\",\n project = %project_path,\n items_processed = tracing::field::Empty,\n items_skipped = tracing::field::Empty,\n errors = tracing::field::Empty,\n))]\n```\n\nThis declares the fields on the span so MetricsLayer can capture them when span.record() is called later.\n\n## Acceptance Criteria\n- [ ] JSON log lines show nested span context: sync > ingest_issues > fetch_pages\n- [ ] Each stage span has a \"stage\" field with the stage name\n- [ ] Per-project spans include \"project\" field\n- [ ] Spans are visible in -vv stderr output as bracketed context\n- [ ] Empty recording fields declared for items_processed, items_skipped, errors\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/ingestion/orchestrator.rs (spans on ingest functions and sub-stages)\n- src/documents/regenerator.rs (span on regenerate_dirty_documents)\n- src/embedding/pipeline.rs (span on embed_documents)\n\n## TDD Loop\nRED:\n - test_span_context_in_json_logs: mock sync, capture JSON, verify span chain\n - test_nested_span_chain: verify parent-child: sync > ingest_issues > fetch_pages\n - test_span_elapsed_on_close: create span, sleep 10ms, verify elapsed >= 10\nGREEN: Add #[instrument] and manual spans to all stage functions\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- #[instrument] on async fn: uses tracing::Instrument trait automatically. 
Works with tokio.\n- skip_all is essential: without it, #[instrument] tries to Debug-format all parameters, which may not implement Debug or may be expensive.\n- Manual span drop: for sub-stages within a single function, use explicit drop(_span) to end the span before the next sub-stage starts. Otherwise spans overlap.\n- tracing::field::Empty: declares a field that can be recorded later. If never recorded, it appears as empty/missing in output (not zero).","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:54:07.821068Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:19:34.307672Z","closed_at":"2026-02-04T17:19:34.307624Z","close_reason":"Added #[instrument] spans to ingest_project_issues_with_progress, ingest_project_merge_requests_with_progress, drain_resource_events, regenerate_dirty_documents, embed_documents","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-24j1","depends_on_id":"bd-2ni","type":"parent-child","created_at":"2026-02-04T15:54:07.821916Z","created_by":"tayloreernisse"},{"issue_id":"bd-24j1","depends_on_id":"bd-2rr","type":"blocks","created_at":"2026-02-04T15:55:19.798133Z","created_by":"tayloreernisse"}]} {"id":"bd-25hb","title":"NOTE-1C: Human and robot output formatting for notes","description":"## Background\nImplement the 4 output formatters for the notes command: human table, robot JSON, JSONL streaming, and CSV export.\n\n## Approach\nAdd to src/cli/commands/list.rs (after the query_notes function from NOTE-1A):\n\n1. 
pub fn print_list_notes(result: &NoteListResult) — human table:\n Use comfy-table (already in Cargo.toml) following the pattern of print_list_issues/print_list_mrs.\n Columns: ID | Author | Type | Body (truncated to 60 chars + \"...\") | Path:Line | Parent | Created\n ID: colored_cell with Cyan for gitlab_id\n Author: @username with Magenta\n Type: \"Diff\" for DiffNote, \"Disc\" for DiscussionNote, \"-\" for others\n Path: position_new_path:line (or \"-\" if no path)\n Parent: \"Issue #N\" or \"MR !N\" from noteable_type + parent_iid\n Created: format_relative_time (existing helper in list.rs)\n\n2. pub fn print_list_notes_json(result: &NoteListResult, elapsed_ms: u64, fields: Option<&[String]>) — robot JSON:\n Standard envelope: {\"ok\":true,\"data\":{\"notes\":[...],\"total_count\":N,\"showing\":M},\"meta\":{\"elapsed_ms\":U64}}\n Supports --fields via filter_fields() from crate::cli::robot\n Same pattern as print_list_issues_json.\n\n3. pub fn print_list_notes_jsonl(result: &NoteListResult) — one JSON object per line:\n Each line is one NoteListRowJson serialized. No envelope. Ideal for jq/notebook pipelines.\n Use serde_json::to_string for each row, println! each line.\n\n4. pub fn print_list_notes_csv(result: &NoteListResult) — CSV output:\n Check if csv crate is already used in the project. 
If not, use manual CSV with proper escaping:\n - Header row with field names matching NoteListRowJson\n - Quote fields containing commas, quotes, or newlines\n - Escape internal quotes by doubling them\n Alternatively, if adding csv crate (add csv = \"1\" to Cargo.toml [dependencies]), use csv::WriterBuilder for RFC 4180 compliance.\n\nHelper: Add a truncate_body(body: &str, max_len: usize) -> String function for the human table truncation.\n\n## Files\n- MODIFY: src/cli/commands/list.rs (4 print functions + truncate_body helper)\n- POSSIBLY MODIFY: Cargo.toml (add csv = \"1\" if using csv crate for CSV output)\n\n## TDD Anchor\nRED: test_truncate_note_body — assert 200-char body truncated to 60 + \"...\"\nGREEN: Implement truncate_body helper.\nVERIFY: cargo test truncate_note_body -- --nocapture\nTests: test_csv_output_basic (CSV output has correct header + escaped fields), test_jsonl_output_one_per_line (each line parses as valid JSON)\n\n## Acceptance Criteria\n- [ ] Human table renders with colored columns, truncated body, relative time\n- [ ] Robot JSON follows standard envelope with timing metadata\n- [ ] --fields filtering works on JSON output (via filter_fields)\n- [ ] JSONL outputs one valid JSON object per line\n- [ ] CSV properly escapes commas, quotes, and newlines in body text\n- [ ] Multi-byte chars handled correctly in CSV and truncation\n- [ ] All 3 tests pass\n\n## Dependency Context\n- Depends on NOTE-1A (bd-20p9): uses NoteListRow, NoteListRowJson, NoteListResult structs\n\n## Edge Cases\n- Empty body in table: show \"-\" or empty cell\n- Very long body with multi-byte chars: truncation must respect char boundaries (use .chars().take(n) not byte slicing)\n- JSONL with body containing newlines: serde_json::to_string escapes \\n correctly\n- CSV with body containing quotes: must double them per RFC 
4180","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:00:53.482055Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:24.304235Z","closed_at":"2026-02-12T18:13:24.304188Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["cli","per-note","search"],"dependencies":[{"issue_id":"bd-25hb","depends_on_id":"bd-1oyf","type":"blocks","created_at":"2026-02-12T17:04:48.455566Z","created_by":"tayloreernisse"}]} -{"id":"bd-25s","title":"robot-docs: Add Ollama dependency discovery to manifest","description":"## Background\n\nAdd Ollama dependency discovery to robot-docs so agents know which commands need Ollama and which work without it.\n\n## Codebase Context\n\n- handle_robot_docs() in src/main.rs (line ~1646) returns RobotDocsData JSON\n- RobotDocsData has fields: commands, exit_codes, workflows, aliases, clap_error_codes\n- Currently 18 documented commands in the manifest\n- Ollama required for: embed, search --mode=semantic, search --mode=hybrid\n- Not required for: all Phase B temporal commands (timeline, file-history, trace), lexical search, count, ingest, stats, etc.\n- No dependencies field exists yet in RobotDocsData\n\n## Approach\n\nAdd dependencies field to RobotDocsData struct and populate in handle_robot_docs():\n\n```json\n{\n \"ollama\": {\n \"required_by\": [\"embed\", \"search --mode=semantic\", \"search --mode=hybrid\"],\n \"not_required_by\": [\"issues\", \"mrs\", \"search --mode=lexical\", \"timeline\", \"file-history\", \"trace\", \"count\", \"ingest\", \"stats\", \"sync\", \"doctor\", \"health\"],\n \"install\": {\"macos\": \"brew install ollama\", \"linux\": \"curl -fsSL https://ollama.ai/install.sh | sh\"},\n \"setup\": \"ollama pull nomic-embed-text\",\n \"note\": \"Lexical search and all temporal features work without Ollama.\"\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore robot-docs | jq '.data.dependencies.ollama'` returns structured info\n- [ ] 
required_by and not_required_by lists are complete and accurate\n- [ ] Phase B commands listed in not_required_by\n- [ ] Install instructions for macos and linux\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- src/main.rs (update RobotDocsData struct + handle_robot_docs)\n\n## TDD Loop\n\nVERIFY: `lore robot-docs | jq '.data.dependencies.ollama.required_by'`\n\n## Edge Cases\n\n- Keep not_required_by up to date as new commands are added\n- Phase B commands (timeline, file-history, trace) must be in not_required_by once they exist","status":"open","priority":4,"issue_type":"feature","created_at":"2026-01-30T20:26:43.169688Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:17:09.991762Z","compaction_level":0,"original_size":0,"labels":["enhancement","robot-mode"]} +{"id":"bd-25s","title":"robot-docs: Add Ollama dependency discovery to manifest","description":"## Background\n\nAdd Ollama dependency discovery to robot-docs so agents know which commands need Ollama and which work without it. Currently robot-docs lists commands, exit codes, workflows, and aliases — but has no dependency information.\n\n## Codebase Context\n\n- handle_robot_docs() in src/main.rs (line ~1646) returns RobotDocsData JSON\n- RobotDocsData struct has fields: commands, exit_codes, workflows, aliases, clap_error_codes\n- Currently 18 documented commands in the manifest\n- Ollama required for: embed, search --mode=semantic, search --mode=hybrid\n- Not required for: all Phase B temporal commands (timeline, file-history, trace), lexical search, count, ingest, stats, sync, doctor, health, who, show, issues, mrs, etc.\n- No dependencies field exists yet in RobotDocsData\n\n## Approach\n\n### 1. Add dependencies field to RobotDocsData (src/main.rs):\n\n```rust\n#[derive(Serialize)]\nstruct RobotDocsData {\n // ... 
existing fields ...\n dependencies: DependencyInfo,\n}\n\n#[derive(Serialize)]\nstruct DependencyInfo {\n ollama: OllamaDependency,\n}\n\n#[derive(Serialize)]\nstruct OllamaDependency {\n required_by: Vec,\n not_required_by: Vec,\n install: HashMap, // {\"macos\": \"brew install ollama\", \"linux\": \"curl ...\"}\n setup: String, // \"ollama pull nomic-embed-text\"\n note: String,\n}\n```\n\n### 2. Populate in handle_robot_docs():\n\n```json\n{\n \"ollama\": {\n \"required_by\": [\"embed\", \"search --mode=semantic\", \"search --mode=hybrid\"],\n \"not_required_by\": [\"issues\", \"mrs\", \"search --mode=lexical\", \"timeline\", \"file-history\", \"count\", \"ingest\", \"stats\", \"sync\", \"doctor\", \"health\", \"who\", \"show\", \"status\"],\n \"install\": {\"macos\": \"brew install ollama\", \"linux\": \"curl -fsSL https://ollama.ai/install.sh | sh\"},\n \"setup\": \"ollama pull nomic-embed-text\",\n \"note\": \"Lexical search and all temporal features work without Ollama.\"\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore robot-docs | jq '.data.dependencies.ollama'` returns structured info\n- [ ] required_by lists embed and semantic/hybrid search modes\n- [ ] not_required_by lists all commands that work without Ollama (including Phase B if they exist)\n- [ ] Install instructions for macos and linux\n- [ ] setup field includes \"ollama pull nomic-embed-text\"\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/main.rs (add DependencyInfo/OllamaDependency structs, update RobotDocsData, populate in handle_robot_docs)\n\n## TDD Anchor\n\nNo unit test needed — this is static metadata. 
Verify with:\n\n```bash\ncargo check --all-targets\ncargo run --release -- robot-docs | jq '.data.dependencies.ollama.required_by'\ncargo run --release -- robot-docs | jq '.data.dependencies.ollama.not_required_by'\n```\n\n## Edge Cases\n\n- Keep not_required_by up to date as new commands are added — consider a comment in the code listing which commands to check\n- Phase B commands (timeline, file-history, trace) must be in not_required_by once they exist\n- If a command conditionally needs Ollama (like search with --mode flag), list the specific flag combination in required_by\n\n## Dependency Context\n\n- **RobotDocsData** (src/main.rs ~line 1646): the existing struct that this bead extends. Currently has commands (Vec), exit_codes (Vec), workflows (Vec), aliases (Vec), clap_error_codes (Vec). Adding a dependencies field is additive — no breaking changes.\n- **handle_robot_docs()**: the function that constructs and returns the JSON. All data is hardcoded in the function — no runtime introspection needed.","status":"open","priority":4,"issue_type":"feature","created_at":"2026-01-30T20:26:43.169688Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:53:20.425853Z","compaction_level":0,"original_size":0,"labels":["enhancement","robot-mode"]} {"id":"bd-26f2","title":"Implement common widgets (status bar, breadcrumb, loading, error toast, help overlay)","description":"## Background\nCommon widgets appear across all screens: the status bar shows context-sensitive key hints and sync status, the breadcrumb shows navigation depth, the loading spinner indicates background work, the error toast shows transient errors with auto-dismiss, and the help overlay (?) 
shows available keybindings.\n\n## Approach\nCreate crates/lore-tui/src/view/common/mod.rs and individual widget files:\n\nview/common/mod.rs:\n- render_breadcrumb(frame, area, nav: &NavigationStack, theme: &Theme): renders \"Dashboard > Issues > #42\" trail\n- render_status_bar(frame, area, registry: &CommandRegistry, screen: &Screen, mode: &InputMode, theme: &Theme): renders bottom bar with key hints and sync indicator\n- render_loading(frame, area, load_state: &LoadState, theme: &Theme): renders centered spinner for LoadingInitial, or subtle refresh indicator for Refreshing\n- render_error_toast(frame, area, msg: &str, theme: &Theme): renders floating toast at bottom-right with error message\n- render_help_overlay(frame, area, registry: &CommandRegistry, screen: &Screen, theme: &Theme): renders centered modal with keybinding list from registry\n\nCreate crates/lore-tui/src/view/mod.rs:\n- render_screen(frame, app: &LoreApp): top-level dispatch — renders breadcrumb + screen content + status bar + optional overlays (help, error toast, command palette)\n\n## Acceptance Criteria\n- [ ] Breadcrumb renders all stack entries with \" > \" separator\n- [ ] Status bar shows contextual hints from CommandRegistry\n- [ ] Loading spinner animates via tick subscription\n- [ ] Error toast auto-positions at bottom-right of screen\n- [ ] Help overlay shows all commands for current screen from registry\n- [ ] render_screen routes to correct per-screen view function\n- [ ] Overlays (help, error, palette) render on top of screen content\n\n## Files\n- CREATE: crates/lore-tui/src/view/mod.rs\n- CREATE: crates/lore-tui/src/view/common/mod.rs\n\n## TDD Anchor\nRED: Write test_breadcrumbs_format that creates a NavigationStack with Dashboard > IssueList, calls breadcrumbs(), asserts [\"Dashboard\", \"Issues\"].\nGREEN: Implement breadcrumbs() in NavigationStack (already in nav task) and render_breadcrumb.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml 
test_breadcrumbs\n\n## Edge Cases\n- Breadcrumb must truncate from the left if stack is too deep for terminal width\n- Status bar must handle narrow terminals (<60 cols) gracefully — show abbreviated hints\n- Error toast must handle very long messages with truncation\n- Help overlay must scroll if there are more commands than terminal height\n\n## Dependency Context\nUses NavigationStack from \"Implement NavigationStack\" task.\nUses CommandRegistry from \"Implement CommandRegistry\" task.\nUses LoadState from \"Implement AppState composition\" task.\nUses Theme from \"Implement theme configuration\" task.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T16:57:13.520393Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:25.901669Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-26f2","depends_on_id":"bd-1qpp","type":"blocks","created_at":"2026-02-12T17:09:39.304411Z","created_by":"tayloreernisse"},{"issue_id":"bd-26f2","depends_on_id":"bd-1v9m","type":"blocks","created_at":"2026-02-12T17:09:39.322983Z","created_by":"tayloreernisse"},{"issue_id":"bd-26f2","depends_on_id":"bd-2tr4","type":"blocks","created_at":"2026-02-12T18:11:25.901643Z","created_by":"tayloreernisse"},{"issue_id":"bd-26f2","depends_on_id":"bd-38lb","type":"blocks","created_at":"2026-02-12T17:09:39.313270Z","created_by":"tayloreernisse"},{"issue_id":"bd-26f2","depends_on_id":"bd-5ofk","type":"blocks","created_at":"2026-02-12T17:09:39.295693Z","created_by":"tayloreernisse"}]} {"id":"bd-26lp","title":"Implement CLI integration (lore tui command + binary delegation)","description":"## Background\nThe lore CLI binary needs a tui subcommand that launches the lore-tui binary. This is runtime binary delegation — lore finds lore-tui via PATH lookup and execs it, passing through relevant flags. Zero compile-time dependency from lore to lore-tui. 
The TUI is the human interface; the CLI is the robot/script interface.\n\n## Approach\nAdd a tui subcommand to the lore CLI:\n\n**CLI side** (`src/cli/tui.rs`):\n- Add `Tui` variant to the main CLI enum with flags: --config, --sync, --fresh, --render-mode, --ascii, --no-alt-screen\n- Implementation: resolve lore-tui binary via PATH lookup (std::process::Command with \"lore-tui\")\n- Pass through all flags as CLI arguments\n- If lore-tui not found in PATH, print helpful error: \"lore-tui binary not found. Install with: cargo install --path crates/lore-tui\"\n- Exec (not spawn+wait) using std::os::unix::process::CommandExt::exec() for clean process replacement on Unix\n\n**Binary naming**: The binary is `lore-tui` (hyphenated), matching the crate name.\n\n## Acceptance Criteria\n- [ ] lore tui launches lore-tui binary from PATH\n- [ ] All flags (--config, --sync, --fresh, --render-mode, --ascii, --no-alt-screen) are passed through\n- [ ] Missing binary produces helpful error with install instructions\n- [ ] Uses exec() on Unix for clean process replacement (no zombie parent)\n- [ ] Robot mode: lore --robot tui returns JSON error if binary not found\n- [ ] lore tui --help shows TUI-specific flags\n\n## Files\n- CREATE: src/cli/tui.rs\n- MODIFY: src/cli/mod.rs (add tui subcommand to CLI enum)\n- MODIFY: src/main.rs (add match arm for Tui variant)\n\n## TDD Anchor\nRED: Write `test_tui_binary_not_found_error` that asserts the error message includes install instructions when lore-tui is not in PATH.\nGREEN: Implement the binary lookup and error handling.\nVERIFY: cargo test tui_binary -- --nocapture\n\nAdditional tests:\n- test_tui_flag_passthrough (verify all flags are forwarded)\n- test_tui_robot_mode_json_error (structured error when binary missing)\n\n## Edge Cases\n- lore-tui binary exists but is not executable — should produce clear error\n- PATH contains multiple lore-tui versions — uses first match (standard PATH behavior)\n- Windows: exec() not available — fall 
back to spawn+wait+exit with same code\n- User runs lore tui in robot mode — should fail with structured JSON error (TUI is human-only)\n\n## Dependency Context\nDepends on bd-2iqk (Doctor + Stats screens) for phase ordering. The CLI integration is one of the last Phase 4 tasks because it requires lore-tui to be substantially complete for the delegation to be useful.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:02:39.602970Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:34.449333Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-26lp","depends_on_id":"bd-1df9","type":"blocks","created_at":"2026-02-12T18:11:34.449307Z","created_by":"tayloreernisse"},{"issue_id":"bd-26lp","depends_on_id":"bd-2iqk","type":"blocks","created_at":"2026-02-12T17:10:02.880825Z","created_by":"tayloreernisse"}]} {"id":"bd-2711","title":"WHO: Reviews mode query (query_reviews)","description":"## Background\n\nReviews mode answers \"What review patterns does person X have?\" by analyzing the **prefix** convention in DiffNote bodies (e.g., **suggestion**: ..., **question**: ..., **nit**: ...). Only counts DiffNotes on MRs the user did NOT author (m.author_username != ?1).\n\n## Approach\n\n### Three queries:\n1. **Total DiffNotes**: COUNT(*) of DiffNotes by user on others' MRs\n2. **Distinct MRs reviewed**: COUNT(DISTINCT m.id) \n3. 
**Category extraction**: SQL-level prefix parsing + Rust normalization\n\n### Category extraction SQL:\n```sql\nSELECT\n SUBSTR(ltrim(n.body), 3, INSTR(SUBSTR(ltrim(n.body), 3), '**') - 1) AS raw_prefix,\n COUNT(*) AS cnt\nFROM notes n\nJOIN discussions d ON n.discussion_id = d.id\nJOIN merge_requests m ON d.merge_request_id = m.id\nWHERE n.author_username = ?1\n AND n.note_type = 'DiffNote' AND n.is_system = 0\n AND m.author_username != ?1\n AND ltrim(n.body) LIKE '**%**%' -- only bodies with **prefix** pattern\n AND n.created_at >= ?2\n AND (?3 IS NULL OR n.project_id = ?3)\nGROUP BY raw_prefix ORDER BY cnt DESC\n```\n\nKey: `ltrim(n.body)` tolerates leading whitespace before **prefix** (common in practice).\n\n### normalize_review_prefix() in Rust:\n```rust\nfn normalize_review_prefix(raw: &str) -> String {\n let s = raw.trim().trim_end_matches(':').trim().to_lowercase();\n // Strip parentheticals like \"(non-blocking)\"\n let s = if let Some(idx) = s.find('(') { s[..idx].trim().to_string() } else { s };\n // Merge nit/nitpick variants\n match s.as_str() {\n \"nitpick\" | \"nit\" => \"nit\".to_string(),\n other => other.to_string(),\n }\n}\n```\n\n### HashMap merge for normalized categories, then sort by count DESC\n\n### ReviewsResult struct:\n```rust\npub struct ReviewsResult {\n pub username: String,\n pub total_diffnotes: u32,\n pub categorized_count: u32,\n pub mrs_reviewed: u32,\n pub categories: Vec,\n}\npub struct ReviewCategory { pub name: String, pub count: u32, pub percentage: f64 }\n```\n\nNo LIMIT needed — categories are naturally bounded (few distinct prefixes).\n\n## Files\n\n- `src/cli/commands/who.rs`\n\n## TDD Loop\n\nRED:\n```\ntest_reviews_query — insert 3 DiffNotes (2 with **prefix**, 1 without); verify total=3, categorized=2, categories.len()=2\ntest_normalize_review_prefix — \"suggestion\" \"Suggestion:\" \"suggestion (non-blocking):\" \"Nitpick:\" \"nit (non-blocking):\" \"question\" \"TODO:\"\n```\n\nGREEN: Implement query_reviews + 
normalize_review_prefix\nVERIFY: `cargo test -- reviews`\n\n## Acceptance Criteria\n\n- [ ] test_reviews_query passes (total=3, categorized=2)\n- [ ] test_normalize_review_prefix passes (nit/nitpick merge, parenthetical strip)\n- [ ] Only counts DiffNotes on MRs user did NOT author\n- [ ] Default since window: 6m\n\n## Edge Cases\n\n- Self-authored MRs excluded (m.author_username != ?1) — user's notes on own MRs are not \"reviews\"\n- ltrim() handles leading whitespace before **prefix**\n- Empty raw_prefix after normalization filtered out (!normalized.is_empty())\n- Percentage calculated from categorized_count (not total_diffnotes)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:40:53.350210Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.599252Z","closed_at":"2026-02-08T04:10:29.599217Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. All quality gates pass.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2711","depends_on_id":"bd-2ldg","type":"blocks","created_at":"2026-02-08T02:43:37.763557Z","created_by":"tayloreernisse"},{"issue_id":"bd-2711","depends_on_id":"bd-34rr","type":"blocks","created_at":"2026-02-08T02:43:37.911881Z","created_by":"tayloreernisse"}]} @@ -122,7 +122,7 @@ {"id":"bd-2dlt","title":"Implement GraphQL client with partial-error handling","description":"## Background\nGitLab's GraphQL endpoint (/api/graphql) uses different auth than REST (Bearer token, not PRIVATE-TOKEN). We need a minimal GraphQL client that handles the GitLab-specific error codes and partial-data responses per GraphQL spec. The client returns a GraphqlQueryResult struct that propagates partial-error metadata end-to-end.\n\n## Approach\nCreate a new file src/gitlab/graphql.rs with GraphqlClient (uses reqwest). Add httpdate crate for Retry-After HTTP-date parsing. Wire into the module tree. 
Factory on GitLabClient keeps token encapsulated.\n\n## Files\n- src/gitlab/graphql.rs (NEW) — GraphqlClient struct, GraphqlQueryResult, ansi256_from_rgb\n- src/gitlab/mod.rs (add pub mod graphql;)\n- src/gitlab/client.rs (add graphql_client() factory method)\n- Cargo.toml (add httpdate dependency)\n\n## Implementation\n\nGraphqlClient struct:\n Fields: http (reqwest::Client with 30s timeout), base_url (String), token (String)\n Constructor: new(base_url, token) — trims trailing slash from base_url\n \nquery() method:\n - POST to {base_url}/api/graphql\n - Headers: Authorization: Bearer {token}, Content-Type: application/json\n - Body: {\"query\": \"...\", \"variables\": {...}}\n - Returns Result\n\nGraphqlQueryResult struct (pub):\n data: serde_json::Value\n had_partial_errors: bool\n first_partial_error: Option\n\nHTTP status mapping:\n 401 | 403 -> LoreError::GitLabAuthFailed\n 404 -> LoreError::GitLabNotFound { resource: \"GraphQL endpoint\" }\n 429 -> LoreError::GitLabRateLimited { retry_after } (parse Retry-After: try u64 first, then httpdate::parse_http_date, fallback 60)\n Other non-success -> LoreError::Other\n\nGraphQL-level error handling:\n errors array present + data absent/null -> Err(LoreError::Other(\"GraphQL error: {first_msg}\"))\n errors array present + data present -> Ok(GraphqlQueryResult { data, had_partial_errors: true, first_partial_error: Some(first_msg) })\n No errors + data present -> Ok(GraphqlQueryResult { data, had_partial_errors: false, first_partial_error: None })\n No errors + no data -> Err(LoreError::Other(\"missing 'data' field\"))\n\nansi256_from_rgb(r, g, b) -> u8:\n Maps RGB to nearest ANSI 256-color index using 6x6x6 cube (indices 16-231).\n MUST be placed BEFORE #[cfg(test)] module (clippy::items_after_test_module).\n\nFactory in src/gitlab/client.rs:\n pub fn graphql_client(&self) -> crate::gitlab::graphql::GraphqlClient {\n crate::gitlab::graphql::GraphqlClient::new(&self.base_url, &self.token)\n }\n\n## Acceptance 
Criteria\n- [ ] query() sends POST with Bearer auth header\n- [ ] Success: returns GraphqlQueryResult { data, had_partial_errors: false }\n- [ ] Errors-only (no data): returns Err with first error message\n- [ ] Partial data + errors: returns Ok with had_partial_errors: true\n- [ ] 401 -> GitLabAuthFailed\n- [ ] 403 -> GitLabAuthFailed\n- [ ] 404 -> GitLabNotFound\n- [ ] 429 -> GitLabRateLimited (parses Retry-After delta-seconds and HTTP-date, fallback 60)\n- [ ] ansi256_from_rgb: (0,0,0)->16, (255,255,255)->231\n- [ ] cargo check --all-targets passes\n\n## TDD Loop\nRED: test_graphql_query_success, test_graphql_query_with_errors_no_data, test_graphql_auth_uses_bearer, test_graphql_401_maps_to_auth_failed, test_graphql_403_maps_to_auth_failed, test_graphql_404_maps_to_not_found, test_graphql_partial_data_with_errors_returns_data, test_retry_after_http_date_format, test_retry_after_invalid_falls_back_to_60, test_ansi256_from_rgb\n Tests use wiremock or similar mock HTTP server\nGREEN: Implement GraphqlClient, add httpdate to Cargo.toml\nVERIFY: cargo test graphql && cargo test ansi256\n\n## Edge Cases\n- Use r##\"...\"## in tests containing \"#1f75cb\" hex colors (# breaks r#\"...\"#)\n- LoreError::GitLabRateLimited uses u64 not Option — use .unwrap_or(60)\n- httpdate::parse_http_date returns SystemTime — compute duration_since(now) for delta\n- GraphqlQueryResult is NOT Clone — tests must check fields individually","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-11T06:41:52.833151Z","created_by":"tayloreernisse","updated_at":"2026-02-11T07:21:33.417835Z","closed_at":"2026-02-11T07:21:33.417793Z","close_reason":"Implemented by agent swarm — all quality gates pass (595 tests, 0 
failures)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2dlt","depends_on_id":"bd-1v8t","type":"blocks","created_at":"2026-02-11T06:42:40.451408Z","created_by":"tayloreernisse"},{"issue_id":"bd-2dlt","depends_on_id":"bd-2y79","type":"parent-child","created_at":"2026-02-11T06:41:52.840577Z","created_by":"tayloreernisse"}]} {"id":"bd-2e8","title":"Add fetchResourceEvents config flag to SyncConfig","description":"## Background\nEvent fetching should be opt-in (default true) so users who don't need temporal queries skip 3 extra API calls per entity. This follows the existing SyncConfig pattern with serde defaults and camelCase JSON aliases.\n\n## Approach\nAdd to SyncConfig in src/core/config.rs:\n```rust\n#[serde(rename = \"fetchResourceEvents\", default = \"default_true\")]\npub fetch_resource_events: bool,\n```\n\nAdd default function (if not already present):\n```rust\nfn default_true() -> bool { true }\n```\n\nUpdate Default impl for SyncConfig to include `fetch_resource_events: true`.\n\nAdd --no-events flag to sync command in src/cli/mod.rs (SyncArgs):\n```rust\n/// Skip resource event fetching (overrides config)\n#[arg(long = \"no-events\", help_heading = \"Sync Options\")]\npub no_events: bool,\n```\n\nIn the sync command handler (src/cli/commands/sync.rs), override config when flag is set:\n```rust\nif args.no_events {\n config.sync.fetch_resource_events = false;\n}\n```\n\n## Acceptance Criteria\n- [ ] SyncConfig deserializes `fetchResourceEvents: false` from JSON config\n- [ ] SyncConfig defaults to `fetch_resource_events: true` when field absent\n- [ ] `--no-events` flag parses correctly in CLI\n- [ ] `--no-events` overrides config to false\n- [ ] `cargo test` passes with no regressions\n\n## Files\n- src/core/config.rs (add field to SyncConfig + default fn + Default impl)\n- src/cli/mod.rs (add --no-events to SyncArgs)\n- src/cli/commands/sync.rs (override config when flag set)\n\n## TDD Loop\nRED: tests/config_tests.rs (or 
inline in config.rs):\n- `test_sync_config_fetch_resource_events_default_true` - omit field from JSON, verify default\n- `test_sync_config_fetch_resource_events_explicit_false` - set field false, verify parsed\n- `test_sync_config_no_events_flag` - verify CLI arg parsing\n\nGREEN: Add the field, default fn, Default impl update, CLI flag, and override logic\n\nVERIFY: `cargo test config -- --nocapture && cargo build`\n\n## Edge Cases\n- Ensure serde rename matches camelCase convention used by all other SyncConfig fields\n- The default_true fn may already exist for other fields — check before adding duplicate\n- The --no-events flag must NOT be confused with --no-X negation flags already in CLI (check mod.rs for conflicts)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:31:24.006037Z","created_by":"tayloreernisse","updated_at":"2026-02-03T16:10:20.311986Z","closed_at":"2026-02-03T16:10:20.311939Z","close_reason":"Completed: Added fetch_resource_events bool to SyncConfig with serde rename, default_true, --no-events CLI flag, and config override in sync handler","compaction_level":0,"original_size":0,"labels":["config","gate-1","phase-b"],"dependencies":[{"issue_id":"bd-2e8","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:24.010608Z","created_by":"tayloreernisse"}]} {"id":"bd-2emv","title":"FrankenTUI integration proof + terminal compat smoke test","description":"## Background\nThis is the critical validation that FrankenTUI works with our setup. A minimal Model trait implementation must compile, render a frame, and handle basic input. Terminal compatibility must be verified in iTerm2 and tmux. 
This proves the toolchain gate before investing in the full implementation.\n\n## Approach\nIn crates/lore-tui/src/app.rs, implement a minimal LoreApp that:\n- implements ftui_runtime::program::Model with type Message = Msg\n- init() returns Cmd::none()\n- update() handles Msg::Quit to return None (exit) and ignores everything else\n- view() renders a simple \"lore TUI\" text centered on screen\n- subscriptions() returns empty vec\n\nAdd a smoke test binary or integration test that:\n- Creates a TerminalSession with ftui test harness\n- Verifies Model::view() produces non-empty output\n- Verifies resize events are handled without panic\n- Tests render in both fullscreen and inline(12) modes\n\nTerminal compat: manually verify ftui demo-showcase renders correctly in iTerm2 and tmux (document results in test notes).\n\n## Acceptance Criteria\n- [ ] LoreApp implements Model trait with Msg as message type\n- [ ] App::fullscreen(lore_app).run() compiles (even if not runnable in CI without a TTY)\n- [ ] App::inline(lore_app, 12).run() compiles\n- [ ] Panic hook installed: terminal restored on crash (crossterm disable_raw_mode + LeaveAlternateScreen)\n- [ ] Crash report written to ~/.local/share/lore/crash-{timestamp}.log with redacted sensitive data\n- [ ] Crash file retention: max 20 files, oldest deleted\n- [ ] ftui demo-showcase renders correctly in iTerm2 (documented)\n- [ ] ftui demo-showcase renders correctly in tmux (documented)\n- [ ] Binary size increase < 5MB over current lore binary\n\n## Files\n- CREATE: crates/lore-tui/src/app.rs (minimal Model impl)\n- MODIFY: crates/lore-tui/src/lib.rs (add install_panic_hook_for_tui, crash report logic)\n- CREATE: crates/lore-tui/src/crash_context.rs (ring buffer stub for crash diagnostics)\n\n## TDD Anchor\nRED: Write test_app_model_compiles that creates LoreApp and calls init(), verifying it returns without error.\nGREEN: Implement minimal LoreApp struct with Model trait.\nVERIFY: cargo test --manifest-path 
crates/lore-tui/Cargo.toml test_app_model\n\n## Edge Cases\n- CI environments have no TTY — tests must use ftui test harness, not actual terminal\n- tmux may not support all ANSI features — FrankenTUI's BOCPD resize coalescing must be verified\n- Panic hook must handle double-panic gracefully (don't panic inside the panic hook)\n- Crash context ring buffer must be lock-free readable from panic hook (signal safety)\n\n## Dependency Context\nUses crate scaffold (Cargo.toml, rust-toolchain.toml) from \"Create lore-tui crate scaffold\" task.\nUses Msg enum and Screen type from \"Implement core types\" task.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T16:54:52.087021Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:21.877846Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2emv","depends_on_id":"bd-1cj0","type":"blocks","created_at":"2026-02-12T18:11:21.877815Z","created_by":"tayloreernisse"},{"issue_id":"bd-2emv","depends_on_id":"bd-3ddw","type":"blocks","created_at":"2026-02-12T17:09:28.605699Z","created_by":"tayloreernisse"},{"issue_id":"bd-2emv","depends_on_id":"bd-c9gk","type":"blocks","created_at":"2026-02-12T17:09:28.615323Z","created_by":"tayloreernisse"}]} -{"id":"bd-2ez","title":"Add 'lore count references' command","description":"## Background\n\nThe count command currently supports issues, mrs, discussions, notes, and events. 
This adds 'references' as a new entity type, showing cross-reference totals and breakdowns by reference_type and source_method.\n\n## Codebase Context\n\n- entity_references table (migration 011) with:\n - reference_type CHECK: `'closes' | 'mentioned' | 'related'`\n - source_method CHECK: `'api' | 'note_parse' | 'description_parse'` (**codebase values, NOT spec values**)\n - target_entity_id: NULL for unresolved cross-project refs\n- Count command pattern in src/cli/commands/count.rs: run_count() returns CountResult, handle_count formats output\n- events count already implemented as a special case: run_count_events() in main.rs (line ~829)\n- count.rs has value_parser list for entity arg\n\n## Approach\n\n### 1. Add to CountArgs value_parser in `src/cli/mod.rs`:\n```rust\n#[arg(value_parser = [\"issues\", \"mrs\", \"discussions\", \"notes\", \"events\", \"references\"])]\npub entity: String,\n```\n\n### 2. Add types and query in `src/cli/commands/count.rs`:\n\n```rust\npub struct ReferenceCountResult {\n pub total: i64,\n pub by_type: HashMap, // closes, mentioned, related\n pub by_method: HashMap, // api, note_parse, description_parse\n pub unresolved: i64,\n}\n```\n\n### 3. SQL:\n```sql\nSELECT\n COUNT(*) as total,\n COALESCE(SUM(CASE WHEN reference_type = 'closes' THEN 1 ELSE 0 END), 0) as closes,\n COALESCE(SUM(CASE WHEN reference_type = 'mentioned' THEN 1 ELSE 0 END), 0) as mentioned,\n COALESCE(SUM(CASE WHEN reference_type = 'related' THEN 1 ELSE 0 END), 0) as related,\n COALESCE(SUM(CASE WHEN source_method = 'api' THEN 1 ELSE 0 END), 0) as api,\n COALESCE(SUM(CASE WHEN source_method = 'note_parse' THEN 1 ELSE 0 END), 0) as note_parse,\n COALESCE(SUM(CASE WHEN source_method = 'description_parse' THEN 1 ELSE 0 END), 0) as desc_parse,\n COALESCE(SUM(CASE WHEN target_entity_id IS NULL THEN 1 ELSE 0 END), 0) as unresolved\nFROM entity_references\n```\n\n### 4. 
Human output:\n```\nReferences: 1,234\n By type:\n closes: 456\n mentioned: 678\n related: 100\n By source:\n api: 234\n note_parse: 890\n description_parse: 110\n Unresolved: 45 (3.6%)\n```\n\n### 5. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"entity\": \"references\",\n \"total\": 1234,\n \"by_type\": { \"closes\": 456, \"mentioned\": 678, \"related\": 100 },\n \"by_method\": { \"api\": 234, \"note_parse\": 890, \"description_parse\": 110 },\n \"unresolved\": 45\n }\n}\n```\n\n### 6. Wire in main.rs handle_count:\nAdd \"references\" branch, similar to the existing \"events\" special case.\n\n## Acceptance Criteria\n\n- [ ] `lore count references` works with human output\n- [ ] `lore --robot count references` returns JSON\n- [ ] by_type uses codebase values: closes, mentioned, related\n- [ ] by_method uses codebase values: api, note_parse, description_parse (NOT spec values)\n- [ ] Unresolved = WHERE target_entity_id IS NULL\n- [ ] Zero references: all counts 0, not error\n- [ ] entity_references table missing (old schema): graceful error with migration suggestion\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/mod.rs` (add \"references\" to value_parser)\n- `src/cli/commands/count.rs` (add count_references + ReferenceCountResult)\n- `src/main.rs` (add \"references\" branch in handle_count)\n\n## TDD Loop\n\nRED: `test_count_references_query` with in-memory DB + migration 011 data\n\nGREEN: Implement query, result type, output.\n\nVERIFY: `cargo test --lib -- count && cargo check --all-targets`\n\n## Edge Cases\n\n- entity_references table doesn't exist (pre-migration-011): catch SQL error, suggest `lore migrate`\n- All references unresolved: unresolved = total\n- New source_method values in future: consider logging unknown 
values","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:43.780303Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:42:55.459109Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2ez","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T22:43:40.652558Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ez","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T22:43:33.877742Z","created_by":"tayloreernisse"}]} +{"id":"bd-2ez","title":"Add 'lore count references' command","description":"## Background\n\nThe count command currently supports issues, mrs, discussions, notes, and events. This adds 'references' as a new entity type, showing cross-reference totals and breakdowns by reference_type and source_method.\n\n## Codebase Context\n\n- entity_references table (migration 011) with:\n - reference_type CHECK: 'closes' | 'mentioned' | 'related'\n - source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n - target_entity_id: NULL for unresolved cross-project refs\n- Count command pattern in src/cli/commands/count.rs: run_count() returns CountResult, handle_count formats output\n- events count already implemented as a special case: run_count_events() in main.rs (line ~829)\n- count.rs has value_parser list for entity arg\n- 26 migrations exist (001-026). entity_references was introduced in migration 011.\n\n## Approach\n\n### 1. Add to CountArgs value_parser in `src/cli/mod.rs`:\n```rust\n#[arg(value_parser = [\"issues\", \"mrs\", \"discussions\", \"notes\", \"events\", \"references\"])]\npub entity: String,\n```\n\n### 2. Add types and query in `src/cli/commands/count.rs`:\n\n```rust\npub struct ReferenceCountResult {\n pub total: i64,\n pub by_type: HashMap, // closes, mentioned, related\n pub by_method: HashMap, // api, note_parse, description_parse\n pub unresolved: i64,\n}\n```\n\n### 3. 
SQL (single conditional aggregate query — no N+1):\n```sql\nSELECT\n COUNT(*) as total,\n COALESCE(SUM(CASE WHEN reference_type = 'closes' THEN 1 ELSE 0 END), 0) as closes,\n COALESCE(SUM(CASE WHEN reference_type = 'mentioned' THEN 1 ELSE 0 END), 0) as mentioned,\n COALESCE(SUM(CASE WHEN reference_type = 'related' THEN 1 ELSE 0 END), 0) as related,\n COALESCE(SUM(CASE WHEN source_method = 'api' THEN 1 ELSE 0 END), 0) as api,\n COALESCE(SUM(CASE WHEN source_method = 'note_parse' THEN 1 ELSE 0 END), 0) as note_parse,\n COALESCE(SUM(CASE WHEN source_method = 'description_parse' THEN 1 ELSE 0 END), 0) as desc_parse,\n COALESCE(SUM(CASE WHEN target_entity_id IS NULL THEN 1 ELSE 0 END), 0) as unresolved\nFROM entity_references\n```\n\n### 4. Human output:\n```\nReferences: 1,234\n By type:\n closes: 456\n mentioned: 678\n related: 100\n By source:\n api: 234\n note_parse: 890\n description_parse: 110\n Unresolved: 45 (3.6%)\n```\n\n### 5. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"entity\": \"references\",\n \"total\": 1234,\n \"by_type\": { \"closes\": 456, \"mentioned\": 678, \"related\": 100 },\n \"by_method\": { \"api\": 234, \"note_parse\": 890, \"description_parse\": 110 },\n \"unresolved\": 45\n }\n}\n```\n\n### 6. 
Wire in main.rs handle_count:\nAdd \"references\" branch, similar to the existing \"events\" special case.\n\n## Acceptance Criteria\n\n- [ ] `lore count references` works with human output\n- [ ] `lore --robot count references` returns JSON with {ok, data, meta} envelope\n- [ ] by_type uses codebase values: closes, mentioned, related\n- [ ] by_method uses codebase values: api, note_parse, description_parse\n- [ ] Unresolved = COUNT WHERE target_entity_id IS NULL\n- [ ] Zero references: all counts 0, not error\n- [ ] entity_references table missing (pre-migration-011 schema): graceful error with migration suggestion\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/cli/mod.rs (add \"references\" to value_parser list)\n- MODIFY: src/cli/commands/count.rs (add count_references() + ReferenceCountResult)\n- MODIFY: src/main.rs (add \"references\" branch in handle_count)\n\n## TDD Anchor\n\nRED: test_count_references_query — in-memory DB with migration 011+, insert 3 entity_references rows (one closes/api, one mentioned/note_parse, one related/api with target_entity_id=NULL), verify all counts.\n\nGREEN: Implement query, result type, output formatters.\n\nVERIFY: cargo test --lib -- count && cargo check --all-targets\n\n## Edge Cases\n\n- entity_references table doesn't exist (pre-migration-011): catch SQL error, return user-friendly message suggesting `lore sync`\n- All references unresolved: unresolved = total, percentage = 100%\n- Division by zero in percentage: guard with `if total > 0`\n- New reference_type/source_method values added in future: they won't appear in breakdown but will be in total — consider logging unknown values\n\n## Dependency Context\n\n- **bd-hu3 / migration 011**: provides the entity_references table with reference_type and source_method CHECK constraints. 
This bead reads from that table — no writes.\n- **count.rs pattern**: run_count() dispatches to entity-specific queries. events already has a special-case function run_count_events() — follow the same pattern for references.","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:43.780303Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:52:59.706810Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2ez","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T22:43:40.652558Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ez","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T22:43:33.877742Z","created_by":"tayloreernisse"}]} {"id":"bd-2ezb","title":"NOTE-2D: Regenerator and dirty tracking for note documents","description":"## Background\nWire note document extraction into the regenerator and add change-aware dirty marking in the ingestion pipeline. When a note's semantic content changes during upsert, it gets queued for document regeneration.\n\n## Approach\n1. Update regenerate_one() in src/documents/regenerator.rs (line 86-91):\n Add match arm: SourceType::Note => extract_note_document(conn, source_id)?\n Add import: use crate::documents::extract_note_document;\n This replaces the temporary unreachable!() from NOTE-2B.\n\n2. Add change-aware dirty marking in src/ingestion/discussions.rs (in upsert loop modified by NOTE-0A):\n After each upsert_note_for_issue call:\n if !note.is_system && outcome.changed_semantics {\n dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;\n }\n Import: use crate::documents::SourceType;\n\n3. Same in src/ingestion/mr_discussions.rs for MR note upserts (after upsert_note call near line 470 area, once NOTE-0A modifies it to return NoteUpsertOutcome).\n\n4. Update test setup helpers:\n - src/documents/regenerator.rs tests: the setup_db() function creates test tables. 
Add notes + discussions tables so regenerate_one can be tested with SourceType::Note. Also update the dirty_sources CHECK constraint in test setup to include 'note'.\n - src/ingestion/dirty_tracker.rs tests: similar test setup_db() update for CHECK constraint.\n\n## Files\n- MODIFY: src/documents/regenerator.rs (add Note match arm at line 90, add import, update test setup_db)\n- MODIFY: src/ingestion/discussions.rs (add dirty marking after upsert loop)\n- MODIFY: src/ingestion/mr_discussions.rs (add dirty marking after upsert)\n- MODIFY: src/ingestion/dirty_tracker.rs (update test setup_db CHECK constraint if present)\n\n## TDD Anchor\nRED: test_regenerate_note_document — create project, issue, discussion, note, mark dirty, call regenerate_dirty_documents, assert document created with source_type='note'.\nGREEN: Add SourceType::Note arm to regenerate_one.\nVERIFY: cargo test regenerate_note_document -- --nocapture\nTests: test_regenerate_note_system_note_deletes (system note in dirty queue → document gets deleted), test_regenerate_note_unchanged (same content hash → no update), test_note_ingestion_idempotent_across_two_syncs (identical re-sync produces no new dirty entries), test_mark_dirty_note_type\n\n## Acceptance Criteria\n- [ ] regenerate_one() handles SourceType::Note via extract_note_document\n- [ ] Changed notes queued as dirty during issue discussion ingestion\n- [ ] Changed notes queued as dirty during MR discussion ingestion\n- [ ] System notes never queued as dirty (is_system guard)\n- [ ] Unchanged notes not re-queued (changed_semantics = false from NOTE-0A)\n- [ ] Second sync of identical data produces no new dirty entries\n- [ ] All 5 tests pass\n\n## Dependency Context\n- Depends on NOTE-0A (bd-3bpk): uses NoteUpsertOutcome.changed_semantics from upsert functions\n- Depends on NOTE-2B (bd-ef0u): SourceType::Note enum variant for dirty marking and match arm\n- Depends on NOTE-2C (bd-18yh): extract_note_document function for the regenerator 
dispatch\n\n## Edge Cases\n- Note deleted during regeneration: extract_note_document returns None → delete_document called (line 93-95 of regenerator.rs)\n- System note in dirty queue (from manual INSERT): extract returns None → document deleted\n- Concurrent sync + regeneration: dirty_tracker uses ON CONFLICT handling","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:02:14.161688Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:23.852811Z","closed_at":"2026-02-12T18:13:23.852765Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"],"dependencies":[{"issue_id":"bd-2ezb","depends_on_id":"bd-22uw","type":"blocks","created_at":"2026-02-12T17:04:49.792463Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ezb","depends_on_id":"bd-3o0i","type":"blocks","created_at":"2026-02-12T17:04:49.717290Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ezb","depends_on_id":"bd-9wl5","type":"blocks","created_at":"2026-02-12T17:04:49.866514Z","created_by":"tayloreernisse"}]} {"id":"bd-2f0","title":"[CP1] gi count issues/discussions/notes commands","description":"## Background\n\nThe `gi count` command provides quick counts of entities in the local database. It supports counting issues, MRs, discussions, and notes, with optional filtering by noteable type. 
This enables quick validation that sync is working correctly.\n\n## Approach\n\n### Module: src/cli/commands/count.rs\n\n### Clap Definition\n\n```rust\n#[derive(Args)]\npub struct CountArgs {\n /// Entity type to count\n #[arg(value_parser = [\"issues\", \"mrs\", \"discussions\", \"notes\"])]\n pub entity: String,\n\n /// Filter by noteable type (for discussions/notes)\n #[arg(long, value_parser = [\"issue\", \"mr\"])]\n pub r#type: Option,\n}\n```\n\n### Handler Function\n\n```rust\npub async fn handle_count(args: CountArgs, conn: &Connection) -> Result<()>\n```\n\n### Queries by Entity\n\n**issues:**\n```sql\nSELECT COUNT(*) FROM issues\n```\nOutput: `Issues: 3,801`\n\n**discussions:**\n```sql\n-- Without type filter\nSELECT COUNT(*) FROM discussions\n\n-- With --type=issue\nSELECT COUNT(*) FROM discussions WHERE noteable_type = 'Issue'\n```\nOutput: `Issue Discussions: 1,234`\n\n**notes:**\n```sql\n-- Total and system count\nSELECT COUNT(*), SUM(is_system) FROM notes\n\n-- With --type=issue (join through discussions)\nSELECT COUNT(*), SUM(n.is_system)\nFROM notes n\nJOIN discussions d ON n.discussion_id = d.id\nWHERE d.noteable_type = 'Issue'\n```\nOutput: `Issue Notes: 5,678 (excluding 1,234 system)`\n\n### Output Format\n\n```\nIssues: 3,801\n```\n\n```\nIssue Discussions: 1,234\n```\n\n```\nIssue Notes: 5,678 (excluding 1,234 system)\n```\n\n## Acceptance Criteria\n\n- [ ] `gi count issues` shows total issue count\n- [ ] `gi count discussions` shows total discussion count\n- [ ] `gi count discussions --type=issue` filters to issue discussions\n- [ ] `gi count notes` shows total note count with system note exclusion\n- [ ] `gi count notes --type=issue` filters to issue notes\n- [ ] Numbers formatted with thousands separators (1,234)\n\n## Files\n\n- src/cli/commands/mod.rs (add `pub mod count;`)\n- src/cli/commands/count.rs (create)\n- src/cli/mod.rs (add Count variant to Commands enum)\n\n## TDD Loop\n\nRED:\n```rust\n#[tokio::test] async fn 
count_issues_returns_total()\n#[tokio::test] async fn count_discussions_with_type_filter()\n#[tokio::test] async fn count_notes_excludes_system_notes()\n```\n\nGREEN: Implement handler with queries\n\nVERIFY: `cargo test count`\n\n## Edge Cases\n\n- Zero entities - show \"Issues: 0\"\n- --type flag invalid for issues/mrs - ignore or error\n- All notes are system notes - show \"Notes: 0 (excluding 1,234 system)\"","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-25T17:02:38.360495Z","created_by":"tayloreernisse","updated_at":"2026-01-25T23:01:37.084627Z","closed_at":"2026-01-25T23:01:37.084568Z","close_reason":"Implemented gi count command with issues/discussions/notes support, format_number helper, and system note exclusion","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2f0","depends_on_id":"bd-208","type":"blocks","created_at":"2026-01-25T17:04:05.677181Z","created_by":"tayloreernisse"}]} {"id":"bd-2f2","title":"Implement timeline human output renderer","description":"## Background\n\nThis bead implements the human-readable (non-robot) output renderer for `lore timeline`. 
It takes a collection of TimelineEvents and renders them as a colored, chronological timeline in the terminal.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.4 (Human Output Format).\n\n## Codebase Context\n\n- Colored output pattern: src/cli/commands/show.rs uses `colored` crate for terminal styling\n- Existing formatters: `print_show_issue()`, `print_show_mr()`, `print_list_issues()`\n- TimelineEvent model (bd-20e): timestamp, entity_type, entity_iid, project_path, event_type, summary, actor, url, is_seed\n- TimelineEventType enum (bd-20e): Created, StateChanged, LabelAdded, LabelRemoved, MilestoneSet, MilestoneRemoved, Merged, NoteEvidence, CrossReferenced\n- Expansion provenance: expanded entities have `via` info (from which seed, what edge type)\n- Convention: all output functions take `&[TimelineEvent]` and metadata, not raw DB results\n\n## Approach\n\nCreate `src/cli/commands/timeline.rs`:\n\n```rust\nuse colored::Colorize;\nuse crate::core::timeline::{TimelineEvent, TimelineEventType, TimelineQueryResult};\n\npub fn print_timeline(result: &TimelineQueryResult) {\n // Header\n println\\!();\n println\\!(\"{}\", format\\!(\"Timeline: \\\"{}\\\" ({} events across {} entities)\",\n result.query, result.events.len(), result.total_entities).bold());\n println\\!(\"{}\", \"─\".repeat(60));\n println\\!();\n\n // Events\n for event in &result.events {\n print_timeline_event(event);\n }\n\n // Footer\n println\\!();\n println\\!(\"{}\", \"─\".repeat(60));\n print_timeline_footer(result);\n}\n\nfn print_timeline_event(event: &TimelineEvent) {\n let date = format_date(event.timestamp);\n let tag = format_event_tag(&event.event_type);\n let entity = format_entity_ref(event.entity_type.as_str(), event.entity_iid);\n let actor = event.actor.as_deref().map(|a| format\\!(\"@{a}\")).unwrap_or_default();\n let expanded_marker = if event.is_seed { \"\" } else { \" [expanded]\" };\n\n println\\!(\"{date} {tag:10} {entity:6} {summary:40} 
{actor}{expanded_marker}\",\n summary = &event.summary);\n\n // Extra lines for specific event types\n match &event.event_type {\n TimelineEventType::NoteEvidence { snippet, .. } => {\n // Show snippet indented, wrapped to ~70 chars\n for line in wrap_text(snippet, 70) {\n println\\!(\" \\\"{line}\\\"\");\n }\n }\n TimelineEventType::Created => {\n // Could show labels if available in details\n }\n _ => {}\n }\n}\n```\n\n### Event Tag Colors:\n| Tag | Color |\n|-----|-------|\n| CREATED | green |\n| CLOSED | red |\n| REOPENED | yellow |\n| MERGED | cyan |\n| LABEL | blue |\n| MILESTONE | magenta |\n| NOTE | white/dim |\n| REF | dim |\n\n### Date Format:\n```\n2024-03-15 CREATED #234 Migrate to OAuth2 @alice\n```\nUse `YYYY-MM-DD` for dates. Group consecutive same-day events visually.\n\nAdd `pub mod timeline;` to `src/cli/commands/mod.rs` and re-export `print_timeline`.\n\n## Acceptance Criteria\n\n- [ ] `print_timeline()` renders header with query, event count, entity count\n- [ ] Events displayed chronologically with: date, tag, entity ref, summary, actor\n- [ ] Expanded entities marked with [expanded] suffix\n- [ ] NoteEvidence events show snippet text indented and quoted\n- [ ] Tags colored by event type\n- [ ] Footer shows seed entities and expansion info\n- [ ] Module registered in src/cli/commands/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/commands/timeline.rs` (NEW)\n- `src/cli/commands/mod.rs` (add `pub mod timeline;` and re-export `print_timeline`)\n\n## TDD Loop\n\nNo unit tests for terminal rendering. 
Verify visually:\n\n```bash\ncargo check --all-targets\n# After full pipeline: lore timeline \"some query\"\n```\n\n## Edge Cases\n\n- Empty result: print \"No events found for query.\" and exit 0\n- Very long summaries: truncate to 60 chars with \"...\"\n- NoteEvidence snippets: wrap at 70 chars, cap at 4 lines\n- Null actors (system events): show no @username\n- Entity types: # for issues, \\! for MRs (GitLab convention)\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:28.326026Z","created_by":"tayloreernisse","updated_at":"2026-02-06T13:49:10.580508Z","closed_at":"2026-02-06T13:49:10.580438Z","close_reason":"Implemented print_timeline() human renderer in src/cli/commands/timeline.rs with colored chronological output, event tags, entity refs, evidence note snippets, and footer summary","compaction_level":0,"original_size":0,"labels":["cli","gate-3","phase-b"],"dependencies":[{"issue_id":"bd-2f2","depends_on_id":"bd-3as","type":"blocks","created_at":"2026-02-02T21:33:37.659719Z","created_by":"tayloreernisse"},{"issue_id":"bd-2f2","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:28.329132Z","created_by":"tayloreernisse"}]} @@ -153,7 +153,7 @@ {"id":"bd-2og9","title":"Implement entity cache + render cache","description":"## Background\nEntity cache provides near-instant detail view reopens during Enter/Esc drill workflows by caching IssueDetail/MrDetail payloads. Render cache prevents per-frame recomputation of expensive render artifacts (markdown to styled text, discussion tree shaping). 
Both use bounded LRU eviction with selective invalidation.\n\n## Approach\n\n### Entity Cache (entity_cache.rs)\n\n```rust\nuse std::collections::HashMap;\n\npub struct EntityCache {\n entries: HashMap, // value + last-access tick\n capacity: usize,\n tick: u64,\n}\n\nimpl EntityCache {\n pub fn new(capacity: usize) -> Self;\n pub fn get(&mut self, key: &EntityKey) -> Option<&V>; // updates tick\n pub fn put(&mut self, key: EntityKey, value: V); // evicts oldest if at capacity\n pub fn invalidate(&mut self, keys: &[EntityKey]); // selective by key set\n}\n```\n\n- `EntityKey` is `(EntityType, i64)` from core types (bd-c9gk) — e.g., `(EntityType::Issue, 42)`\n- Default capacity: 64 entries (sufficient for typical drill-in/out workflows)\n- LRU eviction: on `put()` when at capacity, find entry with lowest tick and remove it\n- `get()` bumps the access tick to keep recently-accessed entries alive\n- `invalidate()` takes a slice of changed keys (from sync results) and removes only those entries — NOT a blanket clear\n\n### Render Cache (render_cache.rs)\n\n```rust\npub struct RenderCacheKey {\n content_hash: u64, // FxHash of source content\n terminal_width: u16, // width affects line wrapping\n}\n\npub struct RenderCache {\n entries: HashMap,\n capacity: usize,\n}\n\nimpl RenderCache {\n pub fn new(capacity: usize) -> Self;\n pub fn get(&self, key: &RenderCacheKey) -> Option<&V>;\n pub fn put(&mut self, key: RenderCacheKey, value: V);\n pub fn invalidate_width(&mut self, keep_width: u16); // remove entries NOT matching this width\n pub fn invalidate_all(&mut self); // theme change = full clear\n}\n```\n\n- Default capacity: 256 entries\n- Used for: markdown->styled text, discussion tree layout, issue body rendering\n- `content_hash` uses `std::hash::Hasher` with FxHash (or std DefaultHasher) on source text\n- `invalidate_width(keep_width)`: on terminal resize, remove entries cached at old width\n- `invalidate_all()`: on theme change, clear everything (colors 
changed)\n- Both caches are NOT thread-safe (single-threaded TUI event loop). No Arc/Mutex needed.\n\n### Integration Point\nBoth caches live as fields on the main LoreApp struct. Cache miss falls through to normal DB query transparently — the action functions check cache first, query DB on miss, populate cache on return.\n\n## Acceptance Criteria\n- [ ] EntityCache::get returns Some for recently put items\n- [ ] EntityCache::put evicts the least-recently-accessed entry when at capacity\n- [ ] EntityCache::invalidate removes only the specified keys, leaves others intact\n- [ ] EntityCache capacity defaults to 64\n- [ ] RenderCache::get returns Some for matching (hash, width) pair\n- [ ] RenderCache::invalidate_width removes entries with non-matching width\n- [ ] RenderCache::invalidate_all clears everything\n- [ ] RenderCache capacity defaults to 256\n- [ ] Both caches are Send (no Rc, no raw pointers) but NOT required to be Sync\n- [ ] No unsafe code\n\n## Files\n- CREATE: crates/lore-tui/src/entity_cache.rs\n- CREATE: crates/lore-tui/src/render_cache.rs\n- MODIFY: crates/lore-tui/src/lib.rs (add `pub mod entity_cache; pub mod render_cache;`)\n\n## TDD Anchor\nRED: Write `test_entity_cache_lru_eviction` that creates EntityCache with capacity 3, puts 4 items, asserts first item (lowest tick) is evicted and the other 3 remain.\nGREEN: Implement LRU eviction using tick-based tracking.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml entity_cache\n\nAdditional tests:\n- test_entity_cache_get_bumps_tick (accessed item survives eviction over older untouched items)\n- test_entity_cache_invalidate_selective (removes only specified keys)\n- test_entity_cache_invalidate_nonexistent_key (no panic)\n- test_render_cache_width_invalidation (entries at old width removed, current width kept)\n- test_render_cache_invalidate_all (empty after call)\n- test_render_cache_capacity_eviction\n\n## Edge Cases\n- Invalidating an EntityKey not in the cache is a no-op (no 
panic)\n- Zero-capacity cache: all gets return None, all puts are no-ops (degenerate but safe)\n- RenderCacheKey equality: two different strings can have the same hash (collision) — accept this; worst case is a wrong cached render that gets corrected on next invalidation\n- Entity cache should NOT be prewarmed synchronously during sync — sync results just invalidate stale entries, and the next view() call repopulates on demand\n\n## Dependency Context\nDepends on bd-c9gk (core types) for EntityKey type definition.\nBoth caches are integrated into LoreApp (bd-6pmy) as struct fields.\nAction functions (from Phase 2/3 screen beads) check cache before querying DB.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:03:25.520201Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:34.626204Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2og9","depends_on_id":"bd-1df9","type":"blocks","created_at":"2026-02-12T18:11:34.626177Z","created_by":"tayloreernisse"},{"issue_id":"bd-2og9","depends_on_id":"bd-c9gk","type":"blocks","created_at":"2026-02-12T17:39:25.511630Z","created_by":"tayloreernisse"}]} {"id":"bd-2px","title":"[CP1] Epic: Issue Ingestion","description":"Ingest all issues, labels, and issue discussions from configured GitLab repositories with resumable cursor-based incremental sync. 
This establishes the core data ingestion pattern reused for MRs in CP2.\n\n## Success Criteria\n- gi ingest --type=issues fetches all issues (count matches GitLab UI)\n- Labels extracted from issue payloads (name-only)\n- Label linkage reflects current GitLab state (removed labels unlinked on re-sync)\n- Issue discussions fetched per-issue (dependent sync)\n- Cursor-based sync is resumable (re-running fetches 0 new items)\n- Discussion sync skips unchanged issues (per-issue watermark)\n- Sync tracking records all runs\n- Single-flight lock prevents concurrent runs\n\n## Internal Gates\n- Gate A: Issues only (cursor + upsert + raw payloads + list/count/show)\n- Gate B: Labels correct (stale-link removal verified)\n- Gate C: Dependent discussion sync (watermark prevents redundant refetch)\n- Gate D: Resumability proof (kill mid-run, rerun; bounded redo)\n\nReference: docs/prd/checkpoint-1.md","status":"tombstone","priority":1,"issue_type":"epic","created_at":"2026-01-25T15:42:13.167698Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.638609Z","deleted_at":"2026-01-25T17:02:01.638606Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"epic","compaction_level":0,"original_size":0} {"id":"bd-2rk9","title":"WHO: CLI skeleton — WhoArgs, Commands::Who, dispatch arm","description":"## Background\n\nWire up the CLI plumbing so `lore who --help` works and dispatch reaches the who module. This is pure boilerplate — no query logic yet.\n\n## Approach\n\n### 1. 
src/cli/mod.rs — WhoArgs struct (after TimelineArgs, ~line 195)\n\n```rust\n#[derive(Parser)]\n#[command(after_help = \"\\x1b[1mExamples:\\x1b[0m\n lore who src/features/auth/ # Who knows about this area?\n lore who @asmith # What is asmith working on?\n lore who @asmith --reviews # What review patterns does asmith have?\n lore who --active # What discussions need attention?\n lore who --overlap src/features/auth/ # Who else is touching these files?\n lore who --path README.md # Expert lookup for a root file\")]\npub struct WhoArgs {\n /// Username or file path (path if contains /)\n pub target: Option,\n\n /// Force expert mode for a file/directory path (handles root files like README.md, Makefile)\n #[arg(long, help_heading = \"Mode\", conflicts_with_all = [\"active\", \"overlap\", \"reviews\"])]\n pub path: Option,\n\n /// Show active unresolved discussions\n #[arg(long, help_heading = \"Mode\", conflicts_with_all = [\"target\", \"overlap\", \"reviews\", \"path\"])]\n pub active: bool,\n\n /// Find users with MRs/notes touching this file path\n #[arg(long, help_heading = \"Mode\", conflicts_with_all = [\"target\", \"active\", \"reviews\", \"path\"])]\n pub overlap: Option,\n\n /// Show review pattern analysis (requires username target)\n #[arg(long, help_heading = \"Mode\", requires = \"target\", conflicts_with_all = [\"active\", \"overlap\", \"path\"])]\n pub reviews: bool,\n\n /// Time window (7d, 2w, 6m, YYYY-MM-DD). Default varies by mode.\n #[arg(long, help_heading = \"Filters\")]\n pub since: Option,\n\n /// Scope to a project (supports fuzzy matching)\n #[arg(short = 'p', long, help_heading = \"Filters\")]\n pub project: Option,\n\n /// Maximum results per section (1..=500)\n #[arg(short = 'n', long = \"limit\", default_value = \"20\",\n value_parser = clap::value_parser!(u16).range(1..=500),\n help_heading = \"Output\")]\n pub limit: u16,\n}\n```\n\n### 2. Commands enum — add Who(WhoArgs) after Timeline, before hidden List\n\n### 3. 
src/cli/commands/mod.rs — add `pub mod who;` and re-exports:\n```rust\npub use who::{run_who, print_who_human, print_who_json, WhoRun};\n```\n\n### 4. src/main.rs — dispatch arm + handler:\n```rust\nSome(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode),\n```\n\n### 5. src/cli/commands/who.rs — stub file with signatures that compile\n\n## Files\n\n- `src/cli/mod.rs` — WhoArgs struct + Commands::Who variant\n- `src/cli/commands/mod.rs` — pub mod who + re-exports\n- `src/main.rs` — dispatch arm + handle_who function + imports\n- `src/cli/commands/who.rs` — CREATE stub file\n\n## TDD Loop\n\nRED: `cargo check --all-targets` fails (missing who module)\nGREEN: Create stub who.rs with empty/todo!() implementations, wire up all 4 files\nVERIFY: `cargo check --all-targets && cargo run -- who --help`\n\n## Acceptance Criteria\n\n- [ ] `cargo check --all-targets` passes\n- [ ] `lore who --help` displays all flags with correct grouping (Mode, Filters, Output)\n- [ ] `lore who --active --overlap foo` rejected by clap (conflicts_with)\n- [ ] `lore who --reviews` rejected by clap (requires target)\n- [ ] WhoArgs is pub and importable from lore::cli\n\n## Edge Cases\n\n- conflicts_with_all on --path must NOT include \"target\" (--path is used alongside positional target in some cases... actually no, --path replaces target — check the plan: it conflicts with active/overlap/reviews but NOT target. Wait, looking at the plan: --path does NOT conflict with target. But if both target and --path are provided, --path takes priority in resolve_mode. The clap struct allows both.)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:39:58.436660Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.594923Z","closed_at":"2026-02-08T04:10:29.594882Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. 
All quality gates pass.","compaction_level":0,"original_size":0} -{"id":"bd-2rqs","title":"Dynamic shell completions for file paths (lore complete-path)","description":"Add a hidden lore complete-path subcommand that queries the DB for matching file paths, enabling tab-completion in bash/zsh/fish. Reuse path_resolver suffix_probe. Prior art: kubectl, gh, docker all use hidden subcommands for dynamic completions. Must be fast under 100ms. clap_complete v4 has custom completer API.","status":"open","priority":3,"issue_type":"feature","created_at":"2026-02-13T16:31:48.589428Z","created_by":"tayloreernisse","updated_at":"2026-02-13T16:31:48.592659Z","compaction_level":0,"original_size":0,"labels":["cli-ux","gate-4"]} +{"id":"bd-2rqs","title":"Dynamic shell completions for file paths (lore complete-path)","description":"## Background\n\nTab-completion for lore commands currently only covers static subcommand/flag names via clap_complete v4 (src/main.rs handle_completions(), line ~1667). Users frequently type file paths (for who --path, file-history) and entity IIDs (for issues, mrs, show) manually. Dynamic completions would allow tab-completing these from the local SQLite database.\n\n**Pattern:** kubectl, gh, docker all use hidden subcommands for dynamic completions. clap_complete v4 has a custom completer API that can shell out to these hidden subcommands.\n\n## Codebase Context\n\n- **Static completions**: Commands::Completions variant in src/cli/mod.rs, handled by handle_completions() in src/main.rs (line ~1667) using clap_complete::generate()\n- **clap_complete v4**: Already in Cargo.toml. Supports custom completer API for dynamic values.\n- **Commands taking IIDs**: IssuesArgs (iid: Option), MrsArgs (iid: Option), Drift (for: EntityRef), Show (hidden, takes entity ref)\n- **path_resolver**: src/core/path_resolver.rs (245 lines). build_path_query() (lines 71-187) and suffix_probe() (lines 192-240) resolve partial paths against mr_file_changes. 
SuffixResult::Ambiguous(Vec) returns multiple matches — perfect for completions.\n- **who --path**: WhoArgs has `path: Option` field, already uses path_resolver\n- **DB access**: create_connection() from src/core/db.rs, config loading from src/core/config.rs\n- **Performance**: Must complete in <100ms. SQLite queries against indexed columns are sub-ms.\n\n## Approach\n\n### 1. Hidden Subcommands (src/cli/mod.rs)\n\nAdd hidden subcommands that query the DB and print completion candidates:\n\n```rust\n/// Hidden: emit file path completions for shell integration\n#[command(name = \"complete-path\", hide = true)]\nCompletePath {\n /// Partial path prefix to complete\n prefix: String,\n /// Project scope\n #[arg(short = 'p', long)]\n project: Option,\n},\n\n/// Hidden: emit issue IID completions\n#[command(name = \"complete-issue\", hide = true)]\nCompleteIssue {\n /// Partial IID prefix\n prefix: String,\n #[arg(short = 'p', long)]\n project: Option,\n},\n\n/// Hidden: emit MR IID completions\n#[command(name = \"complete-mr\", hide = true)]\nCompleteMr {\n /// Partial IID prefix\n prefix: String,\n #[arg(short = 'p', long)]\n project: Option,\n},\n```\n\n### 2. 
Completion Handlers (src/cli/commands/completions.rs NEW)\n\n```rust\npub fn complete_path(conn: &Connection, prefix: &str, project_id: Option) -> Result> {\n // Use suffix_probe() from path_resolver if prefix looks like a suffix (no leading /)\n // Otherwise: SELECT DISTINCT new_path FROM mr_file_changes WHERE new_path LIKE ?||'%' LIMIT 50\n // Also check old_path for rename awareness\n}\n\npub fn complete_issue(conn: &Connection, prefix: &str, project_id: Option) -> Result> {\n // SELECT iid, title FROM issues WHERE CAST(iid AS TEXT) LIKE ?||'%' ORDER BY updated_at DESC LIMIT 30\n // Output: \"123\\tFix login bug\" (tab-separated for shell description)\n}\n\npub fn complete_mr(conn: &Connection, prefix: &str, project_id: Option) -> Result> {\n // SELECT iid, title FROM merge_requests WHERE CAST(iid AS TEXT) LIKE ?||'%' ORDER BY updated_at DESC LIMIT 30\n // Output: \"456\\tAdd OAuth support\"\n}\n```\n\n### 3. Wire in main.rs\n\nAdd match arms for CompletePath, CompleteIssue, CompleteMr. Each:\n1. Opens DB connection (read-only)\n2. Resolves project if -p given\n3. Calls completion handler\n4. Prints one candidate per line to stdout\n5. Exits 0\n\n### 4. Shell Integration\n\nUpdate handle_completions() to generate shell scripts that call the hidden subcommands. 
For fish:\n```fish\ncomplete -c lore -n '__fish_seen_subcommand_from issues' -a '(lore complete-issue \"\")'\ncomplete -c lore -n '__fish_seen_subcommand_from who' -l path -a '(lore complete-path (commandline -ct))'\n```\n\nSimilar for bash (using `_lore_complete()` function) and zsh.\n\n## Acceptance Criteria\n\n- [ ] `lore complete-path \"src/co\"` prints matching file paths from mr_file_changes\n- [ ] `lore complete-issue \"12\"` prints matching issue IIDs with titles\n- [ ] `lore complete-mr \"45\"` prints matching MR IIDs with titles\n- [ ] All three hidden subcommands respect -p for project scoping\n- [ ] All three complete in <100ms (SQLite indexed queries)\n- [ ] Empty prefix returns recent/popular results (not all rows)\n- [ ] Hidden subcommands don't appear in --help or completions themselves\n- [ ] Shell completion scripts (fish, bash, zsh) call hidden subcommands for dynamic values\n- [ ] Static completions (subcommands, flags) still work as before\n- [ ] No DB connection attempted if DB doesn't exist (graceful degradation — return no completions)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/cli/mod.rs (add CompletePath, CompleteIssue, CompleteMr hidden variants)\n- CREATE: src/cli/commands/completions.rs (complete_path, complete_issue, complete_mr handlers)\n- MODIFY: src/cli/commands/mod.rs (add pub mod completions)\n- MODIFY: src/main.rs (match arms for hidden subcommands + update handle_completions shell scripts)\n\n## TDD Anchor\n\nRED:\n- test_complete_path_suffix_match (in-memory DB with mr_file_changes rows, verify suffix matching returns correct paths)\n- test_complete_issue_prefix (in-memory DB with issues, verify IID prefix filtering)\n- test_complete_mr_prefix (same for MRs)\n- test_complete_empty_prefix_returns_recent (verify limited results ordered by updated_at DESC)\n\nGREEN: Implement completion handlers with SQL 
queries.\n\nVERIFY: cargo test --lib -- completions && cargo check --all-targets\n\n## Edge Cases\n\n- DB doesn't exist yet (first run before sync): return empty completions, exit 0 (not error)\n- mr_file_changes empty (sync hasn't run with --fetch-mr-diffs): complete-path returns nothing, no error\n- Very long prefix with no matches: empty output, exit 0\n- Special characters in paths (spaces, brackets): shell quoting handled by completion framework\n- Project ambiguous with -p: exit 18, same as other commands (resolve_project pattern)\n- IID prefix \"0\": return nothing (no issues/MRs have iid=0)\n\n## Dependency Context\n\n- **path_resolver** (src/core/path_resolver.rs): provides suffix_probe() which returns SuffixResult::Exact/Ambiguous/NotFound — reuse for complete-path instead of raw SQL when prefix looks like a suffix\n- **mr_file_changes** (migration 016): provides new_path/old_path columns for file path completions\n- **clap_complete v4** (Cargo.toml): provides generate() for static completions and custom completer API for dynamic shell integration","status":"open","priority":3,"issue_type":"feature","created_at":"2026-02-13T16:31:48.589428Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:51:21.891406Z","compaction_level":0,"original_size":0,"labels":["cli-ux","gate-4"]} {"id":"bd-2rr","title":"OBSERV: Replace subscriber init with dual-layer setup","description":"## Background\nThis is the core infrastructure bead for Phase 1. It replaces the single-layer subscriber (src/main.rs:44-58) with a dual-layer registry that separates stderr and file concerns. The file layer provides always-on post-mortem data; the stderr layer respects -v flags.\n\n## Approach\nReplace src/main.rs lines 44-58 with a function (e.g., init_tracing()) that:\n\n1. 
Build stderr filter from -v count (or RUST_LOG override):\n```rust\nfn build_stderr_filter(verbose: u8, quiet: bool) -> EnvFilter {\n if let Ok(rust_log) = std::env::var(\"RUST_LOG\") {\n return EnvFilter::new(rust_log);\n }\n if quiet {\n return EnvFilter::new(\"lore=warn,error\");\n }\n match verbose {\n 0 => EnvFilter::new(\"lore=info,warn\"),\n 1 => EnvFilter::new(\"lore=debug,warn\"),\n 2 => EnvFilter::new(\"lore=debug,info\"),\n _ => EnvFilter::new(\"trace,debug\"),\n }\n}\n```\n\n2. Build file filter (always lore=debug,warn unless RUST_LOG set):\n```rust\nfn build_file_filter() -> EnvFilter {\n if let Ok(rust_log) = std::env::var(\"RUST_LOG\") {\n return EnvFilter::new(rust_log);\n }\n EnvFilter::new(\"lore=debug,warn\")\n}\n```\n\n3. Assemble the registry:\n```rust\nlet stderr_layer = fmt::layer()\n .with_target(false)\n .with_writer(SuspendingWriter);\n// Conditionally add .json() based on log_format\n\nlet file_appender = tracing_appender::rolling::daily(log_dir, \"lore\");\nlet (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);\nlet file_layer = fmt::layer()\n .json()\n .with_writer(non_blocking);\n\ntracing_subscriber::registry()\n .with(stderr_layer.with_filter(build_stderr_filter(cli.verbose, cli.quiet)))\n .with(file_layer.with_filter(build_file_filter()))\n .init();\n```\n\nCRITICAL: The non_blocking _guard must be held for the program's lifetime. Store it in main() scope, NOT in the init function. If the guard drops, the file writer thread stops and buffered logs are lost.\n\nCRITICAL: Per-layer filtering requires each .with_filter() to produce a Filtered type. The two layers will have different concrete types (one with json, one without). This is fine -- the registry accepts heterogeneous layers via .with().\n\nWhen --log-format json: wrap stderr_layer with .json() too. This requires conditional construction. 
Two approaches:\n A) Use Box> for dynamic dispatch (simpler, tiny perf hit)\n B) Use an enum wrapper (zero cost but more code)\nRecommend approach A for simplicity. The overhead is one vtable indirection per log event, dwarfed by I/O.\n\nWhen file_logging is false (LoggingConfig.file_logging == false): skip adding the file layer entirely.\n\n## Acceptance Criteria\n- [ ] lore sync writes JSON log lines to ~/.local/share/lore/logs/lore.YYYY-MM-DD.log\n- [ ] lore -v sync shows DEBUG lore::* on stderr, deps at WARN\n- [ ] lore -vv sync shows DEBUG lore::* + INFO deps on stderr\n- [ ] lore -vvv sync shows TRACE everything on stderr\n- [ ] RUST_LOG=lore::gitlab=trace overrides -v for both layers\n- [ ] lore --log-format json sync emits JSON on stderr\n- [ ] -q + -v: -q wins (stderr at WARN+)\n- [ ] -q does NOT affect file layer (still DEBUG+)\n- [ ] File layer does NOT use SuspendingWriter\n- [ ] Non-blocking guard kept alive for program duration\n- [ ] Existing behavior unchanged when no new flags passed\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/main.rs (replace lines 44-58, add init_tracing function or inline)\n\n## TDD Loop\nRED:\n - test_verbosity_filter_construction: assert filter directives for verbose=0,1,2,3\n - test_rust_log_overrides_verbose: set env, assert TRACE not DEBUG\n - test_quiet_overrides_verbose: -q + -v => WARN+\n - test_json_log_output_format: capture file output, parse as JSON\n - test_suspending_writer_dual_layer: no garbled stderr with progress bars\nGREEN: Implement build_stderr_filter, build_file_filter, assemble registry\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- _guard lifetime: if guard is dropped early, buffered log lines are lost. MUST hold in main() scope.\n- Type erasure: stderr layer with/without .json() produces different types. Use Box> or separate init paths.\n- Empty RUST_LOG string: env::var returns Ok(\"\"), which EnvFilter::new(\"\") defaults to TRACE. 
May want to check is_empty().\n- File I/O error on log dir: tracing-appender handles this gracefully (no panic), but logs will be silently lost. The doctor command (bd-2i10) can diagnose this.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.577025Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:15:04.384114Z","closed_at":"2026-02-04T17:15:04.384062Z","close_reason":"Replaced single-layer subscriber with dual-layer setup: stderr (human/json, -v controlled) + file (always-on JSON, daily rotation via tracing-appender)","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-2rr","depends_on_id":"bd-17n","type":"blocks","created_at":"2026-02-04T15:55:19.397949Z","created_by":"tayloreernisse"},{"issue_id":"bd-2rr","depends_on_id":"bd-1k4","type":"blocks","created_at":"2026-02-04T15:55:19.461728Z","created_by":"tayloreernisse"},{"issue_id":"bd-2rr","depends_on_id":"bd-1o1","type":"blocks","created_at":"2026-02-04T15:55:19.327157Z","created_by":"tayloreernisse"},{"issue_id":"bd-2rr","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-04T15:53:55.577882Z","created_by":"tayloreernisse"},{"issue_id":"bd-2rr","depends_on_id":"bd-gba","type":"blocks","created_at":"2026-02-04T15:55:19.262870Z","created_by":"tayloreernisse"}]} {"id":"bd-2sr2","title":"Robot sync envelope: status enrichment metadata","description":"## Background\nAgents need machine-readable status enrichment metadata in the robot sync output to detect issues like unsupported GraphQL, partial errors, or enrichment failures. Without this, enrichment problems are invisible to automation.\n\n## Approach\nWire IngestProjectResult status fields into the per-project robot sync JSON. 
Add aggregate error count to top-level summary.\n\n## Files\n- Wherever robot sync output JSON is constructed (likely src/cli/commands/ingest.rs or the sync output serialization path — search for IngestProjectResult -> JSON conversion)\n\n## Implementation\n\nPer-project status_enrichment object in robot sync JSON:\n{\n \"mode\": \"fetched\" | \"unsupported\" | \"skipped\",\n \"reason\": null | \"graphql_endpoint_missing\" | \"auth_forbidden\",\n \"seen\": N,\n \"enriched\": N,\n \"cleared\": N,\n \"without_widget\": N,\n \"partial_errors\": N,\n \"first_partial_error\": null | \"message\",\n \"error\": null | \"message\"\n}\n\nSource fields from IngestProjectResult:\n mode <- status_enrichment_mode\n reason <- status_unsupported_reason\n seen <- statuses_seen\n enriched <- statuses_enriched\n cleared <- statuses_cleared\n without_widget <- statuses_without_widget\n partial_errors <- partial_error_count\n first_partial_error <- first_partial_error\n error <- status_enrichment_error\n\nTop-level sync summary: add status_enrichment_errors: N (count of projects where error is Some)\n\nField semantics:\n mode \"fetched\": enrichment ran (even if 0 statuses or error occurred)\n mode \"unsupported\": 404/403 from GraphQL\n mode \"skipped\": config toggle off\n seen > 0 + enriched == 0: project has issues but none with status\n partial_errors > 0: some pages returned incomplete data\n\n## Acceptance Criteria\n- [ ] Robot sync JSON includes per-project status_enrichment object\n- [ ] All 9 fields present with correct types\n- [ ] mode reflects actual enrichment outcome (fetched/unsupported/skipped)\n- [ ] Top-level status_enrichment_errors count present\n- [ ] Test: full robot sync output validates structure\n\n## TDD Loop\nRED: test_robot_sync_includes_status_enrichment\nGREEN: Wire fields into JSON serialization\nVERIFY: cargo test robot_sync\n\n## Edge Cases\n- Find the exact location where IngestProjectResult is serialized to JSON — may be in a Serialize impl or manual 
json! macro\n- All numeric fields default to 0, all Option fields default to null in JSON\n- mode is always present (never null)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-11T06:42:29.127412Z","created_by":"tayloreernisse","updated_at":"2026-02-11T07:21:33.422233Z","closed_at":"2026-02-11T07:21:33.422193Z","close_reason":"Implemented by agent swarm — all quality gates pass (595 tests, 0 failures)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2sr2","depends_on_id":"bd-2y79","type":"parent-child","created_at":"2026-02-11T06:42:29.130750Z","created_by":"tayloreernisse"},{"issue_id":"bd-2sr2","depends_on_id":"bd-3dum","type":"blocks","created_at":"2026-02-11T06:42:45.995816Z","created_by":"tayloreernisse"}]} {"id":"bd-2sx","title":"Implement lore embed CLI command","description":"## Background\nThe embed CLI command is the user-facing wrapper for the embedding pipeline. It runs Ollama health checks, selects documents to embed (pending or failed), shows progress, and reports results. This is the standalone command for building embeddings outside of the sync orchestrator.\n\n## Approach\nCreate `src/cli/commands/embed.rs` per PRD Section 4.4.\n\n**IMPORTANT: The embed command is async.** The underlying `embed_documents()` function is `async fn` (uses `FuturesUnordered` for concurrent HTTP to Ollama). The CLI runner must use tokio runtime.\n\n**Core function (async):**\n```rust\npub async fn run_embed(\n config: &Config,\n retry_failed: bool,\n) -> Result\n```\n\n**Pipeline:**\n1. Create OllamaClient from config.embedding (base_url, model, timeout_secs)\n2. Run `client.health_check().await` — fail early with clear error if Ollama unavailable or model missing\n3. Determine selection: `EmbedSelection::RetryFailed` if --retry-failed, else `EmbedSelection::Pending`\n4. 
Call `embed_documents(conn, &client, selection, concurrency, progress_callback).await`\n - `concurrency` param controls max in-flight HTTP requests to Ollama\n - `progress_callback` drives indicatif progress bar\n5. Show progress bar (indicatif) during embedding\n6. Return EmbedResult with counts\n\n**CLI args:**\n```rust\n#[derive(Args)]\npub struct EmbedArgs {\n #[arg(long)]\n retry_failed: bool,\n}\n```\n\n**Output:**\n- Human: \"Embedded 42 documents (15 chunks), 2 errors, 5 skipped (unchanged)\"\n- JSON: `{\"ok\": true, \"data\": {\"embedded\": 42, \"chunks\": 15, \"errors\": 2, \"skipped\": 5}}`\n\n**Tokio integration note:**\nThe embed command runs async code. Either:\n- Use `#[tokio::main]` on main and propagate async through CLI dispatch\n- Or use `tokio::runtime::Runtime::new()` in the embed command handler\n\n## Acceptance Criteria\n- [ ] Command is async (embed_documents is async, health_check is async)\n- [ ] OllamaClient created from config.embedding settings\n- [ ] Health check runs first — clear error if Ollama down (exit code 14)\n- [ ] Clear error if model not found: \"Pull the model: ollama pull nomic-embed-text\" (exit code 15)\n- [ ] Embeds pending documents (no existing embeddings or stale content_hash)\n- [ ] --retry-failed re-attempts documents with last_error\n- [ ] Progress bar shows during embedding (indicatif)\n- [ ] embed_documents called with concurrency parameter\n- [ ] embed_documents called with progress_callback for progress bar\n- [ ] Human + JSON output\n- [ ] `cargo build` succeeds\n\n## Files\n- `src/cli/commands/embed.rs` — new file\n- `src/cli/commands/mod.rs` — add `pub mod embed;`\n- `src/cli/mod.rs` — add EmbedArgs, wire up embed subcommand\n- `src/main.rs` — add embed command handler (async dispatch)\n\n## TDD Loop\nRED: Integration test needing Ollama\nGREEN: Implement run_embed (async)\nVERIFY: `cargo build && cargo test embed`\n\n## Edge Cases\n- No documents in DB: \"No documents to embed\" (not error)\n- All 
documents already embedded and unchanged: \"0 documents to embed (all up to date)\"\n- Ollama goes down mid-embedding: pipeline records errors for remaining docs, returns partial result\n- --retry-failed with no failed docs: \"No failed documents to retry\"","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:34.126482Z","created_by":"tayloreernisse","updated_at":"2026-01-30T18:02:38.633115Z","closed_at":"2026-01-30T18:02:38.633055Z","close_reason":"Embed CLI command fully wired: EmbedArgs, Commands::Embed variant, handle_embed handler, clean build, all tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-2sx","depends_on_id":"bd-am7","type":"blocks","created_at":"2026-01-30T15:29:24.766104Z","created_by":"tayloreernisse"}]} @@ -163,7 +163,7 @@ {"id":"bd-2w1p","title":"Add half-life fields and config validation to ScoringConfig","description":"## Background\nThe flat-weight ScoringConfig (config.rs:155-167) has only 3 fields: author_weight (25), reviewer_weight (10), note_bonus (1). Time-decay scoring needs half-life parameters, a reviewer split (participated vs assigned-only), closed MR discount, substantive-note threshold, and bot filtering.\n\n## Approach\nExtend the existing ScoringConfig struct at config.rs:155. Add new fields with #[serde(default)] and camelCase rename to match existing convention (authorWeight, reviewerWeight, noteBonus). Extend the Default impl at config.rs:169 with new defaults. 
Extend validate_scoring() at config.rs:274-291 (currently validates 3 weights >= 0).\n\n### New fields to add:\n```rust\n#[serde(rename = \"reviewerAssignmentWeight\")]\npub reviewer_assignment_weight: i64, // default: 3\n#[serde(rename = \"authorHalfLifeDays\")]\npub author_half_life_days: u32, // default: 180\n#[serde(rename = \"reviewerHalfLifeDays\")]\npub reviewer_half_life_days: u32, // default: 90\n#[serde(rename = \"reviewerAssignmentHalfLifeDays\")]\npub reviewer_assignment_half_life_days: u32, // default: 45\n#[serde(rename = \"noteHalfLifeDays\")]\npub note_half_life_days: u32, // default: 45\n#[serde(rename = \"closedMrMultiplier\")]\npub closed_mr_multiplier: f64, // default: 0.5\n#[serde(rename = \"reviewerMinNoteChars\")]\npub reviewer_min_note_chars: u32, // default: 20\n#[serde(rename = \"excludedUsernames\")]\npub excluded_usernames: Vec, // default: vec![]\n```\n\n### Validation additions to validate_scoring() (config.rs:274):\n- All *_half_life_days must be > 0 AND <= 3650\n- All *_weight / *_bonus must be >= 0\n- reviewer_assignment_weight must be >= 0\n- closed_mr_multiplier must be finite (not NaN/Inf) AND in (0.0, 1.0]\n- reviewer_min_note_chars must be >= 0 AND <= 4096\n- excluded_usernames entries must be non-empty strings\n- Return LoreError::ConfigInvalid with clear message on failure\n\n## TDD Loop\n\n### RED (write first):\n```rust\n#[test]\nfn test_config_validation_rejects_zero_half_life() {\n let mut cfg = ScoringConfig::default();\n assert!(validate_scoring(&cfg).is_ok());\n cfg.author_half_life_days = 0;\n assert!(validate_scoring(&cfg).is_err());\n cfg.author_half_life_days = 180;\n cfg.reviewer_half_life_days = 0;\n assert!(validate_scoring(&cfg).is_err());\n cfg.reviewer_half_life_days = 90;\n cfg.closed_mr_multiplier = 0.0;\n assert!(validate_scoring(&cfg).is_err());\n cfg.closed_mr_multiplier = 1.5;\n assert!(validate_scoring(&cfg).is_err());\n cfg.closed_mr_multiplier = 1.0;\n 
assert!(validate_scoring(&cfg).is_ok());\n}\n\n#[test]\nfn test_config_validation_rejects_absurd_half_life() {\n let mut cfg = ScoringConfig::default();\n cfg.author_half_life_days = 5000; // > 3650 cap\n assert!(validate_scoring(&cfg).is_err());\n cfg.author_half_life_days = 3650; // boundary: valid\n assert!(validate_scoring(&cfg).is_ok());\n cfg.reviewer_min_note_chars = 5000; // > 4096 cap\n assert!(validate_scoring(&cfg).is_err());\n cfg.reviewer_min_note_chars = 4096; // boundary: valid\n assert!(validate_scoring(&cfg).is_ok());\n}\n\n#[test]\nfn test_config_validation_rejects_nan_multiplier() {\n let mut cfg = ScoringConfig::default();\n cfg.closed_mr_multiplier = f64::NAN;\n assert!(validate_scoring(&cfg).is_err());\n cfg.closed_mr_multiplier = f64::INFINITY;\n assert!(validate_scoring(&cfg).is_err());\n cfg.closed_mr_multiplier = f64::NEG_INFINITY;\n assert!(validate_scoring(&cfg).is_err());\n}\n```\n\n### GREEN: Add fields to struct + Default impl + validation rules.\n### VERIFY: cargo test -p lore -- test_config_validation\n\n## Acceptance Criteria\n- [ ] test_config_validation_rejects_zero_half_life passes\n- [ ] test_config_validation_rejects_absurd_half_life passes\n- [ ] test_config_validation_rejects_nan_multiplier passes\n- [ ] ScoringConfig::default() returns correct values for all 11 fields\n- [ ] cargo check --all-targets passes\n- [ ] Existing config deserialization works (#[serde(default)] fills new fields)\n- [ ] validate_scoring() is pub(crate) or accessible from config.rs test module\n\n## Files\n- MODIFY: src/core/config.rs (struct at line 155, Default impl at line 169, validate_scoring at line 274)\n\n## Edge Cases\n- f64 comparison: use .is_finite() for NaN/Inf check, > 0.0 and <= 1.0 for range\n- Vec default: use Vec::new()\n- Upper bounds prevent silent misconfig (5000-day half-life effectively disables 
decay)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T16:59:14.654469Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.400186Z","closed_at":"2026-02-12T20:43:04.399988Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates green","compaction_level":0,"original_size":0,"labels":["scoring"]} {"id":"bd-2wpf","title":"Ship timeline CLI with human and robot renderers","description":"## Problem\nThe timeline pipeline (5-stage SEED->HYDRATE->EXPAND->COLLECT->RENDER) is implemented but not wired to the CLI. This is one of lore's most unique features — chronological narrative reconstruction from resource events, cross-references, and notes — and it is invisible to users and agents.\n\n## Current State\n- Types defined: src/core/timeline.rs (TimelineEvent, TimelineSeed, etc.)\n- Seed stage: src/core/timeline_seed.rs (FTS search -> seed entities)\n- Expand stage: src/core/timeline_expand.rs (cross-reference expansion)\n- Collect stage: src/core/timeline_collect.rs (event gathering from resource events + notes)\n- CLI command structure: src/cli/commands/timeline.rs (exists but incomplete)\n- Remaining beads: bd-1nf (CLI wiring), bd-2f2 (human renderer), bd-dty (robot renderer)\n\n## Acceptance Criteria\n1. lore timeline 'authentication refactor' works end-to-end:\n - Searches for matching entities (SEED)\n - Fetches raw data (HYDRATE)\n - Expands via cross-references (EXPAND with --depth flag, default 1)\n - Collects events chronologically (COLLECT)\n - Renders human-readable narrative (RENDER)\n2. Human renderer output:\n - Chronological event stream with timestamps\n - Color-coded by event type (state change, label change, note, reference)\n - Actor names with role context\n - Grouped by day/week for readability\n - Evidence snippets from notes (first 200 chars)\n3. 
Robot renderer output (--robot / -J):\n - JSON array of events with: timestamp, event_type, actor, entity_ref, body/snippet, metadata\n - Seed entities listed separately (what matched the query)\n - Expansion depth metadata (how far from seed)\n - Total event count and time range\n4. CLI flags:\n - --project (scope to project)\n - --since (time range)\n - --depth N (expansion depth, default 1, max 3)\n - --expand-mentions (follow mention references, not just closes/related)\n - -n LIMIT (max events)\n5. Performance: timeline for a single issue with 50 events renders in <200ms\n\n## Relationship to Existing Beads\nThis supersedes/unifies: bd-1nf (CLI wiring), bd-2f2 (human renderer), bd-dty (robot renderer). Those can be closed when this ships.\n\n## Files to Modify\n- src/cli/commands/timeline.rs (CLI wiring, flag parsing, output dispatch)\n- src/core/timeline.rs (may need RENDER stage types)\n- New: src/cli/render/timeline_human.rs or inline in timeline.rs\n- New: src/cli/render/timeline_robot.rs or inline in timeline.rs","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-12T15:46:16.246889Z","created_by":"tayloreernisse","updated_at":"2026-02-12T15:50:43.885226Z","closed_at":"2026-02-12T15:50:43.885180Z","close_reason":"Already implemented: run_timeline(), print_timeline(), print_timeline_json_with_meta(), handle_timeline() all exist and are fully wired. Code audit 2026-02-12.","compaction_level":0,"original_size":0,"labels":["cli","cli-imp"],"dependencies":[{"issue_id":"bd-2wpf","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T15:46:16.250013Z","created_by":"tayloreernisse"}]} {"id":"bd-2x2h","title":"Implement Sync screen (running + summary modes + progress coalescer)","description":"## Background\nThe Sync screen provides real-time progress visualization during data synchronization. 
The TUI drives sync directly via lore library calls (not subprocess) — this gives direct access to progress callbacks, proper error propagation, and cooperative cancellation via CancelToken. The TUI is the primary human interface; the CLI serves robots/scripts.\n\nAfter sync completes, the screen transitions to a summary view showing exact changed entity counts. A progress coalescer prevents render thrashing by batching rapid progress updates.\n\nDesign principle: the TUI is self-contained. It does NOT detect or react to external CLI sync operations. If someone runs lore sync externally, the TUI's natural re-query on navigation handles stale data implicitly.\n\n## Approach\nCreate state, action, and view modules for the Sync screen:\n\n**State** (crates/lore-tui/src/screen/sync/state.rs):\n- SyncScreenMode enum: FullScreen, Inline (for use from Bootstrap screen)\n- SyncState enum: Idle, Running(SyncProgress), Complete(SyncSummary), Error(String)\n- SyncProgress: per-lane progress (issues, MRs, discussions, notes, events, statuses) with counts and ETA\n- SyncSummary: changed entity counts (new, updated, deleted per type), duration, errors\n- ProgressCoalescer: buffers progress updates, emits at most every 100ms to prevent render thrash\n\n**sync_delta_ledger** (crates/lore-tui/src/screen/sync/delta_ledger.rs):\n- SyncDeltaLedger: in-memory per-run record of changed entity IDs\n- Fields: new_issue_iids (Vec), updated_issue_iids (Vec), new_mr_iids (Vec), updated_mr_iids (Vec)\n- record_change(entity_type, iid, change_kind) — called by sync progress callback\n- summary() -> SyncSummary — produces the final counts for the summary view\n- Purpose: after sync completes, the dashboard and list screens can use the ledger to highlight \"new since last sync\" items\n\n**Action** (crates/lore-tui/src/screen/sync/action.rs):\n- start_sync(db: &DbManager, config: &Config, cancel: CancelToken) -> Cmd\n- Calls lore library ingestion functions directly: ingest_issues, ingest_mrs, 
ingest_discussions, etc.\n- Progress callback sends Msg::SyncProgress(lane, count, total) via channel\n- On completion sends Msg::SyncComplete(SyncSummary)\n- On cancel sends Msg::SyncCancelled(partial_summary)\n\n**Per-project fault isolation:** If sync for one project fails, continue syncing other projects. Collect per-project errors and display in summary view. Don't abort entire sync on single project failure.\n\n**View** (crates/lore-tui/src/screen/sync/view.rs):\n- Running view: per-lane progress bars with counts/totals, overall ETA, cancel hint (Esc)\n- Stream stats footer: show items/sec throughput for active lanes\n- Summary view: table of entity types with new/updated/deleted columns, total duration, per-project error list\n- Error view: error message with retry option\n- Inline mode: compact single-line progress for embedding in Bootstrap screen\n\nThe Sync screen uses TaskSupervisor for the background sync task with cooperative cancellation.\n\n## Acceptance Criteria\n- [ ] Sync screen launches sync via lore library calls (NOT subprocess)\n- [ ] Per-lane progress bars update in real-time during sync\n- [ ] ProgressCoalescer batches updates to at most 10/second (100ms floor)\n- [ ] Esc cancels sync cooperatively via CancelToken, shows partial summary\n- [ ] Sync completion transitions to summary view with accurate change counts\n- [ ] Summary view shows new/updated/deleted counts per entity type\n- [ ] Error during sync shows error message with retry option\n- [ ] Sync task registered with TaskSupervisor (dedup by TaskKey::Sync)\n- [ ] Per-project fault isolation: single project failure doesn't abort entire sync\n- [ ] SyncDeltaLedger records changed entity IDs for post-sync highlighting\n- [ ] Stream stats footer shows items/sec throughput\n- [ ] ScreenMode::Inline renders compact single-line progress for Bootstrap embedding\n- [ ] Unit tests for ProgressCoalescer batching behavior\n- [ ] Unit tests for SyncDeltaLedger record/summary\n- [ ] Integration 
test: mock sync with FakeClock verifies progress -> summary transition\n\n## Files\n- CREATE: crates/lore-tui/src/screen/sync/state.rs\n- CREATE: crates/lore-tui/src/screen/sync/action.rs\n- CREATE: crates/lore-tui/src/screen/sync/view.rs\n- CREATE: crates/lore-tui/src/screen/sync/delta_ledger.rs\n- CREATE: crates/lore-tui/src/screen/sync/mod.rs\n- MODIFY: crates/lore-tui/src/screen/mod.rs (add pub mod sync)\n\n## TDD Anchor\nRED: Write test_progress_coalescer_batches_rapid_updates that sends 50 progress updates in 10ms and asserts coalescer emits at most 1.\nGREEN: Implement ProgressCoalescer with configurable floor interval.\nVERIFY: cargo test -p lore-tui sync -- --nocapture\n\nAdditional tests:\n- test_sync_cancel_produces_partial_summary\n- test_sync_complete_produces_full_summary\n- test_sync_error_shows_retry\n- test_sync_dedup_prevents_double_launch\n- test_delta_ledger_records_changes: record 5 new issues and 3 updated MRs, assert summary counts\n- test_per_project_fault_isolation: simulate one project failure, verify others complete\n\n## Edge Cases\n- Sync cancelled immediately after start — partial summary with zero counts is valid\n- Network timeout during sync — error state with last-known progress preserved\n- Very large sync (100k+ entities) — progress coalescer prevents render thrash\n- Sync started while another sync TaskKey::Sync exists — TaskSupervisor dedup rejects it\n- Inline mode from Bootstrap: compact rendering, no full progress bars\n\n## Dependency Context\nUses TaskSupervisor from bd-3le2 for dedup and cancellation. Uses DbManager from bd-2kop for database access. Uses lore library ingestion module directly for sync operations. 
Used by Bootstrap screen (bd-3ty8) in inline mode.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:02:09.481354Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:34.266057Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2x2h","depends_on_id":"bd-1df9","type":"blocks","created_at":"2026-02-12T18:11:34.266030Z","created_by":"tayloreernisse"},{"issue_id":"bd-2x2h","depends_on_id":"bd-3le2","type":"blocks","created_at":"2026-02-12T18:11:13.405879Z","created_by":"tayloreernisse"},{"issue_id":"bd-2x2h","depends_on_id":"bd-u7se","type":"blocks","created_at":"2026-02-12T17:10:02.861920Z","created_by":"tayloreernisse"}]} -{"id":"bd-2y79","title":"Add work item status via GraphQL enrichment","description":"## Summary\n\nGitLab 18.2+ has native work item status (To do, In progress, Done, Won't do, Duplicate) available ONLY via GraphQL, not REST. This enriches synced issues with status information by making supplementary GraphQL calls after REST ingestion.\n\n**Plan document:** plans/work-item-status-graphql.md\n\n## Critical Findings (from API research)\n\n- **EE-only (Premium/Ultimate)** — Free tier won't have the widget at all\n- **GraphQL auth differs from REST** — must use `Authorization: Bearer `, NOT `PRIVATE-TOKEN`\n- **Must use `workItems` resolver, NOT `project.issues`** — legacy issues path doesn't expose status widgets\n- **5 categories:** TRIAGE, TO_DO, IN_PROGRESS, DONE, CANCELED (not 3 as originally assumed)\n- **Max 100 items per GraphQL page** (standard GitLab limit)\n- **Custom statuses possible on 18.5+** — can't assume only system-defined statuses\n\n## Migration\n\nUses migration **021** (001-020 already exist on disk).\nAdds `status_name TEXT` and `status_category TEXT` to `issues` table (both nullable).\n\n## Files\n\n- src/gitlab/graphql.rs (NEW — minimal GraphQL client + status fetcher)\n- src/gitlab/mod.rs (add pub mod graphql)\n- src/gitlab/types.rs (WorkItemStatus, 
WorkItemStatusCategory enum)\n- src/core/db.rs (migration 021 in MIGRATIONS array)\n- src/core/config.rs (fetch_work_item_status toggle in SyncConfig)\n- src/ingestion/orchestrator.rs (enrichment step after issue sync)\n- src/cli/commands/show.rs (display status)\n- src/cli/commands/list.rs (status in list output + --status filter)\n\n## Acceptance Criteria\n\n- [ ] GraphQL client POSTs queries with Bearer auth and handles errors\n- [ ] Status fetched via workItems resolver with pagination\n- [ ] Migration 021 adds status_name and status_category to issues\n- [ ] lore show issue displays status (when available)\n- [ ] lore --robot show issue includes status in JSON\n- [ ] lore list issues --status filter works\n- [ ] Graceful degradation: Free tier, old GitLab, disabled GraphQL all handled\n- [ ] Config toggle: fetch_work_item_status (default true)\n- [ ] cargo check + clippy + tests pass","status":"open","priority":1,"issue_type":"feature","created_at":"2026-02-05T18:32:39.287957Z","created_by":"tayloreernisse","updated_at":"2026-02-10T19:45:28.686499Z","compaction_level":0,"original_size":0,"labels":["api","phase-b"]} +{"id":"bd-2y79","title":"Add work item status via GraphQL enrichment","description":"## Summary\n\nGitLab 18.2+ has native work item status (To do, In progress, Done, Won't do, Duplicate) available ONLY via GraphQL, not REST. 
This enriches synced issues with status information by making supplementary GraphQL calls after REST ingestion.\n\n**Plan document:** plans/work-item-status-graphql.md\n\n## Critical Findings (from API research)\n\n- **EE-only (Premium/Ultimate)** — Free tier won't have the widget at all\n- **GraphQL auth differs from REST** — must use `Authorization: Bearer `, NOT `PRIVATE-TOKEN`\n- **Must use `workItems` resolver, NOT `project.issues`** — legacy issues path doesn't expose status widgets\n- **5 categories:** TRIAGE, TO_DO, IN_PROGRESS, DONE, CANCELED (not 3 as originally assumed)\n- **Max 100 items per GraphQL page** (standard GitLab limit)\n- **Custom statuses possible on 18.5+** — can't assume only system-defined statuses\n\n## Migration\n\nUses migration **021** (001-020 already exist on disk).\nAdds `status_name TEXT` and `status_category TEXT` to `issues` table (both nullable).\n\n## Files\n\n- src/gitlab/graphql.rs (NEW — minimal GraphQL client + status fetcher)\n- src/gitlab/mod.rs (add pub mod graphql)\n- src/gitlab/types.rs (WorkItemStatus, WorkItemStatusCategory enum)\n- src/core/db.rs (migration 021 in MIGRATIONS array)\n- src/core/config.rs (fetch_work_item_status toggle in SyncConfig)\n- src/ingestion/orchestrator.rs (enrichment step after issue sync)\n- src/cli/commands/show.rs (display status)\n- src/cli/commands/list.rs (status in list output + --status filter)\n\n## Acceptance Criteria\n\n- [ ] GraphQL client POSTs queries with Bearer auth and handles errors\n- [ ] Status fetched via workItems resolver with pagination\n- [ ] Migration 021 adds status_name and status_category to issues\n- [ ] lore show issue displays status (when available)\n- [ ] lore --robot show issue includes status in JSON\n- [ ] lore list issues --status filter works\n- [ ] Graceful degradation: Free tier, old GitLab, disabled GraphQL all handled\n- [ ] Config toggle: fetch_work_item_status (default true)\n- [ ] cargo check + clippy + tests 
pass","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-05T18:32:39.287957Z","created_by":"tayloreernisse","updated_at":"2026-02-17T15:08:29.499020Z","closed_at":"2026-02-17T15:08:29.498969Z","close_reason":"Already implemented: GraphQL status enrichment shipped in v0.8.x — migration 021, graphql.rs, --status filter, --no-status flag all complete","compaction_level":0,"original_size":0,"labels":["api","phase-b"]} {"id":"bd-2ygk","title":"Implement user flow integration tests (9 PRD flows)","description":"## Background\n\nThe PRD Section 6 defines 9 end-to-end user flows that exercise cross-screen navigation, state preservation, and data flow. The existing vertical slice test (bd-1mju) covers one flow (Dashboard -> Issue List -> Issue Detail -> Sync). These integration tests cover the remaining 8 flows plus re-test the vertical slice from a user-journey perspective. Each test simulates a realistic keystroke sequence using FrankenTUI's test harness and verifies that the correct screens are reached with the correct data visible.\n\n## Approach\n\nCreate a test module `tests/tui_user_flows.rs` with 9 test functions, each simulating a keystroke sequence against a FrankenTUI `TestHarness` with a pre-populated test database. Tests use `FakeClock` for deterministic timestamps.\n\n**Test database fixture**: A shared setup function creates an in-memory SQLite DB with ~20 issues, ~10 MRs, ~30 discussions, a few experts, and timeline events. This fixture is reused across all flow tests.\n\n**Flow tests**:\n\n1. **`test_flow_find_expert`** — Dashboard -> `w` -> type \"src/auth/\" -> verify Expert mode results appear -> `↓` select first person -> `Enter` -> verify navigation to Issue List filtered by that person\n2. **`test_flow_timeline_query`** — Dashboard -> `t` -> type \"auth timeout\" -> `Enter` -> verify Timeline shows seed events -> `Enter` on first event -> verify entity detail opens -> `Esc` -> back on Timeline\n3. 
**`test_flow_quick_search`** — Any screen -> `/` -> type query -> verify results appear -> `Tab` (switch mode) -> verify mode label changes -> `Enter` -> verify entity detail opens\n4. **`test_flow_sync_and_browse`** — Dashboard -> `s` -> `Enter` (start sync) -> wait for completion -> verify Summary shows deltas -> `i` -> verify Issue List filtered to new items\n5. **`test_flow_review_workload`** — Dashboard -> `w` -> `Tab` (Workload mode) -> type \"@bjones\" -> verify workload sections appear (assigned, authored, reviewing)\n6. **`test_flow_command_palette`** — Any screen -> `Ctrl+P` -> type \"mrs draft\" -> verify fuzzy match -> `Enter` -> verify MR List opened with draft filter\n7. **`test_flow_morning_triage`** — Dashboard -> `i` -> verify Issue List (opened, sorted by updated) -> `Enter` on first -> verify Issue Detail -> `Esc` -> verify cursor preserved on same row -> `j` -> verify cursor moved\n8. **`test_flow_direct_screen_jumps`** — Issue Detail -> `gt` -> verify Timeline -> `gw` -> verify Who -> `gi` -> verify Issue List -> `H` -> verify Dashboard (clean reset)\n9. 
**`test_flow_risk_sweep`** — Dashboard -> scroll to Insights -> `Enter` on first insight -> verify pre-filtered Issue List\n\nEach test follows the pattern:\n```rust\n#[test]\nfn test_flow_X() {\n let (harness, app) = setup_test_harness_with_fixture();\n // Send keystrokes\n harness.send_key(Key::Char('w'));\n // Assert screen state\n assert_eq!(app.current_screen(), Screen::Who);\n // Assert visible content\n let frame = harness.render();\n assert!(frame.contains(\"Expert\"));\n}\n```\n\n## Acceptance Criteria\n- [ ] All 9 flow tests exist and compile\n- [ ] Each test uses the shared DB fixture (no per-test DB setup)\n- [ ] Each test verifies screen transitions via `current_screen()` assertions\n- [ ] Each test verifies at least one content assertion (rendered text contains expected data)\n- [ ] test_flow_morning_triage verifies cursor preservation after Enter/Esc round-trip\n- [ ] test_flow_direct_screen_jumps verifies the g-prefix navigation chain\n- [ ] test_flow_sync_and_browse verifies delta-filtered navigation after sync\n- [ ] All tests use FakeClock for deterministic timestamps\n- [ ] Tests complete in <5 seconds each (no real I/O)\n\n## Files\n- CREATE: crates/lore-tui/tests/tui_user_flows.rs\n- MODIFY: (none — this is a new test file only)\n\n## TDD Anchor\nRED: Write `test_flow_morning_triage` first — it exercises the most common daily workflow (Dashboard -> Issue List -> Issue Detail -> back with cursor preservation). 
Start with just the Dashboard -> Issue List transition.\nGREEN: Requires all Phase 2 core screens to be working; the test itself is the GREEN verification.\nVERIFY: cargo test -p lore-tui test_flow_morning_triage\n\nAdditional tests: All 9 flows listed above.\n\n## Edge Cases\n- Flow tests must handle async data loading — use harness.tick() or harness.wait_for_idle() to let async tasks complete before asserting\n- g-prefix timeout (500ms) — tests must send the second key within the timeout; use harness clock control\n- Sync flow test needs a mock sync that completes quickly — use a pre-populated SyncDeltaLedger rather than running actual sync\n\n## Dependency Context\n- Depends on bd-1mju (vertical slice integration test) which establishes the test harness patterns and fixture setup.\n- Depends on bd-2nfs (snapshot test infrastructure) which provides the FakeClock and TestHarness setup.\n- Depends on all Phase 2 core screen beads (bd-35g5 Dashboard, bd-3ei1 Issue List, bd-8ab7 Issue Detail, bd-2kr0 MR List, bd-3t1b MR Detail) being implemented.\n- Depends on Phase 3 power feature beads (bd-1zow Search, bd-29qw Timeline, bd-u7se Who, bd-wzqi Command Palette) being implemented.\n- Depends on bd-2x2h (Sync screen) for the sync+browse flow 
test.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T19:29:41.060826Z","created_by":"tayloreernisse","updated_at":"2026-02-12T19:29:52.743563Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-2ygk","depends_on_id":"bd-1mju","type":"blocks","created_at":"2026-02-12T19:29:51.843432Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-1zow","type":"blocks","created_at":"2026-02-12T19:29:52.419228Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-29qw","type":"blocks","created_at":"2026-02-12T19:29:52.498629Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-2kr0","type":"blocks","created_at":"2026-02-12T19:29:52.256838Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-2nfs","type":"blocks","created_at":"2026-02-12T19:29:51.931101Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-2x2h","type":"blocks","created_at":"2026-02-12T19:29:52.743530Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-35g5","type":"blocks","created_at":"2026-02-12T19:29:52.013419Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-3ei1","type":"blocks","created_at":"2026-02-12T19:29:52.099343Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-3t1b","type":"blocks","created_at":"2026-02-12T19:29:52.337215Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-8ab7","type":"blocks","created_at":"2026-02-12T19:29:52.178117Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-u7se","type":"blocks","created_at":"2026-02-12T19:29:52.580082Z","created_by":"tayloreernisse"},{"issue_id":"bd-2ygk","depends_on_id":"bd-wzqi","type":"blocks","created_at":"2026-02-12T19:29:52.665763Z","created_by":"tayloreernisse"}]} {"id":"bd-2yo","title":"Fetch MR diffs API and populate mr_file_changes","description":"## 
Background\n\nThis bead fetches MR diff metadata from the GitLab API and populates the mr_file_changes table created by migration 016. It extracts only file-level metadata (paths, change type) and discards actual diff content.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.3 (Ingestion).\n\n## Codebase Context\n\n- pending_dependent_fetches already has `job_type='mr_diffs'` in CHECK constraint (migration 011)\n- dependent_queue.rs has: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- Orchestrator pattern: enqueue after entity ingestion, drain after primary ingestion completes\n- GitLab client uses fetch_all_pages() for pagination\n- Existing drain patterns in orchestrator.rs: drain_resource_events() and drain_mr_closes_issues() — follow same pattern\n- config.sync.fetch_mr_file_changes flag guards enqueue (see bd-jec)\n- mr_file_changes table created by migration 016 (bd-1oo) — NOT 015 (015 is commit SHAs)\n- merge_commit_sha and squash_commit_sha already captured during MR ingestion (src/ingestion/merge_requests.rs lines 184, 205-206, 230-231) — no work needed for those fields\n\n## Approach\n\n### 1. API Client — add to `src/gitlab/client.rs`:\n\n```rust\npub async fn fetch_mr_diffs(\n &self,\n project_id: i64,\n mr_iid: i64,\n) -> Result> {\n let path = format\\!(\"/projects/{project_id}/merge_requests/{mr_iid}/diffs\");\n self.fetch_all_pages(&path, &[(\"per_page\", \"100\")]).await\n .or_else(|e| coalesce_not_found(e, Vec::new()))\n}\n```\n\n### 2. Types — add to `src/gitlab/types.rs`:\n\n```rust\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct GitLabMrDiff {\n pub old_path: String,\n pub new_path: String,\n pub new_file: bool,\n pub renamed_file: bool,\n pub deleted_file: bool,\n // Ignore: diff, a_mode, b_mode, generated_file (not stored)\n}\n```\n\nAdd `GitLabMrDiff` to `src/gitlab/mod.rs` re-exports.\n\n### 3. 
Change Type Derivation (in new file):\n\n```rust\nfn derive_change_type(diff: &GitLabMrDiff) -> &'static str {\n if diff.new_file { \"added\" }\n else if diff.renamed_file { \"renamed\" }\n else if diff.deleted_file { \"deleted\" }\n else { \"modified\" }\n}\n```\n\n### 4. DB Storage — new `src/ingestion/mr_diffs.rs`:\n\n```rust\npub fn upsert_mr_file_changes(\n conn: &Connection,\n mr_local_id: i64,\n project_id: i64,\n diffs: &[GitLabMrDiff],\n) -> Result {\n // DELETE FROM mr_file_changes WHERE merge_request_id = ?\n // INSERT each diff row with derived change_type\n // DELETE+INSERT is simpler than UPSERT for array replacement\n}\n```\n\nAdd `pub mod mr_diffs;` to `src/ingestion/mod.rs`.\n\n### 5. Queue Integration — in orchestrator.rs:\n\n```rust\n// After MR upsert, if config.sync.fetch_mr_file_changes:\nenqueue_job(conn, project_id, \"merge_request\", mr_iid, mr_local_id, \"mr_diffs\")?;\n```\n\nAdd `drain_mr_diffs()` following the drain_mr_closes_issues() pattern. Call it after drain_mr_closes_issues() in the sync pipeline.\n\n## Acceptance Criteria\n\n- [ ] `fetch_mr_diffs()` calls GET /projects/:id/merge_requests/:iid/diffs with pagination\n- [ ] GitLabMrDiff type added to src/gitlab/types.rs and re-exported from src/gitlab/mod.rs\n- [ ] Change type derived: new_file->added, renamed_file->renamed, deleted_file->deleted, else->modified\n- [ ] mr_file_changes rows have correct old_path, new_path, change_type\n- [ ] Old rows deleted before insert (clean replacement per MR)\n- [ ] Jobs only enqueued when config.sync.fetch_mr_file_changes is true\n- [ ] 404/403 API errors handled gracefully (empty result, not failure)\n- [ ] drain_mr_diffs() added to orchestrator.rs sync pipeline\n- [ ] `pub mod mr_diffs;` added to src/ingestion/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/gitlab/client.rs` (add fetch_mr_diffs method)\n- `src/gitlab/types.rs` (add GitLabMrDiff struct)\n- 
`src/gitlab/mod.rs` (re-export GitLabMrDiff)\n- `src/ingestion/mr_diffs.rs` (NEW — upsert_mr_file_changes + derive_change_type)\n- `src/ingestion/mod.rs` (add pub mod mr_diffs)\n- `src/ingestion/orchestrator.rs` (enqueue mr_diffs jobs + drain_mr_diffs)\n\n## TDD Loop\n\nRED:\n- `test_derive_change_type_added` - new_file=true -> \"added\"\n- `test_derive_change_type_renamed` - renamed_file=true -> \"renamed\"\n- `test_derive_change_type_deleted` - deleted_file=true -> \"deleted\"\n- `test_derive_change_type_modified` - all false -> \"modified\"\n- `test_upsert_replaces_existing` - second upsert replaces first\n\nGREEN: Implement API client, type derivation, DB ops, orchestrator wiring.\n\nVERIFY: `cargo test --lib -- mr_diffs`\n\n## Edge Cases\n\n- MR with 500+ files: paginate properly via fetch_all_pages\n- Binary files: handled as modified (renamed_file/new_file/deleted_file all false)\n- File renamed AND modified: renamed_file=true takes precedence\n- Draft MRs: still fetch diffs\n- Deleted MR: 404 -> empty vec via coalesce_not_found()\n- merge_commit_sha/squash_commit_sha: already handled in merge_requests.rs ingestion — NOT part of this bead\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:08.939514Z","created_by":"tayloreernisse","updated_at":"2026-02-08T18:27:05.993580Z","closed_at":"2026-02-08T18:27:05.993482Z","close_reason":"Implemented: GitLabMrDiff type, fetch_mr_diffs client method, upsert_mr_file_changes in new mr_diffs.rs module, enqueue_mr_diffs_jobs + drain_mr_diffs in orchestrator, migration 020 for diffs_synced_for_updated_at watermark, progress events, autocorrect registry. 
All 390 tests pass, clippy clean.","compaction_level":0,"original_size":0,"labels":["api","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-2yo","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.941359Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-1oo","type":"blocks","created_at":"2026-02-02T21:34:16.555239Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-jec","type":"blocks","created_at":"2026-02-02T21:34:16.656402Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-tir","type":"blocks","created_at":"2026-02-02T21:34:16.605198Z","created_by":"tayloreernisse"}]} {"id":"bd-2yq","title":"[CP1] Issue transformer with label extraction","description":"Transform GitLab issue payloads to normalized database schema.\n\nFunctions to implement:\n- transformIssue(gitlabIssue, localProjectId) → NormalizedIssue\n- extractLabels(gitlabIssue, localProjectId) → Label[]\n\nTransformation rules:\n- Convert ISO timestamps to ms epoch using isoToMs()\n- Set last_seen_at to nowMs()\n- Handle labels vs labels_details (prefer details when available)\n- Handle missing optional fields gracefully\n\nFiles: src/gitlab/transformers/issue.ts\nTests: tests/unit/issue-transformer.test.ts\nDone when: Unit tests pass for payload transformation and label extraction","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:09.660448Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.152259Z","deleted_at":"2026-01-25T15:21:35.152254Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} @@ -178,7 +178,7 @@ {"id":"bd-32mc","title":"OBSERV: Implement log retention cleanup at startup","description":"## Background\nLog files accumulate at ~1-10 MB/day. Without cleanup, they grow unbounded. 
Retention runs BEFORE subscriber init so deleted file handles aren't held open by the appender.\n\n## Approach\nAdd a cleanup function, called from main.rs before the subscriber is initialized (before current line 44):\n\n```rust\n/// Delete log files older than retention_days.\n/// Matches files named lore.YYYY-MM-DD.log in the log directory.\npub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> std::io::Result {\n if retention_days == 0 {\n return Ok(0); // 0 means file logging disabled, don't delete\n }\n let cutoff = SystemTime::now() - Duration::from_secs(u64::from(retention_days) * 86400);\n let mut deleted = 0;\n\n for entry in std::fs::read_dir(log_dir)? {\n let entry = entry?;\n let name = entry.file_name();\n let name_str = name.to_string_lossy();\n\n // Only match lore.YYYY-MM-DD.log pattern\n if !name_str.starts_with(\"lore.\") || !name_str.ends_with(\".log\") {\n continue;\n }\n\n if let Ok(metadata) = entry.metadata() {\n if let Ok(modified) = metadata.modified() {\n if modified < cutoff {\n std::fs::remove_file(entry.path())?;\n deleted += 1;\n }\n }\n }\n }\n Ok(deleted)\n}\n```\n\nPlace this function in src/core/paths.rs (next to get_log_dir) or a new src/core/log_retention.rs. Prefer paths.rs since it's small and related.\n\nCall from main.rs:\n```rust\nlet log_dir = get_log_dir(config.logging.log_dir.as_deref());\nlet _ = cleanup_old_logs(&log_dir, config.logging.retention_days);\n// THEN init subscriber\n```\n\nNote: Config must be loaded before cleanup runs. Current main.rs parses Cli at line 60, but config loading happens inside command handlers. 
This means we need to either:\n A) Load config early in main() before subscriber init (preferred)\n B) Defer cleanup to after config load\n\nSince the subscriber must also know log_dir, approach A is natural: load config -> cleanup -> init subscriber -> dispatch command.\n\n## Acceptance Criteria\n- [ ] Files matching lore.*.log older than retention_days are deleted\n- [ ] Files matching lore.*.log within retention_days are preserved\n- [ ] Non-matching files (e.g., other.txt) are never deleted\n- [ ] retention_days=0 skips cleanup entirely (no files deleted)\n- [ ] Errors on individual files don't prevent cleanup of remaining files\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/paths.rs (add cleanup_old_logs function)\n- src/main.rs (call cleanup before subscriber init)\n\n## TDD Loop\nRED:\n - test_log_retention_cleanup: create tempdir with lore.2026-01-01.log through lore.2026-02-04.log, run with retention_days=7, assert old deleted, recent preserved\n - test_log_retention_ignores_non_log_files: create other.txt alongside old log files, assert other.txt untouched\n - test_log_retention_zero_days: retention_days=0, assert nothing deleted\nGREEN: Implement cleanup_old_logs\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- SystemTime::now() precision varies by OS; use file modified time, not name parsing (simpler and more reliable)\n- read_dir on non-existent directory: get_log_dir creates it first, so this shouldn't happen. But handle gracefully.\n- Permissions error on individual file: log a warning, continue with remaining files (don't propagate)\n- Race condition: another process creates a file during cleanup. 
Not a concern -- we only delete old files.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.627901Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:15:04.452086Z","closed_at":"2026-02-04T17:15:04.452039Z","close_reason":"Implemented cleanup_old_logs() with date-pattern matching and retention_days config, runs at startup before subscriber init","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-32mc","depends_on_id":"bd-17n","type":"blocks","created_at":"2026-02-04T15:55:19.523048Z","created_by":"tayloreernisse"},{"issue_id":"bd-32mc","depends_on_id":"bd-1k4","type":"blocks","created_at":"2026-02-04T15:55:19.583155Z","created_by":"tayloreernisse"},{"issue_id":"bd-32mc","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-04T15:53:55.628795Z","created_by":"tayloreernisse"}]} {"id":"bd-32q","title":"Implement timeline seed phase: FTS5 keyword search to entity IDs","description":"## Background\n\nThe seed phase is steps 1-2 of the timeline pipeline (spec Section 3.2): SEED + HYDRATE. 
It converts a keyword query into entity IDs via FTS5 search and collects evidence note candidates.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 1-2.\n\n## Codebase Context\n\n- FTS5 index exists: documents_fts table (migration 008)\n- documents table: id, source_type ('issue'|'merge_request'|'discussion'), source_id, project_id, created_at, content\n- discussions table: id, issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, id (note_id)\n- Safe FTS query builder: src/search/fts.rs has to_fts_query(raw, FtsQueryMode::Safe) for sanitizing user input\n- projects table: path_with_namespace\n- issues/merge_requests: iid, project_id\n\n## Approach\n\nCreate `src/core/timeline_seed.rs`:\n\n```rust\nuse crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};\nuse rusqlite::Connection;\n\npub struct SeedResult {\n pub seed_entities: Vec,\n pub evidence_notes: Vec, // NoteEvidence events\n}\n\npub fn seed_timeline(\n conn: &Connection,\n query: &str,\n project_id: Option,\n since_ms: Option,\n max_seeds: usize, // default 50\n) -> Result { ... 
}\n```\n\n### SQL for SEED + HYDRATE (entity discovery):\n```sql\nSELECT DISTINCT d.source_type, d.source_id, d.project_id,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT iid FROM issues WHERE id = d.source_id)\n WHEN 'merge_request' THEN (SELECT iid FROM merge_requests WHERE id = d.source_id)\n WHEN 'discussion' THEN NULL -- discussions map to parent entity below\n END AS iid,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT p.path_with_namespace FROM projects p JOIN issues i ON i.project_id = p.id WHERE i.id = d.source_id)\n WHEN 'merge_request' THEN (SELECT p.path_with_namespace FROM projects p JOIN merge_requests m ON m.project_id = p.id WHERE m.id = d.source_id)\n WHEN 'discussion' THEN NULL\n END AS project_path\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nWHERE documents_fts MATCH ?1\n AND (?2 IS NULL OR d.project_id = ?2)\nORDER BY rank\nLIMIT ?3\n```\n\nFor 'discussion' source_type: resolve to parent entity via discussions.issue_id or discussions.merge_request_id.\n\n### SQL for evidence notes (top 10 FTS5-matched notes):\n```sql\nSELECT n.id as note_id, n.body, n.created_at, n.author_username,\n disc.id as discussion_id,\n CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as parent_type,\n COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nJOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\nJOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0\nWHERE documents_fts MATCH ?1\nORDER BY rank\nLIMIT 10\n```\n\nEvidence notes become TimelineEvent with:\n- event_type: NoteEvidence { note_id, snippet (first 200 chars), discussion_id }\n- Use to_fts_query(query, FtsQueryMode::Safe) to sanitize user input before MATCH\n\nRegister in `src/core/mod.rs`: `pub mod timeline_seed;`\n\n## Acceptance Criteria\n\n- [ ] seed_timeline() returns entities from FTS5 search\n- [ ] Entities deduplicated (same entity from 
multiple docs appears once)\n- [ ] Discussion documents resolved to parent entity (issue or MR)\n- [ ] Evidence notes capped at 10\n- [ ] Evidence note snippets truncated to 200 chars (safe UTF-8 boundary)\n- [ ] Uses to_fts_query(query, FtsQueryMode::Safe) for input sanitization\n- [ ] --since filter works\n- [ ] -p filter works\n- [ ] Empty result for zero-match queries (not error)\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_seed.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_seed;`)\n\n## TDD Loop\n\nRED:\n- `test_seed_deduplicates_entities`\n- `test_seed_resolves_discussion_to_parent`\n- `test_seed_empty_query_returns_empty`\n- `test_seed_evidence_capped_at_10`\n- `test_seed_evidence_snippet_truncated`\n- `test_seed_respects_since_filter`\n\nTests need in-memory DB with migrations 001-014 + documents/FTS test data.\n\nGREEN: Implement FTS5 queries and deduplication.\n\nVERIFY: `cargo test --lib -- timeline_seed`\n\n## Edge Cases\n\n- FTS5 MATCH invalid syntax: to_fts_query(query, FtsQueryMode::Safe) sanitizes\n- Discussion orphans: LEFT JOIN handles deleted notes\n- UTF-8 truncation: use char_indices() to find safe 200-char boundary\n- Discussion source resolving to both issue_id and merge_request_id: prefer issue_id (shouldn't happen but be defensive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.615908Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:47:07.966488Z","closed_at":"2026-02-05T21:47:07.966437Z","close_reason":"Completed: Created src/core/timeline_seed.rs with seed_timeline() function. FTS5 search to entity IDs with discussion-to-parent resolution, entity deduplication, evidence note extraction (capped, snippet-truncated). 12 tests pass. 
All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-32q","depends_on_id":"bd-20e","type":"blocks","created_at":"2026-02-02T21:33:37.368005Z","created_by":"tayloreernisse"},{"issue_id":"bd-32q","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.617483Z","created_by":"tayloreernisse"}]} {"id":"bd-335","title":"Implement Ollama API client","description":"## Background\nThe Ollama API client provides the HTTP interface to the local Ollama embedding server. It handles health checks (is Ollama running? does the model exist?), batch embedding requests (up to 32 texts per call), and error translation to LoreError variants. This is the lowest-level embedding component — the pipeline (bd-am7) builds on top of it.\n\n## Approach\nCreate \\`src/embedding/ollama.rs\\` per PRD Section 4.2. **Uses async reqwest (not blocking).**\n\n```rust\nuse reqwest::Client; // NOTE: async Client, not reqwest::blocking\nuse serde::{Deserialize, Serialize};\nuse crate::core::error::{LoreError, Result};\n\npub struct OllamaConfig {\n pub base_url: String, // default \\\"http://localhost:11434\\\"\n pub model: String, // default \\\"nomic-embed-text\\\"\n pub timeout_secs: u64, // default 60\n}\n\nimpl Default for OllamaConfig { /* PRD defaults */ }\n\npub struct OllamaClient {\n client: Client, // async reqwest::Client\n config: OllamaConfig,\n}\n\n#[derive(Serialize)]\nstruct EmbedRequest { model: String, input: Vec }\n\n#[derive(Deserialize)]\nstruct EmbedResponse { model: String, embeddings: Vec> }\n\n#[derive(Deserialize)]\nstruct TagsResponse { models: Vec }\n\n#[derive(Deserialize)]\nstruct ModelInfo { name: String }\n\nimpl OllamaClient {\n pub fn new(config: OllamaConfig) -> Self;\n\n /// Async health check: GET /api/tags\n /// Model matched via starts_with (\\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\")\n pub async fn health_check(&self) -> Result<()>;\n\n 
/// Async batch embedding: POST /api/embed\n /// Input: Vec of texts, Response: Vec> of 768-dim embeddings\n pub async fn embed_batch(&self, texts: Vec) -> Result>>;\n}\n\n/// Quick health check without full client (async).\npub async fn check_ollama_health(base_url: &str) -> bool;\n```\n\n**Error mapping (per PRD):**\n- Connection refused/timeout -> LoreError::OllamaUnavailable { base_url, source: Some(e) }\n- Model not in /api/tags -> LoreError::OllamaModelNotFound { model }\n- Non-200 from /api/embed -> LoreError::EmbeddingFailed { document_id: 0, reason: format!(\\\"HTTP {}: {}\\\", status, body) }\n\n**Key PRD detail:** Model matching uses \\`starts_with\\` (not exact match) so \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\".\n\n## Acceptance Criteria\n- [ ] Uses async reqwest::Client (not blocking)\n- [ ] health_check() is async, detects server availability and model presence\n- [ ] Model matched via starts_with (handles \\\":latest\\\" suffix)\n- [ ] embed_batch() is async, sends POST /api/embed\n- [ ] Batch size up to 32 texts\n- [ ] Returns Vec> with 768 dimensions each\n- [ ] OllamaUnavailable error includes base_url and source error\n- [ ] OllamaModelNotFound error includes model name\n- [ ] Non-200 response mapped to EmbeddingFailed with status + body\n- [ ] Timeout: 60 seconds default (configurable via OllamaConfig)\n- [ ] \\`cargo build\\` succeeds\n\n## Files\n- \\`src/embedding/ollama.rs\\` — new file\n- \\`src/embedding/mod.rs\\` — add \\`pub mod ollama;\\` and re-exports\n\n## TDD Loop\nRED: Tests (unit tests with mock, integration needs Ollama):\n- \\`test_config_defaults\\` — verify default base_url, model, timeout\n- \\`test_health_check_model_starts_with\\` — \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\"\n- \\`test_embed_batch_parse\\` — mock response parsed correctly\n- \\`test_connection_error_maps_to_ollama_unavailable\\`\nGREEN: Implement OllamaClient\nVERIFY: \\`cargo test ollama\\`\n\n## Edge Cases\n- 
Ollama returns model name with version tag (\\\"nomic-embed-text:latest\\\"): starts_with handles this\n- Empty texts array: send empty batch, Ollama returns empty embeddings\n- Ollama returns wrong number of embeddings (2 texts, 1 embedding): caller (pipeline) validates\n- Non-JSON response: reqwest deserialization error -> wrap appropriately","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:34.025099Z","created_by":"tayloreernisse","updated_at":"2026-01-30T16:58:17.546852Z","closed_at":"2026-01-30T16:58:17.546794Z","close_reason":"Completed: OllamaClient with async health_check (starts_with model matching), embed_batch, error mapping to LoreError variants, check_ollama_health helper, 4 tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-335","depends_on_id":"bd-ljf","type":"blocks","created_at":"2026-01-30T15:29:24.627951Z","created_by":"tayloreernisse"}]} -{"id":"bd-343o","title":"Fetch and store GitLab linked issues (Related to)","description":"## Background\n\nGitLab's \"Linked items\" provides bidirectional issue linking distinct from \"closes\" and \"mentioned\" references. This data is only available via the issue links API (GET /projects/:id/issues/:iid/links).\n\n**IMPORTANT:** This bead uses migration **017** (after bd-2y79's migration 016). Coordinate numbering.\n\n## Codebase Context\n\n- entity_references table (migration 011) with:\n - reference_type CHECK: 'closes' | 'mentioned' | 'related'\n - source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n- pending_dependent_fetches: job_type CHECK 'resource_events' | 'mr_closes_issues' | 'mr_diffs'\n- **CRITICAL:** Adding 'issue_links' to job_type CHECK requires recreating pending_dependent_fetches table (SQLite can't ALTER CHECK constraints). 
Migration 017 must copy data, drop, recreate with expanded CHECK, and reinsert.\n- Orchestrator pattern: enqueue_job() + drain loop with claim/complete/fail (src/ingestion/orchestrator.rs)\n- dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job()\n- GitLab issue links API returns link_type: \"relates_to\", \"blocks\", \"is_blocked_by\"\n- entity_references reference_type only has 'closes', 'mentioned', 'related' — \"blocks\"/\"is_blocked_by\" not modeled. Store all as 'related' with link_type in a JSON payload_json field or as a separate column in a future migration.\n\n## Approach\n\n### Phase 1: API Client (src/gitlab/client.rs)\n```rust\npub async fn fetch_issue_links(\n &self,\n project_id: i64,\n issue_iid: i64,\n) -> Result> {\n // GET /projects/:id/issues/:iid/links\n // Use fetch_all_pages() + coalesce_not_found()\n}\n```\n\n### Phase 2: Types (src/gitlab/types.rs)\n```rust\n#[derive(Debug, Deserialize)]\npub struct GitLabIssueLink {\n pub id: i64,\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: String,\n pub link_type: String, // \"relates_to\", \"blocks\", \"is_blocked_by\"\n pub link_created_at: Option,\n}\n```\n\n### Phase 3: Migration 017 (migrations/017_issue_links_job_type.sql)\nRecreate pending_dependent_fetches with expanded CHECK:\n```sql\nCREATE TABLE pending_dependent_fetches_new (\n id INTEGER PRIMARY KEY,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')),\n entity_iid INTEGER NOT NULL,\n entity_local_id INTEGER NOT NULL,\n job_type TEXT NOT NULL CHECK (job_type IN (\n 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links'\n )),\n payload_json TEXT,\n enqueued_at INTEGER NOT NULL,\n attempts INTEGER NOT NULL DEFAULT 0,\n last_error TEXT,\n next_retry_at INTEGER,\n locked_at INTEGER,\n UNIQUE(project_id, entity_type, entity_iid, job_type)\n);\nINSERT INTO pending_dependent_fetches_new SELECT 
* FROM pending_dependent_fetches;\nDROP TABLE pending_dependent_fetches;\nALTER TABLE pending_dependent_fetches_new RENAME TO pending_dependent_fetches;\n-- Recreate indexes from migration 011\n```\n\n### Phase 4: Ingestion (src/ingestion/issue_links.rs NEW)\n```rust\npub async fn fetch_and_store_issue_links(\n conn: &Connection,\n client: &GitLabClient,\n project_id: i64,\n issue_local_id: i64,\n issue_iid: i64,\n) -> Result {\n // 1. Fetch links from API\n // 2. Resolve target issue to local DB id (or store as unresolved)\n // 3. Insert into entity_references: reference_type='related', source_method='api'\n // 4. Create bidirectional refs: A->B and B->A\n // 5. Skip self-links\n}\n```\n\n### Phase 5: Queue Integration\n- Enqueue 'issue_links' job after issue ingestion in orchestrator\n- Add drain_issue_links() following drain_mr_closes_issues() pattern\n\n### Phase 6: Display\nIn `lore show issue 123`, add \"Related Issues\" section after closing MRs.\n\n## Acceptance Criteria\n\n- [ ] API client fetches issue links with pagination\n- [ ] Stored as entity_reference: reference_type='related', source_method='api'\n- [ ] Bidirectional: A links B creates both A->B and B->A references\n- [ ] link_type captured (relates_to, blocks, is_blocked_by) — stored as 'related' for now\n- [ ] Cross-project links stored as unresolved (target_entity_id NULL)\n- [ ] Self-links skipped\n- [ ] Migration 017 recreates pending_dependent_fetches with 'issue_links' in CHECK\n- [ ] `lore show issue 123` shows related issues section\n- [ ] `lore --robot show issue 123` includes related_issues in JSON\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- src/gitlab/client.rs (add fetch_issue_links)\n- src/gitlab/types.rs (add GitLabIssueLink)\n- src/ingestion/issue_links.rs (NEW)\n- src/ingestion/mod.rs (add pub mod issue_links)\n- src/ingestion/orchestrator.rs (enqueue + drain)\n- migrations/017_issue_links_job_type.sql (NEW — 
table recreation)\n- src/core/db.rs (add migration to MIGRATIONS array)\n- src/cli/commands/show.rs (display related issues)\n\n## TDD Loop\n\nRED:\n- test_issue_link_deserialization\n- test_store_issue_links_creates_bidirectional_references\n- test_self_link_skipped\n- test_cross_project_link_unresolved\n\nGREEN: Implement API client, ingestion, migration, display.\n\nVERIFY: cargo test --lib -- issue_links\n\n## Edge Cases\n\n- Cross-project links: target not in local DB -> unresolved reference\n- Self-links: skip\n- UNIQUE constraint prevents duplicate entity_references\n- \"blocks\"/\"is_blocked_by\" semantics not modeled in entity_references yet — store as 'related'\n- Table recreation migration: safe because pending_dependent_fetches is transient queue data\n- Migration numbering: 017 follows bd-2y79's migration 016","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-05T15:14:25.202900Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:16:28.629763Z","compaction_level":0,"original_size":0,"labels":["ISSUE"]} +{"id":"bd-343o","title":"Fetch and store GitLab linked issues (Related to)","description":"## Background\n\nGitLab's \"Linked items\" provides bidirectional issue linking distinct from \"closes\" and \"mentioned\" references. This data is only available via the issue links API (GET /projects/:id/issues/:iid/links). The goal is to fetch these links during sync and store them as entity_references so they appear in `lore show issue` and are queryable.\n\n**Why:** Currently `lore show issue` displays closing MRs (via `get_closing_mrs()` in show.rs:~line 1544) but has NO related issues section. 
This bead adds that capability.\n\n## Codebase Context\n\n- **entity_references table** (migration 011): reference_type CHECK: 'closes' | 'mentioned' | 'related'; source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n- **pending_dependent_fetches** (migration 011): job_type CHECK: 'resource_events' | 'mr_closes_issues' | 'mr_diffs'. No later migrations modified this table.\n- **CRITICAL:** Adding 'issue_links' to job_type CHECK requires recreating pending_dependent_fetches table (SQLite can't ALTER CHECK constraints). Migration **027** must copy data, drop, recreate with expanded CHECK, and reinsert.\n- **Orchestrator** (src/ingestion/orchestrator.rs, 1745 lines): Three drain functions exist — drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), drain_mr_diffs() (line 1514). Follow the same claim/complete/fail pattern from dependent_queue.rs.\n- **dependent_queue.rs**: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- **show.rs** (1544 lines): Has get_closing_mrs() for closing MR display. NO related_issues section exists yet.\n- **GitLab API**: GET /projects/:id/issues/:iid/links returns link_type: \"relates_to\", \"blocks\", \"is_blocked_by\"\n- **Migration count**: 26 migrations exist (001-026). 
Next migration = **027**.\n\n## Approach\n\n### Phase 1: API Client (src/gitlab/client.rs)\n```rust\npub async fn fetch_issue_links(\n &self,\n project_id: i64,\n issue_iid: i64,\n) -> Result> {\n // GET /projects/:id/issues/:iid/links\n // Use fetch_all_pages() + coalesce_not_found()\n}\n```\n\n### Phase 2: Types (src/gitlab/types.rs)\n```rust\n#[derive(Debug, Deserialize)]\npub struct GitLabIssueLink {\n pub id: i64,\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: String,\n pub link_type: String, // \"relates_to\", \"blocks\", \"is_blocked_by\"\n pub link_created_at: Option,\n}\n```\n\n### Phase 3: Migration 027 (migrations/027_issue_links_job_type.sql)\nRecreate pending_dependent_fetches with expanded CHECK:\n```sql\nCREATE TABLE pending_dependent_fetches_new (\n id INTEGER PRIMARY KEY,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')),\n entity_iid INTEGER NOT NULL,\n entity_local_id INTEGER NOT NULL,\n job_type TEXT NOT NULL CHECK (job_type IN (\n 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links'\n )),\n payload_json TEXT,\n enqueued_at INTEGER NOT NULL,\n attempts INTEGER NOT NULL DEFAULT 0,\n last_error TEXT,\n next_retry_at INTEGER,\n locked_at INTEGER,\n UNIQUE(project_id, entity_type, entity_iid, job_type)\n);\nINSERT INTO pending_dependent_fetches_new SELECT * FROM pending_dependent_fetches;\nDROP TABLE pending_dependent_fetches;\nALTER TABLE pending_dependent_fetches_new RENAME TO pending_dependent_fetches;\n-- Recreate indexes from migration 011 (idx_pdf_job_type, idx_pdf_next_retry)\n```\n\nRegister in MIGRATIONS array in src/core/db.rs (entry 27).\n\n### Phase 4: Ingestion (src/ingestion/issue_links.rs NEW)\n```rust\npub async fn fetch_and_store_issue_links(\n conn: &Connection,\n client: &GitLabClient,\n project_id: i64,\n issue_local_id: i64,\n issue_iid: i64,\n) -> Result {\n // 1. 
Fetch links from API\n // 2. Resolve target issue to local DB id (SELECT id FROM issues WHERE project_id=? AND iid=?)\n // 3. Insert into entity_references: reference_type='related', source_method='api'\n // 4. Create bidirectional refs: A->B and B->A\n // 5. Skip self-links\n // 6. Cross-project: store with target_entity_id=NULL (unresolved)\n}\n```\n\n### Phase 5: Queue Integration (src/ingestion/orchestrator.rs)\n- Enqueue 'issue_links' job after issue ingestion (near the existing resource_events enqueue)\n- Add drain_issue_links() following drain_mr_closes_issues() pattern (lines 1254-1512)\n- Config gate: add `sync.fetchIssueLinks` (default true) to config, like existing `sync.fetchResourceEvents`\n\n### Phase 6: Display (src/cli/commands/show.rs)\nIn `lore show issue 123`, add \"Related Issues\" section after closing MRs.\nPattern: query entity_references WHERE source_entity_type='issue' AND source_entity_id= AND reference_type='related'.\n\n## Acceptance Criteria\n\n- [ ] API client fetches issue links with pagination (fetch_all_pages + coalesce_not_found)\n- [ ] Stored as entity_reference: reference_type='related', source_method='api'\n- [ ] Bidirectional: A links B creates both A->B and B->A references\n- [ ] link_type captured (relates_to, blocks, is_blocked_by) — stored as 'related' for now\n- [ ] Cross-project links stored as unresolved (target_entity_id NULL)\n- [ ] Self-links skipped\n- [ ] Migration **027** recreates pending_dependent_fetches with 'issue_links' in CHECK\n- [ ] Migration registered in MIGRATIONS array in src/core/db.rs\n- [ ] `lore show issue 123` shows related issues section\n- [ ] `lore --robot show issue 123` includes related_issues in JSON\n- [ ] Config gate: sync.fetchIssueLinks (default true, camelCase serde rename)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/gitlab/client.rs (add fetch_issue_links)\n- MODIFY: 
src/gitlab/types.rs (add GitLabIssueLink)\n- CREATE: src/ingestion/issue_links.rs\n- MODIFY: src/ingestion/mod.rs (add pub mod issue_links)\n- MODIFY: src/ingestion/orchestrator.rs (enqueue + drain_issue_links)\n- CREATE: migrations/027_issue_links_job_type.sql\n- MODIFY: src/core/db.rs (add migration 027 to MIGRATIONS array)\n- MODIFY: src/core/config.rs (add sync.fetchIssueLinks)\n- MODIFY: src/cli/commands/show.rs (display related issues)\n\n## TDD Anchor\n\nRED:\n- test_issue_link_deserialization (types.rs: deserialize GitLabIssueLink from JSON)\n- test_store_issue_links_creates_bidirectional_references (in-memory DB, insert 2 issues, store link, verify 2 rows in entity_references)\n- test_self_link_skipped (same issue_iid both sides, verify 0 rows)\n- test_cross_project_link_unresolved (target not in DB, verify target_entity_id IS NULL)\n\nGREEN: Implement API client, ingestion, migration, display.\n\nVERIFY: cargo test --lib -- issue_links\n\n## Edge Cases\n\n- Cross-project links: target not in local DB -> unresolved reference (target_entity_id NULL)\n- Self-links: skip entirely\n- UNIQUE constraint on entity_references prevents duplicate refs on re-sync\n- \"blocks\"/\"is_blocked_by\" semantics not modeled in entity_references yet — store as 'related'\n- Table recreation migration: safe because pending_dependent_fetches is transient queue data that gets re-enqueued on next sync\n- Recreated table must restore indexes: idx_pdf_job_type, idx_pdf_next_retry (check migration 011 for exact definitions)\n\n## Dependency Context\n\n- **entity_references** (migration 011): provides the target table. 
reference_type='related' already in CHECK.\n- **dependent_queue.rs**: provides enqueue_job/claim_jobs/complete_job/fail_job lifecycle used by drain_issue_links()\n- **orchestrator drain pattern**: drain_mr_closes_issues() (line 1254) is the closest template — fetch API data, insert entity_references, complete job","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-05T15:14:25.202900Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:50:44.934373Z","compaction_level":0,"original_size":0,"labels":["ISSUE"]} {"id":"bd-34ek","title":"OBSERV: Implement MetricsLayer custom tracing subscriber layer","description":"## Background\nMetricsLayer is a custom tracing subscriber layer that records span timing and structured fields, then materializes them into Vec. This avoids threading a mutable collector through every function signature -- spans are the single source of truth.\n\n## Approach\nAdd to src/core/metrics.rs (same file as StageTiming):\n\n```rust\nuse std::collections::HashMap;\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\nuse tracing::span::{Attributes, Id, Record};\nuse tracing::Subscriber;\nuse tracing_subscriber::layer::{Context, Layer};\nuse tracing_subscriber::registry::LookupSpan;\n\n#[derive(Debug)]\nstruct SpanData {\n name: String,\n parent_id: Option,\n start: Instant,\n fields: HashMap,\n}\n\n#[derive(Debug, Clone)]\npub struct MetricsLayer {\n spans: Arc>>,\n completed: Arc>>,\n}\n\nimpl MetricsLayer {\n pub fn new() -> Self {\n Self {\n spans: Arc::new(Mutex::new(HashMap::new())),\n completed: Arc::new(Mutex::new(Vec::new())),\n }\n }\n\n /// Extract timing tree for a completed run.\n /// Call this after the root span closes.\n pub fn extract_timings(&self) -> Vec {\n let completed = self.completed.lock().unwrap();\n // Build tree: find root entries (no parent), attach children\n // ... 
tree construction logic\n }\n}\n\nimpl Layer for MetricsLayer\nwhere\n S: Subscriber + for<'a> LookupSpan<'a>,\n{\n fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) {\n let parent_id = ctx.span(id).and_then(|s| s.parent().map(|p| p.id()));\n let mut fields = HashMap::new();\n // Visit attrs to capture initial field values\n let mut visitor = FieldVisitor(&mut fields);\n attrs.record(&mut visitor);\n\n self.spans.lock().unwrap().insert(id.into_u64(), SpanData {\n name: attrs.metadata().name().to_string(),\n parent_id,\n start: Instant::now(),\n fields,\n });\n }\n\n fn on_record(&self, id: &Id, values: &Record<'_>, _ctx: Context<'_, S>) {\n // Capture recorded fields (items_processed, items_skipped, errors)\n if let Some(data) = self.spans.lock().unwrap().get_mut(&id.into_u64()) {\n let mut visitor = FieldVisitor(&mut data.fields);\n values.record(&mut visitor);\n }\n }\n\n fn on_close(&self, id: Id, _ctx: Context<'_, S>) {\n if let Some(data) = self.spans.lock().unwrap().remove(&id.into_u64()) {\n let elapsed = data.start.elapsed();\n let timing = StageTiming {\n name: data.name,\n project: data.fields.get(\"project\").and_then(|v| v.as_str()).map(String::from),\n elapsed_ms: elapsed.as_millis() as u64,\n items_processed: data.fields.get(\"items_processed\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n items_skipped: data.fields.get(\"items_skipped\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n errors: data.fields.get(\"errors\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n sub_stages: vec![], // Will be populated during extract_timings tree construction\n };\n self.completed.lock().unwrap().push((id.into_u64(), timing));\n }\n }\n}\n```\n\nNeed a FieldVisitor struct implementing tracing::field::Visit to capture field values.\n\nRegister in subscriber stack (src/main.rs), alongside stderr and file layers:\n```rust\nlet metrics_layer = MetricsLayer::new();\nlet metrics_handle = metrics_layer.clone(); // Clone Arc for later 
extraction\n\nregistry()\n .with(stderr_layer.with_filter(stderr_filter))\n .with(file_layer.with_filter(file_filter))\n .with(metrics_layer) // No filter -- captures all spans\n .init();\n```\n\nPass metrics_handle to command handlers so they can call extract_timings() after the pipeline completes.\n\n## Acceptance Criteria\n- [ ] MetricsLayer captures span enter/close timing\n- [ ] on_record captures items_processed, items_skipped, errors fields\n- [ ] extract_timings() returns correctly nested Vec tree\n- [ ] Parallel spans (multiple projects) both appear as sub_stages of parent\n- [ ] Thread-safe: Arc> allows concurrent span operations\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (add MetricsLayer, FieldVisitor, tree construction)\n- src/main.rs (register MetricsLayer in subscriber stack)\n\n## TDD Loop\nRED:\n - test_metrics_layer_single_span: enter/exit one span, extract, assert one StageTiming\n - test_metrics_layer_nested_spans: parent + child, assert child in parent.sub_stages\n - test_metrics_layer_parallel_spans: two sibling spans, assert both in parent.sub_stages\n - test_metrics_layer_field_recording: record items_processed=42, assert captured\nGREEN: Implement MetricsLayer with on_new_span, on_record, on_close, extract_timings\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- Span ID reuse: tracing may reuse span IDs after close. Using remove on close prevents stale data.\n- Lock contention: Mutex per operation. For high-span-count scenarios, consider parking_lot::Mutex. But lore's span count is low (<100 per run), so std::sync::Mutex is fine.\n- extract_timings tree construction: iterate completed Vec, build parent->children map, then recursively construct StageTiming tree. Root entries have parent_id matching the root span or None.\n- MetricsLayer has no filter: it sees ALL spans. 
To avoid noise from dependency spans, check if span name starts with known stage names, or rely on the \"stage\" field being present.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.960669Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:25:25.523811Z","closed_at":"2026-02-04T17:25:25.523730Z","close_reason":"Implemented MetricsLayer custom tracing subscriber layer with span timing capture, rate-limit/retry event detection, tree extraction, and 12 unit tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-34ek","depends_on_id":"bd-1o4h","type":"blocks","created_at":"2026-02-04T15:55:19.851554Z","created_by":"tayloreernisse"},{"issue_id":"bd-34ek","depends_on_id":"bd-24j1","type":"blocks","created_at":"2026-02-04T15:55:19.905554Z","created_by":"tayloreernisse"},{"issue_id":"bd-34ek","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-04T15:54:31.961646Z","created_by":"tayloreernisse"}]} {"id":"bd-34o","title":"Implement MR transformer","description":"## Background\nTransforms GitLab MR API responses into normalized schema for database storage. 
Handles deprecated field fallbacks and extracts metadata (labels, assignees, reviewers).\n\n## Approach\nCreate new transformer module following existing issue transformer pattern:\n- `NormalizedMergeRequest` - Database-ready struct\n- `MergeRequestWithMetadata` - MR + extracted labels/assignees/reviewers\n- `transform_merge_request()` - Main transformation function\n- `extract_labels()` - Label extraction helper\n\n## Files\n- `src/gitlab/transformers/merge_request.rs` - New transformer module\n- `src/gitlab/transformers/mod.rs` - Export new module\n- `tests/mr_transformer_tests.rs` - Unit tests\n\n## Acceptance Criteria\n- [ ] `NormalizedMergeRequest` struct exists with all DB columns\n- [ ] `MergeRequestWithMetadata` contains MR + label_names + assignee_usernames + reviewer_usernames\n- [ ] `transform_merge_request()` returns `Result`\n- [ ] `draft` computed as `gitlab_mr.draft || gitlab_mr.work_in_progress`\n- [ ] `detailed_merge_status` prefers `detailed_merge_status` over `merge_status_legacy`\n- [ ] `merge_user_username` prefers `merge_user` over `merged_by`\n- [ ] `head_sha` extracted from `sha` field\n- [ ] `references_short` and `references_full` extracted from `references` Option\n- [ ] Timestamps parsed with `iso_to_ms()`, errors returned (not zeroed)\n- [ ] `last_seen_at` set to `now_ms()`\n- [ ] `cargo test mr_transformer` passes\n\n## TDD Loop\nRED: `cargo test mr_transformer` -> module not found\nGREEN: Add transformer with all fields\nVERIFY: `cargo test mr_transformer`\n\n## Struct Definitions\n```rust\n#[derive(Debug, Clone)]\npub struct NormalizedMergeRequest {\n pub gitlab_id: i64,\n pub project_id: i64,\n pub iid: i64,\n pub title: String,\n pub description: Option,\n pub state: String,\n pub draft: bool,\n pub author_username: String,\n pub source_branch: String,\n pub target_branch: String,\n pub head_sha: Option,\n pub references_short: Option,\n pub references_full: Option,\n pub detailed_merge_status: Option,\n pub merge_user_username: 
Option,\n pub created_at: i64,\n pub updated_at: i64,\n pub merged_at: Option,\n pub closed_at: Option,\n pub last_seen_at: i64,\n pub web_url: String,\n}\n\n#[derive(Debug, Clone)]\npub struct MergeRequestWithMetadata {\n pub merge_request: NormalizedMergeRequest,\n pub label_names: Vec,\n pub assignee_usernames: Vec,\n pub reviewer_usernames: Vec,\n}\n```\n\n## Function Signature\n```rust\npub fn transform_merge_request(\n gitlab_mr: &GitLabMergeRequest,\n local_project_id: i64,\n) -> Result\n```\n\n## Key Logic\n```rust\n// Draft: prefer draft, fallback to work_in_progress\nlet is_draft = gitlab_mr.draft || gitlab_mr.work_in_progress;\n\n// Merge status: prefer detailed_merge_status\nlet detailed_merge_status = gitlab_mr.detailed_merge_status\n .clone()\n .or_else(|| gitlab_mr.merge_status_legacy.clone());\n\n// Merge user: prefer merge_user\nlet merge_user_username = gitlab_mr.merge_user\n .as_ref()\n .map(|u| u.username.clone())\n .or_else(|| gitlab_mr.merged_by.as_ref().map(|u| u.username.clone()));\n\n// References extraction\nlet (references_short, references_full) = gitlab_mr.references\n .as_ref()\n .map(|r| (Some(r.short.clone()), Some(r.full.clone())))\n .unwrap_or((None, None));\n\n// Head SHA\nlet head_sha = gitlab_mr.sha.clone();\n```\n\n## Edge Cases\n- Invalid timestamps should return `Err`, not zero values\n- Empty labels/assignees/reviewers should return empty Vecs, not None\n- `state` must pass through as-is (including 
\"locked\")","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.849049Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:11:48.501301Z","closed_at":"2026-01-27T00:11:48.501241Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-34o","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.023616Z","created_by":"tayloreernisse"},{"issue_id":"bd-34o","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.059646Z","created_by":"tayloreernisse"}]} {"id":"bd-34rr","title":"WHO: Migration 017 — composite indexes for query paths","description":"## Background\n\nWith 280K notes, the path/timestamp queries for lore who will degrade without composite indexes. Existing indexes cover note_type and position_new_path separately (migration 006) but not as composites aligned to the who query patterns. This is a non-breaking, additive-only migration.\n\n## Approach\n\nAdd as entry 17 (index 16) in the MIGRATIONS array in src/core/db.rs. 
LATEST_SCHEMA_VERSION auto-updates via MIGRATIONS.len() as i32.\n\n### Exact SQL for the migration entry:\n\n```sql\n-- Migration 017: Composite indexes for who query paths\n\n-- Expert/Overlap: DiffNote path prefix + timestamp filter.\n-- Leading with position_new_path (not note_type) because the partial index\n-- predicate already handles the constant filter.\nCREATE INDEX IF NOT EXISTS idx_notes_diffnote_path_created\n ON notes(position_new_path, created_at, project_id)\n WHERE note_type = 'DiffNote' AND is_system = 0;\n\n-- Active/Workload: discussion participation lookups.\nCREATE INDEX IF NOT EXISTS idx_notes_discussion_author\n ON notes(discussion_id, author_username)\n WHERE is_system = 0;\n\n-- Active (project-scoped): unresolved discussions by recency.\nCREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent\n ON discussions(project_id, last_note_at)\n WHERE resolvable = 1 AND resolved = 0;\n\n-- Active (global): unresolved discussions by recency (no project scope).\n-- Without this, (project_id, last_note_at) can't satisfy ORDER BY last_note_at DESC\n-- efficiently when project_id is unconstrained.\nCREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global\n ON discussions(last_note_at)\n WHERE resolvable = 1 AND resolved = 0;\n\n-- Workload: issue assignees by username.\nCREATE INDEX IF NOT EXISTS idx_issue_assignees_username\n ON issue_assignees(username, issue_id);\n```\n\n### Not added (already adequate):\n- merge_requests(author_username) — idx_mrs_author (migration 006)\n- mr_reviewers(username) — idx_mr_reviewers_username (migration 006)\n- notes(discussion_id) — idx_notes_discussion (migration 002)\n\n## Files\n\n- `src/core/db.rs` — append to MIGRATIONS array as entry index 16\n\n## TDD Loop\n\nRED: `cargo test -- test_migration` (existing migration tests should still pass)\nGREEN: Add the migration SQL string to the array\nVERIFY: `cargo test && cargo check --all-targets`\n\n## Acceptance Criteria\n\n- [ ] MIGRATIONS array has 17 
entries (index 0-16)\n- [ ] LATEST_SCHEMA_VERSION is 17\n- [ ] cargo test passes (in-memory DB runs all migrations including 017)\n- [ ] No existing index names conflict\n\n## Edge Cases\n\n- The SQL uses CREATE INDEX IF NOT EXISTS — safe for idempotent reruns\n- Partial indexes (WHERE clause) keep index size small: ~33K of 280K notes for DiffNote index","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:39:49.397860Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.593561Z","closed_at":"2026-02-08T04:10:29.593519Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. All quality gates pass.","compaction_level":0,"original_size":0} @@ -310,6 +310,6 @@ {"id":"bd-ymd","title":"[CP1] Final validation - Gate A through D","description":"Run all tests and verify all internal gates pass.\n\n## Gate A: Issues Only (Must Pass First)\n- [ ] gi ingest --type=issues fetches all issues from configured projects\n- [ ] Issues stored with correct schema, including last_seen_at\n- [ ] Cursor-based sync is resumable (re-run fetches only new/updated)\n- [ ] Incremental cursor updates every 100 issues\n- [ ] Raw payloads stored for each issue\n- [ ] gi list issues and gi count issues work\n\n## Gate B: Labels Correct (Must Pass)\n- [ ] Labels extracted and stored (name-only)\n- [ ] Label links created correctly\n- [ ] Stale label links removed on re-sync (verified with test)\n- [ ] Label count per issue matches GitLab\n\n## Gate C: Dependent Discussion Sync (Must Pass)\n- [ ] Discussions fetched for issues with updated_at advancement\n- [ ] Notes stored with is_system flag correctly set\n- [ ] Raw payloads stored for discussions and notes\n- [ ] discussions_synced_for_updated_at watermark updated after sync\n- [ ] Unchanged issues skip discussion refetch (verified with test)\n- [ ] Bounded concurrency (dependent_concurrency respected)\n\n## Gate D: Resumability Proof (Must 
Pass)\n- [ ] Kill mid-run, rerun; bounded redo (cursor progress preserved)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] Single-flight lock prevents concurrent runs\n\n## Final Gate (Must Pass)\n- [ ] All unit tests pass (cargo test)\n- [ ] All integration tests pass (mocked with wiremock)\n- [ ] cargo clippy passes with no warnings\n- [ ] cargo fmt --check passes\n- [ ] Compiles with --release\n\n## Validation Commands\ncargo test\ncargo clippy -- -D warnings\ncargo fmt --check\ncargo build --release\n\nFiles: All CP1 files\nDone when: All gate criteria pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:59:26.795633Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.132613Z","deleted_at":"2026-01-25T17:02:02.132608Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-ypa","title":"Implement timeline expand phase: BFS cross-reference expansion","description":"## Background\n\nThe expand phase is step 3 of the timeline pipeline (spec Section 3.2). 
Starting from seed entities, it performs BFS over entity_references to discover related entities not matched by keywords.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 step 3, Section 3.5 (expanded_entities JSON).\n\n## Codebase Context\n\n- entity_references table exists (migration 011) with columns: source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at\n- reference_type CHECK: `'closes' | 'mentioned' | 'related'`\n- source_method CHECK: `'api' | 'note_parse' | 'description_parse'` — use these values in provenance, NOT the spec's original values\n- Indexes: idx_entity_refs_source (source_entity_type, source_entity_id), idx_entity_refs_target (target_entity_id WHERE NOT NULL)\n\n## Approach\n\nCreate `src/core/timeline_expand.rs`:\n\n```rust\nuse std::collections::{HashSet, VecDeque};\nuse rusqlite::Connection;\nuse crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef};\n\npub struct ExpandResult {\n pub expanded_entities: Vec,\n pub unresolved_references: Vec,\n}\n\npub fn expand_timeline(\n conn: &Connection,\n seeds: &[EntityRef],\n depth: u32, // 0=no expansion, 1=default, 2+=deep\n include_mentions: bool, // --expand-mentions flag\n max_entities: usize, // cap at 100 to prevent explosion\n) -> Result { ... }\n```\n\n### BFS Algorithm\n\n```\nvisited: HashSet<(String, i64)> = seeds as set (entity_type, entity_id)\nqueue: VecDeque<(EntityRef, u32)> for multi-hop\n\nFor each seed:\n query_neighbors(conn, seed, edge_types) -> outgoing + incoming refs\n - Outgoing: SELECT target_* FROM entity_references WHERE source_entity_type=? AND source_entity_id=? AND reference_type IN (...)\n - Incoming: SELECT source_* FROM entity_references WHERE target_entity_type=? AND target_entity_id=? 
AND reference_type IN (...)\n - Unresolved (target_entity_id IS NULL): collect in UnresolvedRef, don't traverse\n - New resolved: add to expanded with provenance (via_from, via_reference_type, via_source_method)\n - If current_depth < depth: enqueue for further BFS\n```\n\n### Edge Type Filtering\n\n```rust\nfn edge_types(include_mentions: bool) -> Vec<&'static str> {\n if include_mentions {\n vec![\"closes\", \"related\", \"mentioned\"]\n } else {\n vec![\"closes\", \"related\"]\n }\n}\n```\n\n### Provenance (Critical for spec compliance)\n\nEach expanded entity needs via object per spec Section 3.5:\n- via_from: EntityRef of the entity that referenced this one\n- via_reference_type: from entity_references.reference_type column\n- via_source_method: from entity_references.source_method column (**codebase values: 'api', 'note_parse', 'description_parse'**)\n\nRegister in `src/core/mod.rs`: `pub mod timeline_expand;`\n\n## Acceptance Criteria\n\n- [ ] BFS traverses outgoing AND incoming edges in entity_references\n- [ ] Default: only \"closes\" and \"related\" edges (not \"mentioned\")\n- [ ] --expand-mentions: also traverses \"mentioned\" edges\n- [ ] depth=0: returns empty expanded list\n- [ ] max_entities cap prevents explosion (default 100)\n- [ ] Provenance: via_source_method uses codebase values (api/note_parse/description_parse), NOT spec values\n- [ ] Unresolved references (target_entity_id IS NULL) collected, not traversed\n- [ ] No duplicates: visited set by (entity_type, entity_id)\n- [ ] Self-references skipped\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_expand.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_expand;`)\n\n## TDD Loop\n\nRED: Tests in `src/core/timeline_expand.rs`:\n- `test_expand_depth_zero` - returns empty\n- `test_expand_finds_linked_entity` - seed issue -> closes -> linked MR\n- 
`test_expand_bidirectional` - starting from target also finds source\n- `test_expand_respects_max_entities`\n- `test_expand_skips_mentions_by_default`\n- `test_expand_includes_mentions_when_flagged`\n- `test_expand_collects_unresolved`\n- `test_expand_tracks_provenance` - verify via_source_method is 'api' not 'api_closes_issues'\n\nTests need in-memory DB with migrations 001-014 applied + entity_references test data.\n\nGREEN: Implement BFS.\n\nVERIFY: `cargo test --lib -- timeline_expand`\n\n## Edge Cases\n\n- Circular references: visited set prevents infinite loop\n- Entity referenced from multiple seeds: first-come provenance wins\n- Empty entity_references: returns empty, not error\n- Cross-project refs with NULL target_entity_id: add to unresolved","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.659381Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:49:46.868460Z","closed_at":"2026-02-05T21:49:46.868410Z","close_reason":"Completed: Created src/core/timeline_expand.rs with BFS cross-reference expansion. Bidirectional traversal, depth limiting, mention filtering, max entity cap, provenance tracking, unresolved reference collection. 10 tests pass. 
All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-ypa","depends_on_id":"bd-32q","type":"blocks","created_at":"2026-02-02T21:33:37.448515Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:33:37.528233Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.661036Z","created_by":"tayloreernisse"}]} {"id":"bd-z0s","title":"[CP1] Final validation - Gate A through D","description":"Run all tests and verify all internal gates pass.\n\n## Gate A: Issues Only (Must Pass First)\n- [ ] gi ingest --type=issues fetches all issues from configured projects\n- [ ] Issues stored with correct schema, including last_seen_at\n- [ ] Cursor-based sync is resumable (re-run fetches only new/updated)\n- [ ] Incremental cursor updates every 100 issues\n- [ ] Raw payloads stored for each issue\n- [ ] gi list issues and gi count issues work\n\n## Gate B: Labels Correct (Must Pass)\n- [ ] Labels extracted and stored (name-only)\n- [ ] Label links created correctly\n- [ ] **Stale label links removed on re-sync** (verified with test)\n- [ ] Label count per issue matches GitLab\n\n## Gate C: Dependent Discussion Sync (Must Pass)\n- [ ] Discussions fetched for issues with updated_at advancement\n- [ ] Notes stored with is_system flag correctly set\n- [ ] Raw payloads stored for discussions and notes\n- [ ] discussions_synced_for_updated_at watermark updated after sync\n- [ ] **Unchanged issues skip discussion refetch** (verified with test)\n- [ ] Bounded concurrency (dependent_concurrency respected)\n\n## Gate D: Resumability Proof (Must Pass)\n- [ ] Kill mid-run, rerun; bounded redo (cursor progress preserved)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] Single-flight lock prevents concurrent runs\n\n## Final Gate (Must Pass)\n- [ ] All unit 
tests pass (cargo test)\n- [ ] All integration tests pass (mocked with wiremock)\n- [ ] cargo clippy passes with no warnings\n- [ ] cargo fmt --check passes\n- [ ] Compiles with --release\n\n## Validation Commands\ncargo test\ncargo clippy -- -D warnings\ncargo fmt --check\ncargo build --release\n\n## Data Integrity Checks\n- SELECT COUNT(*) FROM issues matches GitLab issue count\n- Every issue has a raw_payloads row\n- Every discussion has a raw_payloads row\n- Labels in issue_labels junction all exist in labels table\n- Re-running gi ingest --type=issues fetches 0 new items\n- After removing a label in GitLab and re-syncing, the link is removed\n\nFiles: All CP1 files\nDone when: All gate criteria pass","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.459095Z","created_by":"tayloreernisse","updated_at":"2026-01-25T23:27:09.567537Z","closed_at":"2026-01-25T23:27:09.567478Z","close_reason":"All gates pass: 71 tests, clippy clean, fmt clean, release build successful","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-z0s","depends_on_id":"bd-17v","type":"blocks","created_at":"2026-01-25T17:04:05.889114Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-2f0","type":"blocks","created_at":"2026-01-25T17:04:05.841210Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-39w","type":"blocks","created_at":"2026-01-25T17:04:05.913316Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-3n1","type":"blocks","created_at":"2026-01-25T17:04:05.817830Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-o7b","type":"blocks","created_at":"2026-01-25T17:04:05.864480Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-v6i","type":"blocks","created_at":"2026-01-25T17:04:05.794555Z","created_by":"tayloreernisse"}]} -{"id":"bd-z94","title":"Implement 'lore file-history' command with human and robot output","description":"## 
Background\n\nThe file-history command is Gate 4's user-facing CLI. It answers 'which MRs touched this file, and why?'\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.4-4.5.\n\n## Codebase Context\n\n- CLI pattern: Commands enum in src/cli/mod.rs, handler in src/main.rs, output in src/cli/commands/\n- Project resolution: resolve_project() returns project_id or exit 18 (Ambiguous)\n- Robot mode: {ok, data, meta} envelope pattern\n- merge_requests.merged_at exists (migration 006) — order by COALESCE(merged_at, updated_at) DESC\n- discussions table: issue_id, merge_request_id\n- notes table: position_new_path for DiffNotes (used for --discussions flag)\n- mr_file_changes table: migration 016 (bd-1oo)\n- resolve_rename_chain() from bd-1yx (src/core/file_history.rs) for rename handling\n- VALID_COMMANDS array in src/main.rs (line ~448)\n\n## Approach\n\n### 1. FileHistoryArgs subcommand (`src/cli/mod.rs`):\n```rust\n/// Show MRs that touched a file, with linked issues and discussions\n#[command(name = \"file-history\")]\nFileHistory(FileHistoryArgs),\n```\n\n```rust\n#[derive(Parser, Debug)]\npub struct FileHistoryArgs {\n /// File path to trace history for\n pub path: String,\n /// Scope to a specific project (fuzzy match)\n #[arg(short = 'p', long)]\n pub project: Option,\n /// Include discussion snippets from DiffNotes on this file\n #[arg(long)]\n pub discussions: bool,\n /// Disable rename chain resolution\n #[arg(long = \"no-follow-renames\")]\n pub no_follow_renames: bool,\n /// Only show merged MRs\n #[arg(long)]\n pub merged: bool,\n /// Maximum results\n #[arg(short = 'n', long = \"limit\", default_value = \"50\")]\n pub limit: usize,\n}\n```\n\n### 2. Query logic (`src/cli/commands/file_history.rs`):\n\n1. Resolve project (exit 18 on ambiguous)\n2. Call resolve_rename_chain() unless --no-follow-renames\n3. Query mr_file_changes for all resolved paths\n4. JOIN merge_requests for MR details\n5. 
Optionally fetch DiffNote discussions on the file\n6. Order by COALESCE(merged_at, updated_at) DESC\n7. Apply --merged filter and --limit\n\n### 3. Human output:\n```\nFile History: src/auth/oauth.rs (via 3 paths, 5 MRs)\nRename chain: src/authentication/oauth.rs -> src/auth/oauth.rs\n\n !456 \"Implement OAuth2 flow\" merged @alice 2024-01-22 modified\n !489 \"Fix OAuth token expiry\" merged @bob 2024-02-15 modified\n !512 \"Refactor auth module\" merged @carol 2024-03-01 renamed\n```\n\n### 4. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"path\": \"src/auth/oauth.rs\",\n \"rename_chain\": [\"src/authentication/oauth.rs\", \"src/auth/oauth.rs\"],\n \"merge_requests\": [\n {\n \"iid\": 456,\n \"title\": \"Implement OAuth2 flow\",\n \"state\": \"merged\",\n \"author\": \"alice\",\n \"merged_at\": \"2024-01-22T...\",\n \"change_type\": \"modified\",\n \"discussion_count\": 12,\n \"file_discussion_count\": 4,\n \"merge_commit_sha\": \"abc123\"\n }\n ]\n },\n \"meta\": {\n \"total_mrs\": 5,\n \"renames_followed\": true,\n \"paths_searched\": 2\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore file-history src/foo.rs` works with human output\n- [ ] `lore --robot file-history src/foo.rs` works with JSON envelope\n- [ ] Rename chain displayed in human output when renames detected\n- [ ] Robot JSON includes rename_chain array\n- [ ] --no-follow-renames disables resolution (queries literal path only)\n- [ ] --merged filters to merged MRs only\n- [ ] --discussions includes DiffNote snippets from notes.position_new_path matching\n- [ ] -p for project scoping (exit 18 on ambiguous)\n- [ ] -n limits results\n- [ ] No MR history: friendly message (exit 0, not error)\n- [ ] \"file-history\" added to VALID_COMMANDS array\n- [ ] robot-docs manifest includes file-history command\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/mod.rs` (FileHistoryArgs struct + Commands::FileHistory variant)\n- 
`src/cli/commands/file_history.rs` (NEW — query + human + robot output)\n- `src/cli/commands/mod.rs` (add `pub mod file_history;` + re-exports)\n- `src/main.rs` (handler dispatch + VALID_COMMANDS + robot-docs entry)\n\n## TDD Loop\n\nNo unit tests for CLI wiring. Verify with:\n\n```bash\ncargo check --all-targets\ncargo run -- file-history --help\n```\n\n## Edge Cases\n\n- File path with spaces: clap handles quoting\n- Path not in any MR: empty result, friendly message, not error\n- MRs ordered by COALESCE(merged_at, updated_at) DESC (unmerged MRs use updated_at)\n- --discussions with no DiffNotes: empty discussion section, not error\n- rename_chain omitted from robot JSON when --no-follow-renames is set\n","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:09.027259Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:57:44.467745Z","compaction_level":0,"original_size":0,"labels":["cli","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-z94","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:09.028633Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-1yx","type":"blocks","created_at":"2026-02-02T21:34:16.784122Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-2yo","type":"blocks","created_at":"2026-02-02T21:34:16.741201Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:34:16.824983Z","created_by":"tayloreernisse"}]} +{"id":"bd-z94","title":"Implement 'lore file-history' command with human and robot output","description":"## Background\n\nThe file-history command is Gate 4's user-facing CLI. 
It answers \"which MRs touched this file, and why?\"\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.4-4.5.\n\n## Codebase Context\n\n- CLI pattern: Commands enum in src/cli/mod.rs, handler in src/main.rs, output in src/cli/commands/\n- Project resolution: resolve_project() returns project_id or exit 18 (Ambiguous)\n- Robot mode: {ok, data, meta} envelope pattern\n- merge_requests.merged_at exists (migration 006) — order by COALESCE(merged_at, updated_at) DESC\n- discussions table: issue_id, merge_request_id\n- notes table: position_new_path for DiffNotes (used for --discussions flag)\n- **mr_file_changes table**: migration 016 — already exists and is populated by drain_mr_diffs() (orchestrator.rs lines 708-726, 1514+)\n- resolve_rename_chain() from bd-1yx (src/core/file_history.rs) for rename handling\n- VALID_COMMANDS array in src/main.rs (line ~448)\n- **26 migrations** exist (001-026). LATEST_SCHEMA_VERSION derived from MIGRATIONS.len().\n\n## Approach\n\n### 1. FileHistoryArgs subcommand (`src/cli/mod.rs`):\n```rust\n/// Show MRs that touched a file, with linked issues and discussions\n#[command(name = \"file-history\")]\nFileHistory(FileHistoryArgs),\n```\n\n```rust\n#[derive(Parser, Debug)]\npub struct FileHistoryArgs {\n /// File path to trace history for\n pub path: String,\n /// Scope to a specific project (fuzzy match)\n #[arg(short = 'p', long)]\n pub project: Option,\n /// Include discussion snippets from DiffNotes on this file\n #[arg(long)]\n pub discussions: bool,\n /// Disable rename chain resolution\n #[arg(long = \"no-follow-renames\")]\n pub no_follow_renames: bool,\n /// Only show merged MRs\n #[arg(long)]\n pub merged: bool,\n /// Maximum results\n #[arg(short = 'n', long = \"limit\", default_value = \"50\")]\n pub limit: usize,\n}\n```\n\n### 2. Query logic (`src/cli/commands/file_history.rs`):\n\n1. Resolve project (exit 18 on ambiguous)\n2. Call resolve_rename_chain() unless --no-follow-renames\n3. 
Query mr_file_changes for all resolved paths\n4. JOIN merge_requests for MR details\n5. Optionally fetch DiffNote discussions on the file (notes.position_new_path)\n6. Order by COALESCE(merged_at, updated_at) DESC\n7. Apply --merged filter and --limit\n\n### 3. Human output:\n```\nFile History: src/auth/oauth.rs (via 3 paths, 5 MRs)\nRename chain: src/authentication/oauth.rs -> src/auth/oauth.rs\n\n !456 \"Implement OAuth2 flow\" merged @alice 2024-01-22 modified\n !489 \"Fix OAuth token expiry\" merged @bob 2024-02-15 modified\n !512 \"Refactor auth module\" merged @carol 2024-03-01 renamed\n```\n\n### 4. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"path\": \"src/auth/oauth.rs\",\n \"rename_chain\": [\"src/authentication/oauth.rs\", \"src/auth/oauth.rs\"],\n \"merge_requests\": [\n {\n \"iid\": 456,\n \"title\": \"Implement OAuth2 flow\",\n \"state\": \"merged\",\n \"author\": \"alice\",\n \"merged_at\": \"2024-01-22T...\",\n \"change_type\": \"modified\",\n \"discussion_count\": 12,\n \"file_discussion_count\": 4,\n \"merge_commit_sha\": \"abc123\"\n }\n ]\n },\n \"meta\": {\n \"total_mrs\": 5,\n \"renames_followed\": true,\n \"paths_searched\": 2\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore file-history src/foo.rs` works with human output\n- [ ] `lore --robot file-history src/foo.rs` works with JSON envelope\n- [ ] Rename chain displayed in human output when renames detected\n- [ ] Robot JSON includes rename_chain array\n- [ ] --no-follow-renames disables resolution (queries literal path only)\n- [ ] --merged filters to merged MRs only\n- [ ] --discussions includes DiffNote snippets from notes.position_new_path matching\n- [ ] -p for project scoping (exit 18 on ambiguous)\n- [ ] -n limits results\n- [ ] No MR history: friendly message (exit 0, not error)\n- [ ] \"file-history\" added to VALID_COMMANDS array\n- [ ] robot-docs manifest includes file-history command\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- 
-D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/cli/mod.rs (FileHistoryArgs struct + Commands::FileHistory variant)\n- CREATE: src/cli/commands/file_history.rs (query + human + robot output)\n- MODIFY: src/cli/commands/mod.rs (add pub mod file_history + re-exports)\n- MODIFY: src/main.rs (handler dispatch + VALID_COMMANDS + robot-docs entry)\n\n## TDD Anchor\n\nRED: No unit tests for CLI wiring — verify with cargo check + manual run.\n\nGREEN: Implement query, human renderer, robot renderer.\n\nVERIFY:\n```bash\ncargo check --all-targets\ncargo run --release -- file-history --help\ncargo run --release -- file-history src/main.rs\ncargo run --release -- --robot file-history src/main.rs\n```\n\n## Edge Cases\n\n- File path with spaces: clap handles quoting\n- Path not in any MR: empty result, friendly message, exit 0 (not error)\n- MRs ordered by COALESCE(merged_at, updated_at) DESC (unmerged MRs use updated_at)\n- --discussions with no DiffNotes: empty discussion section, not error\n- rename_chain omitted from robot JSON when --no-follow-renames is set\n- mr_file_changes table empty (sync hasn't fetched diffs yet): friendly message suggesting `lore sync`\n\n## Dependency Context\n\n- **bd-1yx (resolve_rename_chain)**: provides resolve_rename_chain() in src/core/file_history.rs — takes a path and returns Vec of all historical paths. MUST be implemented before this bead.\n- **bd-2yo / migration 016 (mr_file_changes)**: provides the mr_file_changes table with new_path, old_path, change_type columns. Already exists and is populated by drain_mr_diffs() in orchestrator.rs (lines 708-726, 1514+).\n- **bd-3ia (closes_issues)**: provides entity_references with reference_type='closes' linking MRs to issues. 
Used for \"linked issues\" column if extended later.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:09.027259Z","created_by":"tayloreernisse","updated_at":"2026-02-17T17:57:21.258978Z","closed_at":"2026-02-17T17:57:21.258929Z","close_reason":"Implemented file-history command with human/robot output, rename chain resolution, DiffNote discussions, --merged/--no-follow-renames filters, autocorrect registry, robot-docs manifest","compaction_level":0,"original_size":0,"labels":["cli","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-z94","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:09.028633Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-1yx","type":"blocks","created_at":"2026-02-02T21:34:16.784122Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-2yo","type":"blocks","created_at":"2026-02-02T21:34:16.741201Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:34:16.824983Z","created_by":"tayloreernisse"}]} {"id":"bd-zibc","title":"WHO: VALID_COMMANDS + robot-docs manifest","description":"## Background\n\nRegister the who command in main.rs so that typo suggestions work and robot-docs manifest includes the command for agent self-discovery.\n\n## Approach\n\n### 1. VALID_COMMANDS array (~line 471 in suggest_similar_command):\nAdd \"who\" after \"timeline\":\n```rust\nconst VALID_COMMANDS: &[&str] = &[\n \"issues\", \"mrs\", /* ... existing ... */ \"timeline\", \"who\",\n];\n```\n\n### 2. robot-docs manifest (handle_robot_docs, after \"timeline\" entry):\n```json\n\"who\": {\n \"description\": \"People intelligence: experts, workload, active discussions, overlap, review patterns\",\n \"flags\": [\"\", \"--path \", \"--active\", \"--overlap \", \"--reviews\", \"--since \", \"-p/--project\", \"-n/--limit\"],\n \"modes\": {\n \"expert\": \"lore who — Who knows about this area? 
(also: --path for root files)\",\n \"workload\": \"lore who — What is someone working on?\",\n \"reviews\": \"lore who --reviews — Review pattern analysis\",\n \"active\": \"lore who --active — Active unresolved discussions\",\n \"overlap\": \"lore who --overlap — Who else is touching these files?\"\n },\n \"example\": \"lore --robot who src/features/auth/\",\n \"response_schema\": {\n \"ok\": \"bool\",\n \"data\": {\n \"mode\": \"string\",\n \"input\": {\"target\": \"string|null\", \"path\": \"string|null\", \"project\": \"string|null\", \"since\": \"string|null\", \"limit\": \"int\"},\n \"resolved_input\": {\"mode\": \"string\", \"project_id\": \"int|null\", \"project_path\": \"string|null\", \"since_ms\": \"int\", \"since_iso\": \"string\", \"since_mode\": \"string (default|explicit|none)\", \"limit\": \"int\"},\n \"...\": \"mode-specific fields\"\n },\n \"meta\": {\"elapsed_ms\": \"int\"}\n }\n}\n```\n\n### 3. workflows JSON — add people_intelligence:\n```json\n\"people_intelligence\": [\n \"lore --robot who src/path/to/feature/\",\n \"lore --robot who @username\",\n \"lore --robot who @username --reviews\",\n \"lore --robot who --active --since 7d\",\n \"lore --robot who --overlap src/path/\",\n \"lore --robot who --path README.md\"\n]\n```\n\n## Files\n\n- `src/main.rs`\n\n## TDD Loop\n\nVERIFY: `cargo check && cargo run --release -- robot-docs | python3 -c \"import json,sys; d=json.load(sys.stdin); assert 'who' in d['commands']\"`\n\n## Acceptance Criteria\n\n- [ ] \"who\" in VALID_COMMANDS\n- [ ] `lore robot-docs` JSON contains who command with all 5 modes\n- [ ] workflows contains people_intelligence array\n- [ ] cargo check passes\n\n## Edge Cases\n\n- The VALID_COMMANDS array is used for typo suggestion via Levenshtein distance — ensure \"who\" does not collide with other short commands (it does not; closest is \"show\" at distance 2)\n- robot-docs JSON is constructed via serde_json::json!() macro inside a raw string — ensure no trailing commas or JSON 
syntax errors in the manually-written JSON block\n- The response_schema in robot-docs is documentation-only (not validated at runtime) — ensure it matches actual output structure from bd-3mj2\n- If handle_robot_docs location has changed since plan was written, search for \"robot-docs\" or \"robot_docs\" in main.rs to find current location","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:41:35.098890Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.601819Z","closed_at":"2026-02-08T04:10:29.601785Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. All quality gates pass.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-zibc","depends_on_id":"bd-2rk9","type":"blocks","created_at":"2026-02-08T02:43:40.191734Z","created_by":"tayloreernisse"}]} {"id":"bd-zqpf","title":"WHO: Expert mode query (query_expert)","description":"## Background\n\nExpert mode answers \"Who should I talk to about this feature/file?\" by analyzing DiffNote activity at a given path. It scores users by a combination of review breadth (distinct MRs reviewed), authorship breadth (distinct MRs authored), and review intensity (DiffNote count). 
This is the primary use case for lore who.\n\n## Approach\n\nSingle CTE with two UNION ALL branches (reviewer + author), then SQL-level aggregation, scoring, sorting, and LIMIT.\n\n### Key SQL pattern (prefix variant — exact variant replaces LIKE with =):\n\n```sql\nWITH activity AS (\n -- Reviewer branch: DiffNotes on other people's MRs\n SELECT n.author_username AS username, 'reviewer' AS role,\n COUNT(DISTINCT m.id) AS mr_cnt, COUNT(*) AS note_cnt,\n MAX(n.created_at) AS last_seen_at\n FROM notes n\n JOIN discussions d ON n.discussion_id = d.id\n JOIN merge_requests m ON d.merge_request_id = m.id\n WHERE n.note_type = 'DiffNote' AND n.is_system = 0\n AND n.author_username IS NOT NULL\n AND (m.author_username IS NULL OR n.author_username != m.author_username) -- self-review exclusion\n AND m.state IN ('opened','merged')\n AND n.position_new_path LIKE ?1 ESCAPE '\\'\n AND n.created_at >= ?2\n AND (?3 IS NULL OR n.project_id = ?3)\n GROUP BY n.author_username\n\n UNION ALL\n\n -- Author branch: MR authors with DiffNote activity at this path\n SELECT m.author_username AS username, 'author' AS role,\n COUNT(DISTINCT m.id) AS mr_cnt, 0 AS note_cnt,\n MAX(n.created_at) AS last_seen_at\n FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n JOIN notes n ON n.discussion_id = d.id\n WHERE n.note_type = 'DiffNote' AND n.is_system = 0\n AND m.author_username IS NOT NULL\n AND n.position_new_path LIKE ?1 ESCAPE '\\'\n AND n.created_at >= ?2\n AND (?3 IS NULL OR n.project_id = ?3)\n GROUP BY m.author_username\n)\nSELECT username,\n SUM(CASE WHEN role='reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count,\n SUM(CASE WHEN role='reviewer' THEN note_cnt ELSE 0 END) AS review_note_count,\n SUM(CASE WHEN role='author' THEN mr_cnt ELSE 0 END) AS author_mr_count,\n MAX(last_seen_at) AS last_seen_at,\n (SUM(CASE WHEN role='reviewer' THEN mr_cnt ELSE 0 END) * 20 +\n SUM(CASE WHEN role='author' THEN mr_cnt ELSE 0 END) * 12 +\n SUM(CASE WHEN role='reviewer' THEN 
note_cnt ELSE 0 END) * 1) AS score\nFROM activity\nGROUP BY username\nORDER BY score DESC, last_seen_at DESC, username ASC\nLIMIT ?4\n```\n\n### Two static SQL strings selected via `if pq.is_prefix { sql_prefix } else { sql_exact }` — the only difference is LIKE vs = on position_new_path. Both use prepare_cached().\n\n### Scoring formula: review_mr * 20 + author_mr * 12 + review_notes * 1\n- MR breadth dominates (prevents \"comment storm\" gaming)\n- Integer arithmetic (no f64 display issues)\n\n### LIMIT+1 truncation pattern:\n```rust\nlet limit_plus_one = (limit + 1) as i64;\n// ... query with limit_plus_one ...\nlet truncated = experts.len() > limit;\nlet experts = experts.into_iter().take(limit).collect();\n```\n\n### ExpertResult struct:\n```rust\npub struct ExpertResult {\n pub path_query: String,\n pub path_match: String, // \"exact\" or \"prefix\"\n pub experts: Vec,\n pub truncated: bool,\n}\npub struct Expert {\n pub username: String, pub score: i64,\n pub review_mr_count: u32, pub review_note_count: u32,\n pub author_mr_count: u32, pub last_seen_ms: i64,\n}\n```\n\n## Files\n\n- `src/cli/commands/who.rs`\n\n## TDD Loop\n\nRED:\n```\ntest_expert_query — insert project, MR, discussion, 3 DiffNotes; verify expert ranking\ntest_expert_excludes_self_review_notes — author_a comments on own MR; review_mr_count must be 0\ntest_expert_truncation — 3 experts, limit=2 -> truncated=true, len=2; limit=10 -> false\n```\n\nGREEN: Implement query_expert with both SQL variants\nVERIFY: `cargo test -- expert`\n\n## Acceptance Criteria\n\n- [ ] test_expert_query passes (reviewer_b ranked first by score)\n- [ ] test_expert_excludes_self_review_notes passes (author_a has review_mr_count=0)\n- [ ] test_expert_truncation passes (truncated flag correct at both limits)\n- [ ] Default since window: 6m\n\n## Edge Cases\n\n- Self-review: MR author commenting on own diff must NOT count as reviewer (filter n.author_username != m.author_username with IS NULL guard on 
m.author_username)\n- MR state: only 'opened' and 'merged' — closed/unmerged MRs are noise\n- Project scoping is on n.project_id (not m.project_id) to maximize index usage\n- Author branch also filters n.is_system = 0 for consistency","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:40:20.990590Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.596337Z","closed_at":"2026-02-08T04:10:29.596299Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. All quality gates pass.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-zqpf","depends_on_id":"bd-2ldg","type":"blocks","created_at":"2026-02-08T02:43:36.714415Z","created_by":"tayloreernisse"},{"issue_id":"bd-zqpf","depends_on_id":"bd-34rr","type":"blocks","created_at":"2026-02-08T02:43:36.905828Z","created_by":"tayloreernisse"}]} diff --git a/.beads/last-touched b/.beads/last-touched index ad6ea2a..9ba1c54 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-1yx +bd-z94 diff --git a/docs/plan-expose-discussion-ids.feedback-1.md b/docs/plan-expose-discussion-ids.feedback-1.md new file mode 100644 index 0000000..28a6a07 --- /dev/null +++ b/docs/plan-expose-discussion-ids.feedback-1.md @@ -0,0 +1,202 @@ +No `## Rejected Recommendations` section appears in the plan you pasted, so the revisions below are all net-new. + +1. **Add an explicit “Bridge Contract” and fix scope inconsistency** +Analysis: The plan says “Three changes” but defines four. More importantly, identifier requirements are scattered. A single contract section prevents drift and makes every new read surface prove it can drive a write call. + +```diff +@@ +-**Scope**: Three changes, delivered in order: ++**Scope**: Four workstreams, delivered in order: + 1. Add `gitlab_discussion_id` to notes output + 2. Add `gitlab_discussion_id` to show command discussion groups + 3. 
Add a standalone `discussions` list command + 4. Fix robot-docs to list actual field names instead of opaque type references ++ ++## Bridge Contract (Cross-Cutting) ++Every read payload that surfaces notes/discussions MUST include: ++- `project_path` ++- `noteable_type` ++- `parent_iid` ++- `gitlab_discussion_id` ++- `gitlab_note_id` (when note-level data is returned) ++This contract is required so agents can deterministically construct `glab api` write calls. +``` + +2. **Normalize identifier naming now (break ambiguous names)** +Analysis: Current `id`/`gitlab_id` naming is ambiguous in mixed payloads. Rename to explicit `note_id` and `gitlab_note_id` now (you explicitly don’t care about backward compatibility). This reduces automation mistakes. + +```diff +@@ 1b. Add field to `NoteListRow` +-pub struct NoteListRow { +- pub id: i64, +- pub gitlab_id: i64, ++pub struct NoteListRow { ++ pub note_id: i64, // local DB id ++ pub gitlab_note_id: i64, // GitLab note id +@@ +@@ 1c. Add field to `NoteListRowJson` +-pub struct NoteListRowJson { +- pub id: i64, +- pub gitlab_id: i64, ++pub struct NoteListRowJson { ++ pub note_id: i64, ++ pub gitlab_note_id: i64, +@@ +-#### 2f. Add `gitlab_note_id` to note detail structs in show +-While we're here, add `gitlab_id` to `NoteDetail`, `MrNoteDetail`, and their JSON ++#### 2f. Add `gitlab_note_id` to note detail structs in show ++While we're here, add `gitlab_note_id` to `NoteDetail`, `MrNoteDetail`, and their JSON + counterparts. +``` + +3. **Stop positional column indexing for these changes** +Analysis: In `list.rs`, row extraction is positional (`row.get(18)`, etc.). Adding fields is fragile and easy to break silently. Use named aliases and named lookup for robustness. 
+ +```diff +@@ 1a/1b SQL + query_map +- p.path_with_namespace AS project_path ++ p.path_with_namespace AS project_path, ++ d.gitlab_discussion_id AS gitlab_discussion_id +@@ +- project_path: row.get(18)?, +- gitlab_discussion_id: row.get(19)?, ++ project_path: row.get("project_path")?, ++ gitlab_discussion_id: row.get("gitlab_discussion_id")?, +``` + +4. **Redesign `discussions` query to avoid correlated subquery fanout** +Analysis: Proposed query uses many correlated subqueries per row. That’s acceptable for tiny MR-scoped sets, but degrades for project-wide scans. Use a base CTE + one rollup pass over notes. + +```diff +@@ 3c. SQL Query +-SELECT +- d.id, +- ... +- (SELECT COUNT(*) FROM notes n2 WHERE n2.discussion_id = d.id AND n2.is_system = 0) AS note_count, +- (SELECT n3.author_username FROM notes n3 WHERE n3.discussion_id = d.id ORDER BY n3.position LIMIT 1) AS first_author, +- ... +-FROM discussions d ++WITH base AS ( ++ SELECT d.id, d.gitlab_discussion_id, d.noteable_type, d.project_id, d.issue_id, d.merge_request_id, ++ d.individual_note, d.first_note_at, d.last_note_at, d.resolvable, d.resolved ++ FROM discussions d ++ {where_sql} ++), ++note_rollup AS ( ++ SELECT n.discussion_id, ++ COUNT(*) FILTER (WHERE n.is_system = 0) AS user_note_count, ++ COUNT(*) AS total_note_count, ++ MIN(CASE WHEN n.is_system = 0 THEN n.position END) AS first_user_pos ++ FROM notes n ++ JOIN base b ON b.id = n.discussion_id ++ GROUP BY n.discussion_id ++) ++SELECT ... ++FROM base b ++LEFT JOIN note_rollup r ON r.discussion_id = b.id +``` + +5. **Add explicit index work for new access patterns** +Analysis: Existing indexes are good but not ideal for new list patterns (`project + last_note`, note position ordering inside discussion). Add migration entries to keep latency stable. + +```diff +@@ ## 3. Add Standalone `discussions` List Command ++#### 3h. 
Add migration for discussion-list performance ++**File**: `migrations/027_discussions_list_indexes.sql` ++```sql ++CREATE INDEX IF NOT EXISTS idx_discussions_project_last_note ++ ON discussions(project_id, last_note_at DESC, id DESC); ++CREATE INDEX IF NOT EXISTS idx_discussions_project_first_note ++ ON discussions(project_id, first_note_at DESC, id DESC); ++CREATE INDEX IF NOT EXISTS idx_notes_discussion_position ++ ON notes(discussion_id, position); ++``` +``` + +6. **Add keyset pagination (critical for agent workflows)** +Analysis: `--limit` alone is not enough for automation over large datasets. Add cursor-based pagination with deterministic sort keys and `next_cursor` in JSON. + +```diff +@@ 3a. CLI Args ++ /// Keyset cursor from previous response ++ #[arg(long, help_heading = "Output")] ++ pub cursor: Option, +@@ +@@ Response Schema +- "total_count": 15, +- "showing": 15 ++ "total_count": 15, ++ "showing": 15, ++ "next_cursor": "eyJsYXN0X25vdGVfYXQiOjE3MDAwMDAwMDAwMDAsImlkIjoxMjN9" +@@ +@@ Validation Criteria ++7. `lore -J discussions ... --cursor ` returns the next stable page without duplicates/skips +``` + +7. **Fix semantic ambiguities in discussion summary fields** +Analysis: `note_count` is ambiguous, and `first_author` can accidentally be a system note author. Make fields explicit and consistent with non-system default behavior. + +```diff +@@ Response Schema +- "note_count": 3, +- "first_author": "elovegrove", ++ "user_note_count": 3, ++ "total_note_count": 4, ++ "first_user_author": "elovegrove", +@@ +@@ 3d. Filters struct / path behavior +-- `path` → `EXISTS (SELECT 1 FROM notes n WHERE n.discussion_id = d.id AND n.position_new_path LIKE ?)` ++- `path` → match on BOTH `position_new_path` and `position_old_path` (exact/prefix) +``` + +8. **Enrich show outputs with actionable thread metadata** +Analysis: Adding only discussion id helps, but agents still need thread state and note ids to pick targets correctly. 
Add `resolvable`, `resolved`, `last_note_at_iso`, and `gitlab_note_id` in show discussion payloads. + +```diff +@@ 2a/2b show discussion structs + pub struct DiscussionDetailJson { + pub gitlab_discussion_id: String, ++ pub resolvable: bool, ++ pub resolved: bool, ++ pub last_note_at_iso: String, + pub notes: Vec, +@@ + pub struct NoteDetailJson { ++ pub gitlab_note_id: i64, + pub author_username: String, +``` + +9. **Harden robot-docs against schema drift with tests** +Analysis: Static JSON in `main.rs` will drift again. Add a lightweight contract test that asserts docs include required fields for `notes`, `discussions`, and show payloads. + +```diff +@@ 4. Fix Robot-Docs Response Schemas ++#### 4f. Add robot-docs contract tests ++**File**: `src/main.rs` (or dedicated test module) ++- Assert `robot-docs` contains `gitlab_discussion_id` and `gitlab_note_id` in: ++ - `notes.response_schema` ++ - `issues.response_schema.show` ++ - `mrs.response_schema.show` ++ - `discussions.response_schema` +``` + +10. **Adjust delivery order to reduce rework and include missing CSV path** +Analysis: In your sample `handle_discussions`, `csv` is declared in args but not handled. Also, robot-docs should land after all payload changes. Sequence should minimize churn. + +```diff +@@ Delivery Order +-3. **Change 4** (robot-docs) — depends on 1 and 2 being done so schemas are accurate. +-4. **Change 3** (discussions command) — largest change, depends on 1 for design consistency. ++3. **Change 3** (discussions command + indexes + pagination) — largest change. ++4. **Change 4** (robot-docs + contract tests) — last, after payloads are final. +@@ 3e. Handler wiring +- match format { ++ match format { + "json" => ... + "jsonl" => ... ++ "csv" => print_list_discussions_csv(&result), + _ => ... + } +``` + +If you want, I can produce a single consolidated revised plan markdown with these edits applied so you can drop it in directly. 
\ No newline at end of file diff --git a/docs/plan-expose-discussion-ids.feedback-2.md b/docs/plan-expose-discussion-ids.feedback-2.md new file mode 100644 index 0000000..a6d2524 --- /dev/null +++ b/docs/plan-expose-discussion-ids.feedback-2.md @@ -0,0 +1,162 @@ +Best non-rejected upgrades I’d make to this plan are below. They focus on reducing schema drift, making robot output safer to consume, and improving performance behavior at scale. + +1. Add a shared contract model and field constants first (before workstreams 1-4) +Rationale: Right now each command has its own structs and ad-hoc mapping. That is exactly how drift happens. A single contract definition reused by `notes`, `show`, `discussions`, and robot-docs gives compile-time coupling between output payloads and docs. It also makes future fields cheaper and safer to add. + +```diff +@@ Scope: Four workstreams, delivered in order: +-1. Add `gitlab_discussion_id` to notes output +-2. Add `gitlab_discussion_id` to show command discussion groups +-3. Add a standalone `discussions` list command +-4. Fix robot-docs to list actual field names instead of opaque type references ++0. Introduce shared Bridge Contract model/constants used by notes/show/discussions/robot-docs ++1. Add `gitlab_discussion_id` to notes output ++2. Add `gitlab_discussion_id` to show command discussion groups ++3. Add a standalone `discussions` list command ++4. Fix robot-docs to list actual field names instead of opaque type references + ++## 0. Shared Contract Model (Cross-Cutting) ++Define canonical required-field constants and shared mapping helpers, then consume them in: ++- `src/cli/commands/list.rs` ++- `src/cli/commands/show.rs` ++- `src/cli/robot.rs` ++- `src/main.rs` robot-docs builder ++This removes duplicated field-name strings and prevents docs/output mismatch. +``` + +2. Make bridge fields “non-droppable” in robot mode +Rationale: The current plan adds fields, but `--fields` can still remove them. 
That breaks the core read/write bridge contract in exactly the workflows this change is trying to fix. In robot mode, contract fields should always be force-included. + +```diff +@@ ## Bridge Contract (Cross-Cutting) + Every read payload that surfaces notes or discussions **MUST** include: + - `project_path` + - `noteable_type` + - `parent_iid` + - `gitlab_discussion_id` + - `gitlab_note_id` (when note-level data is returned — i.e., in notes list and show detail) + ++### Field Filtering Guardrail ++In robot mode, `filter_fields` must force-include Bridge Contract fields even when users pass a narrower `--fields` list. ++Human/table mode keeps existing behavior. +``` + +3. Replace correlated subqueries in `discussions` rollup with a single-pass window/aggregate pattern +Rationale: Your CTE is better than naive fanout, but it still uses multiple correlated sub-selects per discussion for first author/body/path. At 200K+ discussions this can regress badly depending on cache/index state. A window-ranked `notes` CTE with grouped aggregates is usually faster and more predictable in SQLite. + +```diff +@@ #### 3c. SQL Query +-Core query uses a CTE + rollup to avoid correlated subquery fanout on larger result sets: ++Core query uses a CTE + ranked-notes rollup (window function) to avoid per-row correlated subqueries: + +-WITH filtered_discussions AS (...), +-note_rollup AS ( +- SELECT +- n.discussion_id, +- SUM(...) AS note_count, +- (SELECT ... LIMIT 1) AS first_author, +- (SELECT ... LIMIT 1) AS first_note_body, +- (SELECT ... LIMIT 1) AS position_new_path, +- (SELECT ... LIMIT 1) AS position_new_line +- FROM notes n +- ... 
+-) ++WITH filtered_discussions AS (...), ++ranked_notes AS ( ++ SELECT ++ n.*, ++ ROW_NUMBER() OVER (PARTITION BY n.discussion_id ORDER BY n.position, n.id) AS rn ++ FROM notes n ++ WHERE n.discussion_id IN (SELECT id FROM filtered_discussions) ++), ++note_rollup AS ( ++ SELECT ++ discussion_id, ++ SUM(CASE WHEN is_system = 0 THEN 1 ELSE 0 END) AS note_count, ++ MAX(CASE WHEN rn = 1 AND is_system = 0 THEN author_username END) AS first_author, ++ MAX(CASE WHEN rn = 1 AND is_system = 0 THEN body END) AS first_note_body, ++ MAX(CASE WHEN position_new_path IS NOT NULL THEN position_new_path END) AS position_new_path, ++ MAX(CASE WHEN position_new_line IS NOT NULL THEN position_new_line END) AS position_new_line ++ FROM ranked_notes ++ GROUP BY discussion_id ++) +``` + +4. Add direct GitLab ID filters for deterministic bridging +Rationale: Bridge workflows often start from one known ID. You already have `gitlab_note_id` in notes filters, but discussion filtering still looks internal-ID-centric. Add explicit GitLab-ID filters so agents do not need extra translation calls. + +```diff +@@ #### 3a. CLI Args + pub struct DiscussionsArgs { ++ /// Filter by GitLab discussion ID ++ #[arg(long, help_heading = "Filters")] ++ pub gitlab_discussion_id: Option, +@@ + +@@ #### 3d. Filters struct + pub struct DiscussionListFilters { ++ pub gitlab_discussion_id: Option, +@@ + } +``` + +```diff +@@ ## 1. Add `gitlab_discussion_id` to Notes Output ++#### 1g. Add `--gitlab-discussion-id` filter to notes ++Allow filtering notes directly by GitLab thread ID (not only internal discussion ID). ++This enables one-hop note retrieval from external references. +``` + +5. Add optional note expansion to `discussions` for fewer round-trips +Rationale: Today the agent flow is often `discussions -> show`. Optional embedded notes (`--include-notes N`) gives a fast path for “list unresolved threads with latest context” without forcing full show payloads. 
+ +```diff +@@ ### Design + lore -J discussions --for-mr 99 --resolution unresolved ++lore -J discussions --for-mr 99 --resolution unresolved --include-notes 2 + +@@ #### 3a. CLI Args ++ /// Include up to N latest notes per discussion (0 = none) ++ #[arg(long, default_value = "0", help_heading = "Output")] ++ pub include_notes: usize, +``` + +6. Upgrade robot-docs from string blobs to structured schema + explicit contract block +Rationale: `contains("gitlab_discussion_id")` tests on schema strings are brittle. A structured schema object gives machine-checked docs and reliable test assertions. Add a contract section for agent consumers. + +```diff +@@ ## 4. Fix Robot-Docs Response Schemas +-#### 4a. Notes response_schema +-Replace stringly-typed schema snippets... ++#### 4a. Notes response_schema (structured) ++Represent response fields as JSON objects (field -> type/nullable), not freeform strings. + ++#### 4g. Add `bridge_contract` section in robot-docs ++Publish canonical required fields per entity: ++- notes ++- discussions ++- show.discussions ++- show.notes +``` + +7. Strengthen validation: add CLI-level contract tests and perf guardrails +Rationale: Most current tests are unit-level struct/query checks. Add end-to-end JSON contract tests via command handlers, plus a benchmark-style regression test (ignored by default) so performance work stays intentional. + +```diff +@@ ## Validation Criteria + 8. Bridge Contract fields (...) are present in every applicable read payload ++9. Contract fields remain present even with `--fields` in robot mode ++10. `discussions` query meets performance guardrail on representative fixture (documented threshold) + +@@ ### Tests ++#### Test: robot-mode fields cannot drop bridge contract keys ++Run notes/discussions JSON output through `filter_fields` path and assert required keys remain. 
++ ++#### Test: CLI contract integration ++Invoke command handlers for `notes`, `discussions`, `mrs `, parse JSON, assert required keys and types. ++ ++#### Test (ignored): large-fixture performance regression ++Generate representative fixture and assert `query_discussions` stays under target elapsed time. +``` + +If you want, I can now produce a full “v2 plan” document that applies these diffs end-to-end (including revised delivery order and complete updated sections). \ No newline at end of file diff --git a/docs/plan-expose-discussion-ids.feedback-4.md.bak b/docs/plan-expose-discussion-ids.feedback-4.md.bak new file mode 100644 index 0000000..6d4c179 --- /dev/null +++ b/docs/plan-expose-discussion-ids.feedback-4.md.bak @@ -0,0 +1,160 @@ +I reviewed the plan end-to-end and focused only on new improvements (none of the items in `## Rejected Recommendations` are re-proposed). + +1. Add direct `--discussion-id` retrieval paths +Rationale: This removes a full discovery hop for the exact workflow that failed (replying to a known thread). It also reduces ambiguity and query cost when an agent already has the thread ID. + +```diff +@@ Core Changes + | 7 | Fix robot-docs to list actual field names | Docs | Small | ++| 8 | Add direct `--discussion-id` filter to notes/discussions/show | Core | Small | + +@@ Change 3: Add Standalone `discussions` List Command + lore -J discussions --for-mr 99 --cursor # keyset pagination ++lore -J discussions --discussion-id 6a9c1750b37d... # direct lookup + +@@ 3a. CLI Args ++ #[arg(long, conflicts_with_all = ["for_issue", "for_mr"], help_heading = "Filters")] ++ pub discussion_id: Option, + +@@ Change 1: Add `gitlab_discussion_id` to Notes Output ++Add `--discussion-id ` filter to `notes` for direct note retrieval within one thread. +``` + +2. Add a shared filter compiler to eliminate count/query drift +Rationale: The plan currently repeats filters across data query, `total_count`, and `incomplete_rows` count queries. 
That is a classic reliability bug source. A single compiled filter object makes count semantics provably consistent. + +```diff +@@ Count Semantics (Cross-Cutting Convention) ++## Filter Compiler (NEW, Cross-Cutting Convention) ++All list commands must build predicates via a shared `CompiledFilters` object that emits: ++- SQL predicate fragment ++- bind parameters ++- canonical filter string (for cursor hash) ++The same compiled object is reused by: ++- page data query ++- `total_count` query ++- `incomplete_rows` query +``` + +3. Harden keyset pagination semantics for `DESC`, limits, and client ergonomics +Rationale: `(sort_value, id) > (?, ?)` is only correct for ascending order. Descending sort needs `<`. Also add explicit `has_more` so clients don’t infer from cursor nullability. + +```diff +@@ Keyset Pagination (Cross-Cutting, Change B) +-```sql +-WHERE (sort_value, id) > (?, ?) +-``` ++Use comparator by order: ++- ASC: `(sort_value, id) > (?, ?)` ++- DESC: `(sort_value, id) < (?, ?)` + +@@ 3a. CLI Args ++ #[arg(short = 'n', long = "limit", default_value = "50", value_parser = clap::value_parser!(usize).range(1..=500), help_heading = "Output")] ++ pub limit: usize, + +@@ Response Schema +- "next_cursor": "aW...xyz==" ++ "next_cursor": "aW...xyz==", ++ "has_more": true +``` + +4. Add DB-level entity integrity invariants (not just response invariants) +Rationale: Response-side filtering is good, but DB correctness should also be guarded. This prevents silent corruption and bad joins from ingestion or future migrations. + +```diff +@@ Contract Invariants (NEW) ++### Entity Integrity Invariants (DB + Ingest) ++1. `discussions` must belong to exactly one parent (`issue_id XOR merge_request_id`). ++2. `discussions.noteable_type` must match the populated parent column. ++3. Natural-key uniqueness is enforced where valid: ++ - `(project_id, gitlab_discussion_id)` unique for discussions. ++4. Ingestion must reject/quarantine rows violating invariants and report counts. 
+ +@@ Supporting Indexes (Cross-Cutting, Change D) ++CREATE UNIQUE INDEX IF NOT EXISTS idx_discussions_project_gitlab_discussion_id ++ ON discussions(project_id, gitlab_discussion_id); +``` + +5. Switch bulk note loading to streaming grouping (avoid large intermediate vecs) +Rationale: Current bulk strategy still materializes all notes before grouping. Streaming into the map cuts peak memory and improves large-MR stability. + +```diff +@@ Change 2e. Constructor — use bulk notes map +-let all_note_rows: Vec<_> = ... // From bulk query above +-let notes_by_discussion: HashMap<i64, Vec<_>> = +- all_note_rows.into_iter().fold(HashMap::new(), |mut map, note| { +- map.entry(note.discussion_id).or_insert_with(Vec::new).push(note); +- map +- }); ++let mut notes_by_discussion: HashMap<i64, Vec<_>> = HashMap::new(); ++for row in bulk_note_stmt.query_map(params, map_note_row)? { ++ let note = row?; ++ notes_by_discussion.entry(note.discussion_id).or_default().push(note); ++} +``` + +6. Make freshness tri-state (`fresh|stale|unknown`) and fail closed on unknown with `--require-fresh` +Rationale: `stale: bool` alone cannot represent “never synced / unknown project freshness.” For write safety, unknown freshness should be explicit and reject under freshness constraints. + +```diff +@@ Freshness Metadata & Staleness Guards + pub struct ResponseMeta { + pub elapsed_ms: i64, + pub data_as_of_iso: String, + pub sync_lag_seconds: i64, + pub stale: bool, ++ pub freshness_state: String, // "fresh" | "stale" | "unknown" ++ #[serde(skip_serializing_if = "Option::is_none")] ++ pub freshness_reason: Option<String>, + pub incomplete_rows: i64, +@@ +-if sync_lag_seconds > max_age_secs { ++if freshness_state == "unknown" || sync_lag_seconds > max_age_secs { +``` + +7. Tune indexes to match actual ORDER BY paths in window queries +Rationale: `idx_notes_discussion_position` is likely insufficient for the two window orderings. A covering-style index aligned with partition/order keys reduces random table lookups. 
+ +```diff +@@ Supporting Indexes (Cross-Cutting, Change D) +--- Notes: window function ORDER BY (discussion_id, position) for ROW_NUMBER() +-CREATE INDEX IF NOT EXISTS idx_notes_discussion_position +- ON notes(discussion_id, position); ++-- Notes: support dual ROW_NUMBER() orderings and reduce table lookups ++CREATE INDEX IF NOT EXISTS idx_notes_discussion_window ++ ON notes(discussion_id, is_system, position, created_at, gitlab_id); +``` + +8. Add a phased rollout gate before strict exclusion becomes default +Rationale: Enforcing `gitlab_* IS NOT NULL` immediately can hide data if existing rows are incomplete. A short observation gate prevents sudden regressions while preserving the end-state contract. + +```diff +@@ Delivery Order ++Batch 0: Observability gate (NEW) ++- Ship `incomplete_rows` and freshness meta first ++- Measure incomplete rate across real datasets ++- If incomplete ratio <= threshold, enable strict exclusion defaults ++- If above threshold, block rollout and fix ingestion quality first ++ + Change 1 (notes output) ──┐ +``` + +9. Add property-based invariants for pagination/count correctness +Rationale: Your current tests are scenario-based and good, but randomized property tests are much better at catching edge-case cursor/count bugs. + +```diff +@@ Tests (Change 3 / Change B) ++**Test 12**: Property-based pagination invariants (`proptest`) ++```rust ++#[test] ++fn prop_discussion_cursor_no_overlap_no_gap_under_random_data() { /* ... */ } ++``` ++ ++**Test 13**: Property-based count invariants ++```rust ++#[test] ++fn prop_total_count_and_incomplete_rows_match_filter_partition() { /* ... */ } ++``` +``` + +If you want, I can now produce a fully consolidated “Plan v4” that applies these diffs cleanly into your original document so it reads as a single coherent spec. 
\ No newline at end of file diff --git a/docs/plan-expose-discussion-ids.feedback-5.md.bak b/docs/plan-expose-discussion-ids.feedback-5.md.bak new file mode 100644 index 0000000..40b9b3a --- /dev/null +++ b/docs/plan-expose-discussion-ids.feedback-5.md.bak @@ -0,0 +1,158 @@ +I reviewed the whole plan and only proposed changes that are not in your `## Rejected Recommendations`. + +1. **Fix plan-internal inconsistencies first** +Analysis: The plan currently has a few self-contradictions (`8` vs `9` cross-cutting improvements, `stale` still referenced after moving to tri-state freshness). Cleaning this prevents implementation drift and bad AC validation. + +```diff +--- a/plan.md ++++ b/plan.md +@@ +-**Scope**: 8 core changes + 8 cross-cutting architectural improvements across 3 tiers: ++**Scope**: 8 core changes + 9 cross-cutting architectural improvements across 3 tiers: +@@ AC-7: Freshness Metadata Present & Staleness Guards Work +-lore -J notes -n 1 | jq '.meta | {data_as_of_iso, sync_lag_seconds, stale}' +-# All fields present, stale=false if recently synced ++lore -J notes -n 1 | jq '.meta | {data_as_of_iso, sync_lag_seconds, freshness_state}' ++# All fields present, freshness_state is one of fresh|stale|unknown +@@ Change 6 Response Schema example +- "stale": false, ++ "freshness_state": "fresh", +``` + +2. **Require snapshot-consistent list responses (page + counts)** +Analysis: `total_count`, `incomplete_rows`, and page rows can drift if sync writes between queries. Enforcing a single read snapshot for all list commands makes pagination and counts deterministic. + +```diff +--- a/plan.md ++++ b/plan.md +@@ Count Semantics (Cross-Cutting Convention) + All list commands use consistent count fields: ++All three queries (`page`, `total_count`, `incomplete_rows`) MUST execute inside one read transaction/snapshot. ++This guarantees count/page consistency under concurrent sync writes. +``` + +3. 
**Use RAII transactions instead of manual `BEGIN/COMMIT`** +Analysis: Manual `execute_batch("BEGIN...")` is fragile on early returns. `rusqlite::Transaction` guarantees rollback on error and removes transaction-leak risk. + +```diff +--- a/plan.md ++++ b/plan.md +@@ Change 2: Consistency guarantee +-conn.execute_batch("BEGIN DEFERRED")?; +-// ... discussion query ... +-// ... bulk note query ... +-conn.execute_batch("COMMIT")?; ++let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Deferred)?; ++// ... discussion query ... ++// ... bulk note query ... ++tx.commit()?; +``` + +4. **Allow small focused new modules for query infrastructure** +Analysis: Keeping everything in `list.rs`/`show.rs` will become a maintenance hotspot as filters/cursors/freshness expand. A small module split reduces coupling and regression risk. + +```diff +--- a/plan.md ++++ b/plan.md +@@ Change 3: File Architecture +-**No new files.** Follow existing patterns: ++Allow focused infra modules for shared logic: ++- `src/cli/query/filters.rs` (CompiledFilters + builders) ++- `src/cli/query/cursor.rs` (encode/decode/validate v2 cursors) ++- `src/cli/query/freshness.rs` (freshness computation + guards) ++Command handlers remain in existing files. +``` + +5. **Add ingest-time `discussion_rollups` to avoid repeated heavy window scans** +Analysis: Window functions are good, but doing them on every read over large note volumes is still expensive. Precomputing rollups during ingest gives lower and more predictable p95 latency while keeping read paths simpler. + +```diff +--- a/plan.md ++++ b/plan.md +@@ Architectural Improvements (Cross-Cutting) ++| J | Ingest-time discussion rollups (`discussion_rollups`) | Performance | Medium | +@@ Change 3 SQL strategy +-Use `ROW_NUMBER()` window function instead of correlated subqueries... ++Primary path: join precomputed `discussion_rollups` for `note_count`, `first_author`, ++`first_note_body`, `position_new_path`, `position_new_line`. 
++Fallback path: window-function recompute if rollup row is missing (defensive correctness). +``` + +6. **Add deterministic numeric project selector `--project-id`** +Analysis: `-p group/repo` is human-friendly, but numeric project IDs are safer for robots and avoid fuzzy/project-path ambiguity. This reduces false ambiguity failures and lookup overhead. + +```diff +--- a/plan.md ++++ b/plan.md +@@ DiscussionsArgs + #[arg(short = 'p', long, help_heading = "Filters")] + pub project: Option<String>, ++ #[arg(long, conflicts_with = "project", help_heading = "Filters")] ++ pub project_id: Option<i64>, +@@ Ambiguity handling ++If `--project-id` is provided, IID resolution is scoped directly to that project. ++`--project-id` takes precedence over path-based project matching. +``` + +7. **Make path filtering rename-aware (`old` + `new`)** +Analysis: The current `--path` strategy, which only uses `position_new_path`, misses deleted/renamed-file discussions. Supporting side selection makes the feature materially more useful for review workflows. + +```diff +--- a/plan.md ++++ b/plan.md +@@ DiscussionsArgs + #[arg(long, help_heading = "Filters")] + pub path: Option<String>, ++ #[arg(long, value_parser = ["either", "new", "old"], default_value = "either", help_heading = "Filters")] ++ pub path_side: String, +@@ Change 3 filtering +-Path filter matches `position_new_path`. ++Path filter semantics: ++- `either` (default): match `position_new_path` OR `position_old_path` ++- `new`: match only `position_new_path` ++- `old`: match only `position_old_path` +``` + +8. **Add explicit freshness behavior for empty-result queries + bootstrap backfill** +Analysis: Freshness based only on “participating rows” is undefined when results are empty. Define deterministic behavior and backfill `project_sync_state` on migration so `unknown` doesn’t spike unexpectedly after deploy. 
+ +```diff +--- a/plan.md ++++ b/plan.md +@@ Freshness state logic ++Empty-result rules: ++- If query is project-scoped (`-p` or `--project-id`), freshness is computed from that project even when no rows match. ++- If query is unscoped and returns zero rows, freshness is computed from all tracked projects. +@@ A1. Track per-project sync timestamp ++Migration step: seed `project_sync_state` from latest known sync metadata where available ++to avoid mass `unknown` freshness immediately after rollout. +``` + +9. **Upgrade `--discussion-id` from filter-only to first-class thread retrieval** +Analysis: Filtering list output by discussion ID still returns list-shaped data and partial note context. A direct thread retrieval mode is faster for agent workflows and avoids extra commands. + +```diff +--- a/plan.md ++++ b/plan.md +@@ Core Changes +-| 8 | Add direct `--discussion-id` filter to notes/discussions/show | Core | Small | ++| 8 | Add direct `--discussion-id` filter + single-thread retrieval mode | Core | Medium | +@@ Change 8 ++lore -J discussions --discussion-id --full-thread ++# Returns one discussion with full notes payload (same note schema as show command). +``` + +10. **Replace ad-hoc AC performance timing with repeatable perf harness** +Analysis: `time lore ...` is noisy and machine-dependent. A reproducible seeded benchmark test gives stable guardrails and catches regressions earlier. + +```diff +--- a/plan.md ++++ b/plan.md +@@ AC-10: Performance Budget +-time lore -J discussions --for-mr -n 100 +-# real 0m0.100s (p95 < 150ms) ++cargo test --test perf_discussions -- --ignored --nocapture ++# Uses seeded fixture DB and N repeated runs; asserts p95 < 150ms for target query shape. +``` + +If you want, I can also produce a fully merged “iteration 5” rewritten plan document with these edits applied end-to-end so it’s directly executable by an implementation agent. 
\ No newline at end of file diff --git a/docs/plan-expose-discussion-ids.feedback-6.md.bak b/docs/plan-expose-discussion-ids.feedback-6.md.bak new file mode 100644 index 0000000..1f71139 --- /dev/null +++ b/docs/plan-expose-discussion-ids.feedback-6.md.bak @@ -0,0 +1,143 @@ +Strong plan overall. The biggest gaps I’d fix are around sync-health correctness, idempotency/integrity under repeated ingests, deleted-entity lifecycle, and reducing schema drift risk without heavy reflection machinery. + +I avoided everything in your `## Rejected Recommendations` section. + +**1. Add Sync Health Semantics (not just age)** +Time freshness alone can mislead after partial/failed syncs. Agents need to know whether data is both recent and complete. + +```diff +@@ ## Freshness Metadata & Staleness Guards (Cross-Cutting, Change A/F/G) +- pub freshness_state: String, // "fresh" | "stale" | "unknown" ++ pub freshness_state: String, // "fresh" | "stale" | "unknown" ++ pub sync_status: String, // "ok" | "partial" | "failed" | "never" ++ pub last_successful_sync_run_id: Option, ++ pub last_attempted_sync_run_id: Option, +@@ +-#[arg(long, help_heading = "Freshness")] +-pub require_fresh: Option, ++#[arg(long, help_heading = "Freshness")] ++pub require_fresh: Option, ++#[arg(long, help_heading = "Freshness")] ++pub require_sync_ok: bool, +``` + +Rationale: this prevents false confidence when one project is fresh-by-time but latest sync actually failed or was partial. + +--- + +**2. Add `--require-complete` Guard for Missing Required IDs** +You already expose `meta.incomplete_rows`; add a hard gate for automation. + +```diff +@@ ## Count Semantics (Cross-Cutting Convention) + `incomplete_rows` is computed via a dedicated COUNT query... ++Add CLI guard: ++`--require-complete` fails with exit code 19 when `meta.incomplete_rows > 0`. ++Suggested action: `lore sync --full`. +``` + +Rationale: agents can fail fast instead of silently acting on partial datasets. + +--- + +**3. 
Strengthen Ingestion Idempotency + Referential Integrity for Notes** +You added natural-key uniqueness for discussions; do the same for notes and enforce parent integrity at DB level. + +```diff +@@ ## Supporting Indexes (Cross-Cutting, Change D) + CREATE UNIQUE INDEX IF NOT EXISTS idx_discussions_project_gitlab_discussion_id + ON discussions(project_id, gitlab_discussion_id); ++CREATE UNIQUE INDEX IF NOT EXISTS idx_notes_project_gitlab_id ++ ON notes(project_id, gitlab_id); ++ ++-- Referential integrity ++-- notes.discussion_id REFERENCES discussions(id) ++-- notes.project_id REFERENCES projects(id) +``` + +Rationale: repeated syncs and retries won’t duplicate notes, and orphaned rows can’t accumulate. + +--- + +**4. Add Deleted/Tombstoned Entity Lifecycle** +Current plan excludes null IDs but doesn’t define behavior when GitLab entities are deleted after sync. + +```diff +@@ ## Contract Invariants (NEW) ++### Deletion Lifecycle Invariant ++1. Notes/discussions deleted upstream are tombstoned locally (`deleted_at`), not hard-deleted. ++2. All list/show commands exclude tombstoned rows by default. ++3. Optional flag `--include-deleted` exposes tombstoned rows for audit/debug. +``` + +Rationale: preserves auditability, prevents ghost actions on deleted objects, and avoids destructive resync behavior. + +--- + +**5. Expand Discussions Payload for Rename Accuracy + Better Triage** +`--path-side old` is great, but output currently only returns `position_new_*`. + +```diff +@@ ## Change 3: Add Standalone `discussions` List Command + pub position_new_path: Option, + pub position_new_line: Option, ++ pub position_old_path: Option, ++ pub position_old_line: Option, ++ pub last_author: Option, ++ pub participant_usernames: Vec, +``` + +Rationale: for renamed/deleted files, agents need old and new coordinates to act confidently; participants/last_author improve thread routing and prioritization. + +--- + +**6. 
Add SQLite Busy Handling + Retry Policy** +Read transactions + concurrent sync writes can still produce `SQLITE_BUSY` under load. + +```diff +@@ ## Count Semantics (Cross-Cutting Convention) + **Snapshot consistency**: All three queries ... inside a single read transaction ... ++**Busy handling**: set `PRAGMA busy_timeout` (e.g. 5000ms) and retry transient ++`SQLITE_BUSY` errors up to 3 times with jittered backoff for read commands. +``` + +Rationale: improves reliability in real multi-agent usage without changing semantics. + +--- + +**7. Make Field Definitions Single-Source (Lightweight Drift Prevention)** +You rejected full schema generation from code; a lower-cost middle ground is shared field manifests used by both docs and `--fields` validation. + +```diff +@@ ## Change 7: Fix Robot-Docs Response Schemas ++#### 7h. Single-source field manifests (no reflection) ++Define per-command field constants (e.g. `NOTES_FIELDS`, `DISCUSSIONS_FIELDS`) ++used by: ++1) `--fields` validation/filtering ++2) `--fields minimal` expansion ++3) `robot-docs` schema rendering +``` + +Rationale: cuts drift risk materially while staying much simpler than reflection/snapshot infra. + +--- + +**8. De-duplicate and Upgrade Test Strategy Around Concurrency** +There are duplicated tests across Change 2 and Change 3; add explicit race tests where sync writes happen between list subqueries to prove tx consistency. + +```diff +@@ ## Tests +-**Test 6**: `--project-id` scopes IID resolution directly +-**Test 7**: `--path-side old` matches renamed file discussions +-**Test 8**: `--path-side either` matches both old and new paths ++Move shared discussion-filter tests to a single section under Change 3. ++Add concurrency tests: ++1) count/page/incomplete consistency under concurrent sync writes ++2) show discussion+notes snapshot consistency under concurrent writes +``` + +Rationale: less maintenance noise, better coverage of your highest-risk correctness path. 
+ +--- + +If you want, I can also produce a single consolidated patch block that rewrites your plan text end-to-end with these edits applied in-place. \ No newline at end of file diff --git a/docs/plan-expose-discussion-ids.md b/docs/plan-expose-discussion-ids.md index b59439f..7bb34d7 100644 --- a/docs/plan-expose-discussion-ids.md +++ b/docs/plan-expose-discussion-ids.md @@ -1,3 +1,15 @@ +--- +plan: true +title: "" +status: iterating +iteration: 2 +target_iterations: 8 +beads_revision: 0 +related_plans: [] +created: 2026-02-17 +updated: 2026-02-17 +--- + # Plan: Expose Discussion IDs Across the Read Surface **Problem**: Agents can't bridge from lore's read output to glab's write API because @@ -5,7 +17,7 @@ split contract requires lore to emit every identifier an agent needs to construct a glab write command. -**Scope**: Three changes, delivered in order: +**Scope**: Four workstreams, delivered in order: 1. Add `gitlab_discussion_id` to notes output 2. Add `gitlab_discussion_id` to show command discussion groups 3. Add a standalone `discussions` list command @@ -13,6 +25,47 @@ command. --- +## Bridge Contract (Cross-Cutting) + +Every read payload that surfaces notes or discussions **MUST** include: +- `project_path` +- `noteable_type` +- `parent_iid` +- `gitlab_discussion_id` +- `gitlab_note_id` (when note-level data is returned — i.e., in notes list and show detail) + +This contract exists so agents can deterministically construct `glab api` write calls without +cross-referencing multiple commands. Each workstream below must satisfy these fields in its +output. + +### Field Filtering Guardrail + +In robot mode, `filter_fields` **MUST** force-include Bridge Contract fields even when the +caller passes a narrower `--fields` list. This prevents agents from accidentally stripping +the identifiers they need for write operations. + +**Implementation**: Add a `BRIDGE_FIELDS` constant map per entity type. 
In `filter_fields()`, +when operating in robot mode, union the caller's requested fields with the bridge set before +filtering. Human/table mode keeps existing behavior (no forced fields). + +```rust +// src/cli/robot.rs +const BRIDGE_FIELDS_NOTES: &[&str] = &[ + "project_path", "noteable_type", "parent_iid", + "gitlab_discussion_id", "gitlab_note_id", +]; +const BRIDGE_FIELDS_DISCUSSIONS: &[&str] = &[ + "project_path", "noteable_type", "parent_iid", + "gitlab_discussion_id", +]; +``` + +In `filter_fields`, when entity is `"notes"` or `"discussions"`, merge the bridge set into the +requested fields before filtering the JSON value. This is a ~5-line change to the existing +function. + +--- + ## 1. Add `gitlab_discussion_id` to Notes Output ### Why @@ -92,9 +145,7 @@ Add `d.gitlab_discussion_id` to the SELECT list. Insert it after d.gitlab_discussion_id ``` -Column index shifts: the new field is at index 19 (0-based). - -#### 1b. Add field to `NoteListRow` +#### 1b. Add field to `NoteListRow` and switch to named column lookup **File**: `src/cli/commands/list.rs` line ~1060 @@ -106,16 +157,24 @@ pub struct NoteListRow { } ``` -And in the `query_map` closure (line ~1407): +And in the `query_map` closure (line ~1407), switch from positional indexing to named column +lookup for the new field and ideally all fields. At minimum, the new field uses named lookup +to avoid fragile positional shifts: ```rust Ok(NoteListRow { - // ... existing fields ... - project_path: row.get(18)?, - gitlab_discussion_id: row.get(19)?, // ADD + // ... existing fields using positional gets ... + project_path: row.get("project_path")?, + gitlab_discussion_id: row.get("gitlab_discussion_id")?, // ADD — named lookup }) ``` +**Implementation note**: If converting all existing fields from positional to named lookup is +low-risk, do it in this change. The SQL already uses aliases (`AS project_path`, `AS parent_iid`, +etc.) which rusqlite's `row.get("name")` can resolve. 
This eliminates the fragility of +column-index counting that has caused bugs in the past. If the conversion touches too many +lines, limit named lookup to just the new field and add a follow-up task. + +#### 1c. Add field to `NoteListRowJson` + +**File**: `src/cli/commands/list.rs` line ~1093 @@ -167,6 +226,32 @@ Add a column showing a truncated discussion ID (first 8 chars) in the table view The discussion ID is critical enough for agent workflows that it belongs in `minimal`. +#### 1g. Add `--gitlab-discussion-id` filter to notes + +Allow filtering notes directly by GitLab discussion thread ID (the external string ID, not +the internal integer). This enables one-hop note retrieval from external references — an agent +that received a `gitlab_discussion_id` from another command or webhook can jump straight to +the relevant notes without knowing the internal discussion ID. + +**File**: `src/cli/mod.rs` (NotesArgs) + +```rust +/// Filter by GitLab discussion ID +#[arg(long, help_heading = "Filters")] +pub gitlab_discussion_id: Option<String>, +``` + +**File**: `src/cli/commands/list.rs` (NoteListFilters + where clause) + +Add `gitlab_discussion_id: Option<String>` to `NoteListFilters`. In the WHERE construction: + +```sql +-- When gitlab_discussion_id is provided: +AND d.gitlab_discussion_id = ? +``` + +This is a single WHERE clause addition — minimal complexity, high value for bridge workflows. 
+ ### Tests **File**: `src/cli/commands/list_tests.rs` @@ -303,6 +388,54 @@ fn fields_filter_retains_gitlab_discussion_id() { } ``` +#### Test 4: Bridge fields survive aggressive --fields filtering in robot mode + +```rust +#[test] +fn bridge_fields_forced_in_robot_mode() { + // Agent requests only "body" — bridge fields must still appear + let mut value = serde_json::json!({ + "data": { + "notes": [{ + "id": 1, + "body": "test", + "project_path": "group/repo", + "noteable_type": "MergeRequest", + "parent_iid": 42, + "gitlab_discussion_id": "abc123", + "gitlab_note_id": 500 + }] + } + }); + + // In robot mode, filter_fields merges bridge set + filter_fields_robot( + &mut value, + "notes", + &["body".to_string()], + ); + + let note = &value["data"]["notes"][0]; + assert_eq!(note["body"], "test"); + // Bridge fields survive despite not being requested: + assert!(note.get("project_path").is_some()); + assert!(note.get("gitlab_discussion_id").is_some()); + assert!(note.get("parent_iid").is_some()); +} +``` + +#### Test 5: --gitlab-discussion-id filter returns matching notes + +```rust +#[test] +fn notes_filter_by_gitlab_discussion_id() { + let conn = create_test_db(); + // Insert 2 discussions with different gitlab_discussion_ids, each with notes + // Filter by one gitlab_discussion_id + // Assert only notes from matching discussion are returned +} +``` + --- ## 2. Add `gitlab_discussion_id` to Show Command Discussion Groups @@ -351,74 +484,98 @@ SELECT id, individual_note FROM discussions WHERE merge_request_id = ? ORDER BY ### Changes Required -#### 2a. Add field to domain structs +#### 2a. 
Add fields to domain structs **File**: `src/cli/commands/show.rs` ```rust pub struct DiscussionDetail { pub gitlab_discussion_id: String, // ADD + pub resolvable: bool, // ADD — agents need thread state + pub resolved: bool, // ADD — agents need thread state + pub last_note_at: i64, // ADD — for recency sorting pub notes: Vec, pub individual_note: bool, } pub struct MrDiscussionDetail { pub gitlab_discussion_id: String, // ADD + pub resolvable: bool, // ADD + pub resolved: bool, // ADD + pub last_note_at: i64, // ADD pub notes: Vec, pub individual_note: bool, } ``` -#### 2b. Add field to JSON structs +#### 2b. Add fields to JSON structs ```rust pub struct DiscussionDetailJson { pub gitlab_discussion_id: String, // ADD + pub resolvable: bool, // ADD + pub resolved: bool, // ADD + pub last_note_at_iso: String, // ADD — ISO formatted pub notes: Vec, pub individual_note: bool, } pub struct MrDiscussionDetailJson { pub gitlab_discussion_id: String, // ADD + pub resolvable: bool, // ADD + pub resolved: bool, // ADD + pub last_note_at_iso: String, // ADD — ISO formatted pub notes: Vec, pub individual_note: bool, } ``` -#### 2c. Update queries to SELECT gitlab_discussion_id +#### 2c. Update queries to SELECT new fields **Issue discussions** (`show.rs:325`): ```sql -SELECT id, gitlab_discussion_id, individual_note FROM discussions +SELECT id, gitlab_discussion_id, individual_note, resolvable, resolved, last_note_at +FROM discussions WHERE issue_id = ? ORDER BY first_note_at ``` **MR discussions** (`show.rs:537`): ```sql -SELECT id, gitlab_discussion_id, individual_note FROM discussions +SELECT id, gitlab_discussion_id, individual_note, resolvable, resolved, last_note_at +FROM discussions WHERE merge_request_id = ? ORDER BY first_note_at ``` #### 2d. Update query_map closures -The `disc_rows` tuple changes from `(i64, bool)` to `(i64, String, bool)`. +The `disc_rows` tuple changes from `(i64, bool)` to a richer shape. 
Use named columns here +too for clarity: Issue path (`show.rs:331-335`): ```rust -let disc_rows: Vec<(i64, String, bool)> = disc_stmt +let disc_rows: Vec<(i64, String, bool, bool, bool, i64)> = disc_stmt .query_map([issue_id], |row| { - let individual: i64 = row.get(2)?; - Ok((row.get(0)?, row.get(1)?, individual == 1)) + Ok(( + row.get("id")?, + row.get("gitlab_discussion_id")?, + row.get::<_, i64>("individual_note").map(|v| v == 1)?, + row.get::<_, i64>("resolvable").map(|v| v == 1)?, + row.get::<_, i64>("resolved").map(|v| v == 1)?, + row.get("last_note_at")?, + )) })? .collect::, _>>()?; ``` And where discussions are constructed (`show.rs:361`): ```rust -for (disc_id, gitlab_disc_id, individual_note) in disc_rows { +for (disc_id, gitlab_disc_id, individual_note, resolvable, resolved, last_note_at) in disc_rows { // ... existing note query ... discussions.push(DiscussionDetail { gitlab_discussion_id: gitlab_disc_id, + resolvable, + resolved, + last_note_at, notes, individual_note, }); @@ -434,6 +591,9 @@ impl From<&DiscussionDetail> for DiscussionDetailJson { fn from(disc: &DiscussionDetail) -> Self { Self { gitlab_discussion_id: disc.gitlab_discussion_id.clone(), + resolvable: disc.resolvable, + resolved: disc.resolved, + last_note_at_iso: format_iso_timestamp(disc.last_note_at), notes: disc.notes.iter().map(|n| n.into()).collect(), individual_note: disc.individual_note, } @@ -444,6 +604,9 @@ impl From<&MrDiscussionDetail> for MrDiscussionDetailJson { fn from(disc: &MrDiscussionDetail) -> Self { Self { gitlab_discussion_id: disc.gitlab_discussion_id.clone(), + resolvable: disc.resolvable, + resolved: disc.resolved, + last_note_at_iso: format_iso_timestamp(disc.last_note_at), notes: disc.notes.iter().map(|n| n.into()).collect(), individual_note: disc.individual_note, } @@ -453,9 +616,16 @@ impl From<&MrDiscussionDetail> for MrDiscussionDetailJson { #### 2f. 
Add `gitlab_note_id` to note detail structs in show -While we're here, add `gitlab_id` to `NoteDetail`, `MrNoteDetail`, and their JSON -counterparts. Currently show-command notes only have `author_username`, `body`, `created_at`, -`is_system` — no note ID at all, making it impossible to reference a specific note. +While we're here, add `gitlab_id` (as `gitlab_note_id` in JSON) to `NoteDetail`, +`MrNoteDetail`, and their JSON counterparts. Currently show-command notes only have +`author_username`, `body`, `created_at`, `is_system` — no note ID at all, making it impossible +to reference a specific note. This satisfies the Bridge Contract requirement for `gitlab_note_id` +on note-level data. + +**Domain structs** — add `gitlab_id: i64` field. +**JSON structs** — add `gitlab_note_id: i64` field. +**Queries** — add `n.gitlab_id` to the note SELECT within show. +**From impls** — map `gitlab_id` → `gitlab_note_id`. ### Tests @@ -494,12 +664,27 @@ Same pattern for MR path. fn discussion_detail_json_has_gitlab_discussion_id() { let detail = DiscussionDetail { gitlab_discussion_id: "deadbeef".to_string(), + resolvable: true, + resolved: false, + last_note_at: 1_700_000_000_000, notes: vec![], individual_note: false, }; let json = DiscussionDetailJson::from(&detail); let value = serde_json::to_value(&json).unwrap(); assert_eq!(value["gitlab_discussion_id"], "deadbeef"); + assert_eq!(value["resolvable"], true); + assert_eq!(value["resolved"], false); + assert!(value.get("last_note_at_iso").is_some()); +} +``` + +#### Test 4: Show note includes gitlab_note_id + +```rust +#[test] +fn show_note_detail_json_has_gitlab_note_id() { + // Verify NoteDetailJson serialization includes gitlab_note_id } ``` @@ -531,6 +716,12 @@ lore -J discussions --for-issue 42 # List discussions across a project lore -J discussions -p group/repo --since 7d + +# Look up a specific discussion by GitLab ID +lore -J discussions --gitlab-discussion-id 6a9c1750b37d + +# List unresolved threads with latest 2 
notes inline (fewer round-trips) +lore -J discussions --for-mr 99 --resolution unresolved --include-notes 2 ``` ### Response Schema @@ -555,7 +746,8 @@ lore -J discussions -p group/repo --since 7d "resolvable": true, "resolved": false, "position_new_path": "src/components/SwitchHealthCard.vue", - "position_new_line": 42 + "position_new_line": 42, + "notes": [] } ], "total_count": 15, @@ -565,6 +757,10 @@ lore -J discussions -p group/repo --since 7d } ``` +The `notes` array is empty by default (zero overhead). When `--include-notes N` is provided, +each discussion includes up to N of its most recent notes inline. This covers the common +agent pattern of "show me unresolved threads with context" in a single round-trip. + ### File Architecture **No new files.** Follow the existing pattern: @@ -617,6 +813,10 @@ pub struct DiscussionsArgs { #[arg(short = 'p', long, help_heading = "Filters")] pub project: Option, + /// Filter by GitLab discussion ID + #[arg(long, help_heading = "Filters")] + pub gitlab_discussion_id: Option, + /// Filter by resolution status (unresolved, resolved) #[arg(long, value_parser = ["unresolved", "resolved"], help_heading = "Filters")] pub resolution: Option, @@ -633,6 +833,10 @@ pub struct DiscussionsArgs { #[arg(long, value_parser = ["Issue", "MergeRequest"], help_heading = "Filters")] pub noteable_type: Option, + /// Include up to N latest notes per discussion (0 = none, default) + #[arg(long, default_value = "0", help_heading = "Output")] + pub include_notes: usize, + /// Sort field (first_note, last_note) #[arg(long, value_parser = ["first_note", "last_note"], default_value = "last_note", help_heading = "Sorting")] pub sort: String, @@ -691,6 +895,8 @@ pub struct DiscussionListRowJson { pub position_new_path: Option, #[serde(skip_serializing_if = "Option::is_none")] pub position_new_line: Option, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub notes: Vec, } pub struct DiscussionListResult { @@ -708,6 +914,10 @@ pub struct 
DiscussionListResultJson { The `From` impl truncates `first_note_body` to ~120 chars for the snippet. +The `notes` field on `DiscussionListRowJson` is populated only when `--include-notes N > 0`. +It reuses the existing `NoteListRowJson` struct for consistency — agents get the same note +shape whether they come from `notes`, `show`, or `discussions --include-notes`. + #### 3c. SQL Query **File**: `src/cli/commands/list.rs` @@ -720,38 +930,116 @@ pub fn query_discussions( ) -> Result { ``` -Core query: +Core query uses a CTE + ranked-notes rollup (window function) to avoid per-row correlated +subqueries. The `ROW_NUMBER()` approach produces a single scan over the notes table, which +is more predictable than repeated LIMIT 1 sub-selects at scale (200K+ discussions): ```sql +WITH filtered_discussions AS ( + SELECT + d.id, d.gitlab_discussion_id, d.noteable_type, d.project_id, + d.issue_id, d.merge_request_id, d.individual_note, + d.first_note_at, d.last_note_at, d.resolvable, d.resolved + FROM discussions d + JOIN projects p ON d.project_id = p.id + {where_sql} +), +ranked_notes AS ( + SELECT + n.discussion_id, + n.author_username, + n.body, + n.is_system, + n.position_new_path, + n.position_new_line, + ROW_NUMBER() OVER ( + PARTITION BY n.discussion_id + ORDER BY n.position, n.id + ) AS rn + FROM notes n + WHERE n.discussion_id IN (SELECT id FROM filtered_discussions) +), +note_rollup AS ( + SELECT + discussion_id, + SUM(CASE WHEN is_system = 0 THEN 1 ELSE 0 END) AS note_count, + MAX(CASE WHEN rn = 1 AND is_system = 0 THEN author_username END) AS first_author, + MAX(CASE WHEN rn = 1 AND is_system = 0 THEN body END) AS first_note_body, + MAX(CASE WHEN position_new_path IS NOT NULL THEN position_new_path END) AS position_new_path, + MAX(CASE WHEN position_new_line IS NOT NULL THEN position_new_line END) AS position_new_line + FROM ranked_notes + GROUP BY discussion_id +) SELECT - d.id, - d.gitlab_discussion_id, - d.noteable_type, + fd.id, + fd.gitlab_discussion_id, + 
fd.noteable_type, COALESCE(i.iid, m.iid) AS parent_iid, COALESCE(i.title, m.title) AS parent_title, p.path_with_namespace AS project_path, - d.individual_note, - (SELECT COUNT(*) FROM notes n2 WHERE n2.discussion_id = d.id AND n2.is_system = 0) AS note_count, - (SELECT n3.author_username FROM notes n3 WHERE n3.discussion_id = d.id ORDER BY n3.position LIMIT 1) AS first_author, - (SELECT n4.body FROM notes n4 WHERE n4.discussion_id = d.id AND n4.is_system = 0 ORDER BY n4.position LIMIT 1) AS first_note_body, - d.first_note_at, - d.last_note_at, - d.resolvable, - d.resolved, - (SELECT n5.position_new_path FROM notes n5 WHERE n5.discussion_id = d.id AND n5.position_new_path IS NOT NULL LIMIT 1) AS position_new_path, - (SELECT n5.position_new_line FROM notes n5 WHERE n5.discussion_id = d.id AND n5.position_new_line IS NOT NULL LIMIT 1) AS position_new_line -FROM discussions d -JOIN projects p ON d.project_id = p.id -LEFT JOIN issues i ON d.issue_id = i.id -LEFT JOIN merge_requests m ON d.merge_request_id = m.id -{where_sql} + fd.individual_note, + COALESCE(nr.note_count, 0) AS note_count, + nr.first_author, + nr.first_note_body, + fd.first_note_at, + fd.last_note_at, + fd.resolvable, + fd.resolved, + nr.position_new_path, + nr.position_new_line +FROM filtered_discussions fd +JOIN projects p ON fd.project_id = p.id +LEFT JOIN issues i ON fd.issue_id = i.id +LEFT JOIN merge_requests m ON fd.merge_request_id = m.id +LEFT JOIN note_rollup nr ON nr.discussion_id = fd.id ORDER BY {sort_column} {order} LIMIT ? ``` -**Performance note**: The correlated subqueries for `note_count`, `first_author`, etc. are -fine because discussions are always filtered to a specific issue/MR (50-200 rows). For the -unscoped case (all discussions in a project), the LIMIT clause keeps it bounded. +**Performance rationale**: The CTE pre-filters discussions before joining notes. The +`ranked_notes` CTE uses `ROW_NUMBER()` (a single pass over the notes index) instead of +correlated `(SELECT ... 
LIMIT 1)` sub-selects per discussion. For MR-scoped queries
+(50-200 discussions) the performance is equivalent. For project-wide scans with thousands
+of discussions, the window function approach avoids repeated index probes and produces a
+more predictable query plan. The `MAX(CASE WHEN rn = 1 ...)` pattern extracts first-note
+attributes from the grouped output without additional lookups.
+
+**Note on SQLite FILTER syntax**: SQLite has supported `COUNT(*) FILTER (WHERE ...)` on
+aggregates since 3.30 (2019). We use `SUM(CASE WHEN ... THEN 1 ELSE 0 END)` anyway (as
+shown above) for compatibility with older system/bundled SQLite builds.
+
+#### 3c-ii. Note expansion query (--include-notes)
+
+When `include_notes > 0`, after the main discussion query, run a follow-up query per
+discussion to fetch its N most recent notes:
+
+```sql
+SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type,
+       n.is_system, n.created_at, n.updated_at,
+       n.position_new_path, n.position_new_line,
+       n.position_old_path, n.position_old_line,
+       n.resolvable, n.resolved, n.resolved_by,
+       d.noteable_type,
+       COALESCE(i.iid, m.iid) AS parent_iid,
+       COALESCE(i.title, m.title) AS parent_title,
+       p.path_with_namespace AS project_path,
+       d.gitlab_discussion_id
+FROM notes n
+JOIN discussions d ON n.discussion_id = d.id
+JOIN projects p ON n.project_id = p.id
+LEFT JOIN issues i ON d.issue_id = i.id
+LEFT JOIN merge_requests m ON d.merge_request_id = m.id
+WHERE d.id = ?
+ORDER BY n.created_at DESC
+LIMIT ?
+```
+
+**Optimization**: If discussion count is small (<= 50), batch all discussion IDs into a
+single `WHERE d.id IN (?, ?, ...)` query with a secondary partition to split by discussion.
+For larger result sets, fall back to per-discussion queries to avoid huge IN clauses. This
+matches the existing note-loading pattern in `show.rs`.
+
+The returned `NoteListRow` rows reuse the same struct and `NoteListRowJson` conversion from
+workstream 1, ensuring identical note shape across all commands.
 
 #### 3d.
Filters struct @@ -761,18 +1049,21 @@ pub struct DiscussionListFilters { pub project: Option, pub for_issue_iid: Option, pub for_mr_iid: Option, + pub gitlab_discussion_id: Option, pub resolution: Option, pub since: Option, pub path: Option, pub noteable_type: Option, pub sort: String, pub order: String, + pub include_notes: usize, } ``` Where-clause construction follows the exact pattern from `query_notes()`: - `for_issue_iid` → subquery to resolve issue ID from IID + project - `for_mr_iid` → subquery to resolve MR ID from IID + project +- `gitlab_discussion_id` → `d.gitlab_discussion_id = ?` - `resolution` → `d.resolvable = 1 AND d.resolved = 0/1` - `since` → `d.first_note_at >= ?` (using `parse_since()`) - `path` → `EXISTS (SELECT 1 FROM notes n WHERE n.discussion_id = d.id AND n.position_new_path LIKE ?)` @@ -807,12 +1098,14 @@ fn handle_discussions( project: args.project, for_issue_iid: args.for_issue, for_mr_iid: args.for_mr, + gitlab_discussion_id: args.gitlab_discussion_id, resolution: args.resolution, since: args.since, path: args.path, noteable_type: args.noteable_type, sort: args.sort, order: order.to_string(), + include_notes: args.include_notes, }; let result = query_discussions(&conn, &filters, &config)?; @@ -828,8 +1121,10 @@ fn handle_discussions( &result, start.elapsed().as_millis() as u64, args.fields.as_deref(), + robot_mode, ), "jsonl" => print_list_discussions_jsonl(&result), + "csv" => print_list_discussions_csv(&result), _ => print_list_discussions(&result), } @@ -848,6 +1143,7 @@ pub fn print_list_discussions_json( result: &DiscussionListResult, elapsed_ms: u64, fields: Option<&[String]>, + robot_mode: bool, ) { let json_result = DiscussionListResultJson::from(result); let meta = RobotMeta { elapsed_ms }; @@ -859,7 +1155,11 @@ pub fn print_list_discussions_json( let mut output = output; if let Some(f) = fields { let expanded = expand_fields_preset(f, "discussions"); - filter_fields(&mut output, "discussions", &expanded); + if robot_mode { + 
filter_fields_robot(&mut output, "discussions", &expanded); + } else { + filter_fields(&mut output, "discussions", &expanded); + } } match serde_json::to_string(&output) { Ok(json) => println!("{json}"), @@ -871,6 +1171,8 @@ pub fn print_list_discussions_json( Table view: compact format showing discussion_id (first 8 chars), first author, note count, resolved status, path, snippet. +CSV view: all fields, following same pattern as `print_list_notes_csv`. + #### 3g. Fields preset **File**: `src/cli/robot.rs` @@ -981,6 +1283,104 @@ fn discussions_fields_minimal_preset() { } ``` +#### Test 6: CTE query handles empty note_rollup gracefully + +```rust +#[test] +fn query_discussions_with_no_notes() { + let conn = create_test_db(); + insert_project(&conn, 1); + insert_mr(&conn, 1, 1, 99, "Test MR"); + // Insert discussion with no notes (edge case: possible after sync issues) + insert_discussion(&conn, 1, "orphan123", 1, None, Some(1), "MergeRequest"); + + let filters = DiscussionListFilters::default_for_mr(99); + let result = query_discussions(&conn, &filters, &Config::default()).unwrap(); + + assert_eq!(result.discussions.len(), 1); + assert_eq!(result.discussions[0].note_count, 0); + assert!(result.discussions[0].first_author.is_none()); +} +``` + +#### Test 7: --gitlab-discussion-id filter returns exact match + +```rust +#[test] +fn query_discussions_by_gitlab_id() { + let conn = create_test_db(); + insert_project(&conn, 1); + insert_mr(&conn, 1, 1, 99, "Test MR"); + insert_discussion(&conn, 1, "target123", 1, None, Some(1), "MergeRequest"); + insert_discussion(&conn, 2, "other456", 1, None, Some(1), "MergeRequest"); + + let filters = DiscussionListFilters { + gitlab_discussion_id: Some("target123".to_string()), + ..DiscussionListFilters::default_for_mr(99) + }; + let result = query_discussions(&conn, &filters, &Config::default()).unwrap(); + + assert_eq!(result.discussions.len(), 1); + assert_eq!(result.discussions[0].gitlab_discussion_id, "target123"); +} +``` + +#### 
Test 8: --include-notes populates notes array + +```rust +#[test] +fn query_discussions_with_included_notes() { + let conn = create_test_db(); + insert_project(&conn, 1); + insert_mr(&conn, 1, 1, 99, "Test MR"); + insert_discussion(&conn, 1, "disc123", 1, None, Some(1), "MergeRequest"); + insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "first"); + insert_note_in_discussion(&conn, 2, 501, 1, 1, "bob", "second"); + insert_note_in_discussion(&conn, 3, 502, 1, 1, "carol", "third"); + + let filters = DiscussionListFilters { + include_notes: 2, + ..DiscussionListFilters::default_for_mr(99) + }; + let result = query_discussions(&conn, &filters, &Config::default()).unwrap(); + + assert_eq!(result.discussions.len(), 1); + // Note: notes populated during JSON conversion, not in raw result + // Test at handler/print level for full integration +} +``` + +#### Test 9: Bridge fields survive --fields filtering in robot mode + +```rust +#[test] +fn discussions_bridge_fields_forced_in_robot_mode() { + // Request only "note_count" — bridge fields must still appear + let mut value = serde_json::json!({ + "data": { + "discussions": [{ + "gitlab_discussion_id": "abc", + "noteable_type": "MergeRequest", + "parent_iid": 99, + "project_path": "group/repo", + "note_count": 3 + }] + } + }); + + filter_fields_robot( + &mut value, + "discussions", + &["note_count".to_string()], + ); + + let disc = &value["data"]["discussions"][0]; + assert_eq!(disc["note_count"], 3); + assert!(disc.get("gitlab_discussion_id").is_some()); + assert!(disc.get("project_path").is_some()); +} +``` + --- ## 4. 
Fix Robot-Docs Response Schemas @@ -1026,21 +1426,23 @@ With: "--for-issue ", "--for-mr ", "-p/--project ", + "--gitlab-discussion-id ", "--resolution ", "--since ", "--path ", "--noteable-type ", + "--include-notes ", "--sort ", "--asc", "--fields ", - "--format " + "--format " ], "robot_flags": ["--format json", "--fields minimal"], "example": "lore --robot discussions --for-mr 99 --resolution unresolved", "response_schema": { "ok": "bool", "data": { - "discussions": "[{gitlab_discussion_id:string, noteable_type:string, parent_iid:int?, parent_title:string?, project_path:string, individual_note:bool, note_count:int, first_author:string?, first_note_body_snippet:string?, first_note_at_iso:string, last_note_at_iso:string, resolvable:bool, resolved:bool, position_new_path:string?, position_new_line:int?}]", + "discussions": "[{gitlab_discussion_id:string, noteable_type:string, parent_iid:int?, parent_title:string?, project_path:string, individual_note:bool, note_count:int, first_author:string?, first_note_body_snippet:string?, first_note_at_iso:string, last_note_at_iso:string, resolvable:bool, resolved:bool, position_new_path:string?, position_new_line:int?, notes:[NoteListRowJson]?}]", "total_count": "int", "showing": "int" }, @@ -1062,7 +1464,8 @@ With: #### 4d. Update show response_schema Update the `issues` and `mrs` show schemas to reflect that `discussions` now include -`gitlab_discussion_id`. +`gitlab_discussion_id`, `resolvable`, `resolved`, and `last_note_at_iso`. Also reflect that +notes within show discussions now include `gitlab_note_id`. #### 4e. Add to lore_exclusive list @@ -1070,9 +1473,97 @@ Update the `issues` and `mrs` show schemas to reflect that `discussions` now inc "discussions: Thread-level discussion listing with gitlab_discussion_id for API integration" ``` +#### 4f. 
Add robot-docs contract tests + +**File**: `src/main.rs` (within `#[cfg(test)]` module) + +Add lightweight tests that parse the robot-docs JSON output and assert required Bridge +Contract fields are present. This prevents schema drift — if someone adds a field to the +struct but forgets to update robot-docs, the test fails. + +```rust +#[test] +fn robot_docs_notes_schema_includes_bridge_fields() { + let docs = get_robot_docs_json(); // helper that builds the robot-docs Value + let notes_schema = docs["commands"]["notes"]["response_schema"]["data"]["notes"] + .as_str().unwrap(); + assert!(notes_schema.contains("gitlab_discussion_id")); + assert!(notes_schema.contains("project_path")); + assert!(notes_schema.contains("parent_iid")); +} + +#[test] +fn robot_docs_discussions_schema_includes_bridge_fields() { + let docs = get_robot_docs_json(); + let disc_schema = docs["commands"]["discussions"]["response_schema"]["data"]["discussions"] + .as_str().unwrap(); + assert!(disc_schema.contains("gitlab_discussion_id")); + assert!(disc_schema.contains("project_path")); + assert!(disc_schema.contains("parent_iid")); +} + +#[test] +fn robot_docs_show_schema_includes_discussion_id() { + let docs = get_robot_docs_json(); + // Verify issues and mrs show schemas reference gitlab_discussion_id + // in their discussion sub-schemas +} +``` + +#### 4g. Add CLI-level contract integration tests + +**File**: `src/cli/commands/list_tests.rs` or `src/main.rs` `#[cfg(test)]` + +Add handler-level tests that invoke the command handlers with an in-memory DB and parse the +JSON output, asserting Bridge Contract fields are present. These are stronger than unit tests +on structs because they exercise the full path from query through serialization. 
+ +```rust +#[test] +fn notes_handler_json_includes_bridge_fields() { + // Setup in-memory DB with project, discussion, note + // Capture stdout from handle_notes (or call query_notes + print_list_notes_json) + // Parse JSON, assert bridge fields present on every note + let conn = create_test_db(); + insert_project(&conn, 1); + insert_mr(&conn, 1, 1, 99, "Test MR"); + insert_discussion(&conn, 1, "abc123", 1, None, Some(1), "MergeRequest"); + insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "hello"); + + let result = query_notes(&conn, &NoteListFilters::default_for_mr(99), &Config::default()).unwrap(); + let json_result = NoteListResultJson::from(&result); + let value = serde_json::to_value(&json_result).unwrap(); + + for note in value["notes"].as_array().unwrap() { + assert!(note.get("gitlab_discussion_id").is_some(), "missing gitlab_discussion_id"); + assert!(note.get("project_path").is_some(), "missing project_path"); + assert!(note.get("parent_iid").is_some(), "missing parent_iid"); + } +} + +#[test] +fn discussions_handler_json_includes_bridge_fields() { + let conn = create_test_db(); + insert_project(&conn, 1); + insert_mr(&conn, 1, 1, 99, "Test MR"); + insert_discussion(&conn, 1, "abc123", 1, None, Some(1), "MergeRequest"); + insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "hello"); + + let result = query_discussions(&conn, &DiscussionListFilters::default_for_mr(99), &Config::default()).unwrap(); + let json_result = DiscussionListResultJson::from(&result); + let value = serde_json::to_value(&json_result).unwrap(); + + for disc in value["discussions"].as_array().unwrap() { + assert!(disc.get("gitlab_discussion_id").is_some(), "missing gitlab_discussion_id"); + assert!(disc.get("project_path").is_some(), "missing project_path"); + assert!(disc.get("parent_iid").is_some(), "missing parent_iid"); + } +} +``` + ### Tests -No code tests needed for robot-docs (it's static JSON). 
Verified by running +Beyond the contract tests above, robot-docs changes are verified by running `lore robot-docs` and inspecting output. --- @@ -1081,11 +1572,17 @@ No code tests needed for robot-docs (it's static JSON). Verified by running 1. **Change 1** (notes output) — standalone, no dependencies. Can be released immediately. 2. **Change 2** (show output) — standalone, no dependencies. Can be released alongside 1. -3. **Change 4** (robot-docs) — depends on 1 and 2 being done so schemas are accurate. -4. **Change 3** (discussions command) — largest change, depends on 1 for design consistency. +3. **Change 3** (discussions command) — largest change, benefits from 1+2 being reviewed first + to lock down field naming and serialization patterns. +4. **Change 4** (robot-docs + contract tests) — last, after all payloads are finalized. -Changes 1 and 2 can be done in parallel. Change 3 is independent but should come after 1+2 -are reviewed to avoid rework if the field naming or serialization approach changes. +Changes 1 and 2 can be done in parallel. Change 4 must come last since it documents the +final schema of all preceding changes. + +**Cross-cutting**: The Bridge Contract field guardrail (force-including bridge fields in robot +mode) should be implemented as part of Change 1, since it modifies `filter_fields` in +`robot.rs` which all subsequent changes depend on. The `BRIDGE_FIELDS_*` constants are defined +once and reused by Changes 3 and 4. --- @@ -1097,8 +1594,30 @@ After all changes: `gitlab_discussion_id` in the response 2. An agent can run `lore -J discussions --for-mr 3929 --resolution unresolved` to see all open threads with their IDs -3. An agent can run `lore -J mrs 3929` and see `gitlab_discussion_id` on each discussion - group +3. An agent can run `lore -J mrs 3929` and see `gitlab_discussion_id`, `resolvable`, + `resolved`, and `last_note_at_iso` on each discussion group, plus `gitlab_note_id` on + each note within 4. 
`lore robot-docs` lists actual field names for all commands 5. All existing tests still pass 6. No clippy warnings (pedantic + nursery) +7. Robot-docs contract tests pass, preventing future schema drift +8. Bridge Contract fields (`project_path`, `noteable_type`, `parent_iid`, + `gitlab_discussion_id`, `gitlab_note_id`) are present in every applicable read payload +9. Bridge Contract fields survive `--fields` filtering in robot mode (guardrail enforced) +10. `--gitlab-discussion-id` filter works on both `notes` and `discussions` commands +11. `--include-notes N` populates inline notes on `discussions` output +12. CLI-level contract integration tests verify bridge fields through the full handler path + +--- + +## Rejected Recommendations + +- **Rename `id`→`note_id` and `gitlab_id`→`gitlab_note_id` in notes list output** — rejected because every existing consumer (agents, scripts, field presets) uses `id` and `gitlab_id`. The fields are unambiguous within the `notes` context. The show-command note structs are a different story (they have no IDs at all), so we add `gitlab_note_id` there where it's genuinely missing. Renaming established fields is churn without proportional benefit. +- **Keyset cursor-based pagination (`--cursor` flag)** — rejected because no existing lore command has pagination, agents use `--limit` effectively, and adding a cursor mechanism is significant scope creep. Tracked as potential future work if agents hit real pagination needs. +- **Split `note_count` into `user_note_count`/`total_note_count` and rename `first_author` to `first_user_author`** — rejected because `note_count` already excludes system notes by query design (the `WHERE is_system = 0` / `CASE WHEN` filter), and `first_author` already targets the first non-system note. The current naming is clear and consistent with how `notes --include-system` works elsewhere. 
+- **Match path filter on both `position_new_path` and `position_old_path`** — rejected because agents care about where code is *now* (new path), not where it was before a rename. Matching old paths adds complexity and returns confusing results for moved files. +- **Separate migration file for discussion-list indexes** — rejected because this project uses a `MIGRATIONS` array in `src/core/db.rs`, not separate migration files. If profiling shows the new query needs indexes, they'll be added to the migration array in the standard way. Premature index creation without measurement is against project practice. +- **Shared contract model / workstream 0 (shared constants module)** — rejected because 4 structs sharing field names in a codebase this size isn't drift-prone. We have compile-time contract tests (robot-docs assertions + handler-level JSON tests) that catch drift. A constants module for field name strings adds indirection without proportional gain. The Bridge Contract field guardrail (`BRIDGE_FIELDS_*` arrays in robot.rs) provides the centralized definition where it matters — at the filtering enforcement point. +- **Structured robot-docs schema (JSON objects instead of string blobs)** — rejected because the current compact string format is intentionally token-efficient for agent consumption. Switching to nested JSON objects per field would significantly bloat robot-docs output. The string-based contract tests are sufficient — they test what agents actually parse. Agents already work with the inline field listing format used by `issues` and `mrs`. +- **`bridge_contract` meta-section in robot-docs output** — rejected because agents don't need a separate meta-contract section; they need correct field listings per command, which we already provide. Adding a cross-cutting contract section to robot-docs adds documentation surface area without improving the agent workflow. 
+- **Performance regression benchmark test (ignored by default)** — rejected because timing-based assertions are inherently flaky across machines, CI environments, and load conditions. Performance is validated through query plan analysis (EXPLAIN) and manual profiling, not hard-coded elapsed-time thresholds. diff --git a/docs/plan-surgical-sync.feedback-3.md b/docs/plan-surgical-sync.feedback-3.md new file mode 100644 index 0000000..9ea3cb9 --- /dev/null +++ b/docs/plan-surgical-sync.feedback-3.md @@ -0,0 +1,169 @@ +Below are the strongest **new** revisions I’d make (excluding everything in your rejected list), with rationale and plan-level diffs. + +### 1. Add a durable run ledger (`sync_runs`) with phase state +This makes surgical sync crash-resumable, auditable, and safer under Ctrl+C. Right now `run_id` is mostly ephemeral; persisting phase state removes ambiguity about what completed. + +```diff +@@ Design Constraints ++9. **Durable run state**: Surgical sync MUST persist a `sync_runs` row keyed by `run_id` ++ with phase transitions (`preflight`, `ingest`, `dependents`, `docs`, `embed`, `done`, `failed`). ++ This is required for crash recovery, observability, and deterministic retries. + +@@ Step 9: Create `run_sync_surgical` ++Before Stage 0, insert `sync_runs(run_id, project_id, mode='surgical', requested_counts, started_at)`. ++After each stage, update `sync_runs.phase`, counters, and `last_error` if present. ++On success/failure, set terminal state (`done`/`failed`) and `finished_at`. +``` + +### 2. Add `--preflight-only` (network validation without writes) +`--dry-run` is intentionally zero-network, so it cannot validate IIDs. `--preflight-only` is high-value for agents: verifies existence/permissions quickly with no DB mutation. 
+ +```diff +@@ CLI Interface + lore sync --dry-run --issue 123 -p myproject ++lore sync --preflight-only --issue 123 -p myproject + +@@ Step 2: Add `--issue`, `--mr`, `-p` to `SyncArgs` ++ /// Validate remote entities and auth without any DB writes ++ #[arg(long, default_value_t = false)] ++ pub preflight_only: bool, + +@@ Step 10: Add branch in `run_sync` ++if options.preflight_only && options.is_surgical() { ++ return run_sync_surgical_preflight_only(config, &options, run_id, signal).await; ++} +``` + +### 3. Preflight should aggregate all missing/failed IIDs, not fail-fast +Fail-fast causes repeated reruns. Aggregating errors gives one-shot correction and better robot automation. + +```diff +@@ Step 7: Create `src/ingestion/surgical.rs` +-/// Returns the fetched payloads. If ANY fetch fails, the entire operation should abort. ++/// Returns fetched payloads plus per-IID failures; caller aborts writes if failures exist. + pub async fn preflight_fetch(...) -> Result { + +@@ + #[derive(Debug, Default)] + pub struct PreflightResult { + pub issues: Vec, + pub merge_requests: Vec, ++ pub failures: Vec, // stage="fetch" + } + +@@ Step 9: Create `run_sync_surgical` +-let preflight = preflight_fetch(...).await?; ++let preflight = preflight_fetch(...).await?; ++if !preflight.failures.is_empty() { ++ result.entity_failures = preflight.failures; ++ return Err(LoreError::Other("Surgical preflight failed for one or more IIDs".into()).into()); ++} +``` + +### 4. Stop filtering scoped queue drains with raw `json_extract` scans +`json_extract(payload_json, '$.scope_run_id')` in hot drain queries will degrade as queue grows. Use indexed scope metadata. + +```diff +@@ Step 9b: Implement scoped drain helpers +-// claim query adds: +-// AND json_extract(payload_json, '$.scope_run_id') = ? 
++// Add migration: ++// 1) Add `scope_run_id` generated/stored column derived from payload_json (or explicit column) ++// 2) Create index on (project_id, job_type, scope_run_id, status, id) ++// Scoped drains filter by indexed `scope_run_id`, not full-table JSON extraction. +``` + +### 5. Replace `dirty_source_ids` collection-by-query with explicit run scoping +Current approach can accidentally include prior dirty rows for same source and can duplicate work. Tag dirty rows with `origin_run_id` and consume by run. + +```diff +@@ Design Constraints +-2. **Dirty queue scoping**: ... MUST call ... `run_generate_docs_for_dirty_ids` ++2. **Dirty queue scoping**: Surgical sync MUST scope docs by `origin_run_id` on `dirty_sources` ++ (or equivalent exact run marker) and MUST NOT drain unrelated dirty rows. + +@@ Step 7: `SurgicalIngestResult` +- pub dirty_source_ids: Vec, ++ pub origin_run_id: String, + +@@ Step 9a: Implement `run_generate_docs_for_dirty_ids` +-pub fn run_generate_docs_for_dirty_ids(config: &Config, dirty_source_ids: &[i64]) -> Result<...> ++pub fn run_generate_docs_for_run_id(config: &Config, run_id: &str) -> Result<...> +``` + +### 6. Enforce transaction safety at the type boundary +`unchecked_transaction()` + `&Connection` signatures is fragile. Accept `&Transaction` for ingest internals and use `TransactionBehavior::Immediate` for deterministic lock behavior. + +```diff +@@ Step 7: Create `src/ingestion/surgical.rs` +-pub fn ingest_issue_by_iid_from_payload(conn: &Connection, ...) ++pub fn ingest_issue_by_iid_from_payload(tx: &rusqlite::Transaction<'_>, ...) + +-pub fn ingest_mr_by_iid_from_payload(conn: &Connection, ...) ++pub fn ingest_mr_by_iid_from_payload(tx: &rusqlite::Transaction<'_>, ...) + +-let tx = conn.unchecked_transaction()?; ++let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?; +``` + +### 7. 
Acquire sync lock only for mutation phases, not remote preflight +This materially reduces lock contention and keeps normal sync throughput higher, while still guaranteeing mutation serialization. + +```diff +@@ Design Constraints ++10. **Lock window minimization**: Preflight fetch runs without sync lock; lock is acquired immediately ++ before first DB mutation and held through all mutation stages. + +@@ Step 9: Create `run_sync_surgical` +-// ── Acquire sync lock ── +-... +-// ── Stage 0: Preflight fetch ── ++// ── Stage 0: Preflight fetch (no lock, no writes) ── + ... ++// ── Acquire sync lock just before Stage 1 mutation ── +``` + +### 8. Add explicit transient retry policy beyond 429 +Client already handles rate limits; surgical reliability improves a lot if 5xx/timeouts are retried with bounded backoff. + +```diff +@@ Design Constraints ++11. **Transient retry policy**: Preflight and dependent remote fetches MUST retry boundedly on ++ timeout/5xx with jittered backoff; permanent errors (404/401/403) fail immediately. + +@@ Step 5: Add `get_issue_by_iid` / `get_mr_by_iid` ++Document retry behavior for transient transport/server failures. +``` + +### 9. Tighten automated tests around scoping invariants +You already list manual checks; these should be enforced in unit/integration tests to prevent regressions. + +```diff +@@ Step 1: TDD — Write Failing Tests First ++### 1d. New invariants tests ++- `surgical_docs_scope_ignores_preexisting_dirty_rows` ++- `scoped_queue_drain_ignores_orphaned_jobs` ++- `preflight_aggregates_multiple_missing_iids` ++- `preflight_only_performs_zero_writes` ++- `dry_run_performs_zero_network_calls` ++- `lock_window_does_not_block_during_preflight` + +@@ Acceptance Criteria ++32. Scoped queue/docs invariants are covered by automated tests (not manual-only verification). +``` + +### 10. Make robot-mode surgical output first-class +For agent workflows, include full stage telemetry and actionable recovery commands. 
+ +```diff +@@ Step 15: Update `SyncResult` for robot mode structured output ++ /// Per-stage elapsed ms for deterministic performance tracking ++ pub stage_timings_ms: std::collections::BTreeMap, ++ /// Suggested recovery commands (robot ergonomics) ++ pub recovery_actions: Vec, + +@@ Step 14: Update `robot-docs` manifest ++Document surgical-specific error codes and `actions` schema for automated recovery. +``` + +If you want, I can now produce a fully rewritten **iteration 3** plan that merges these into your current structure end-to-end. \ No newline at end of file diff --git a/docs/plan-surgical-sync.feedback-4.md b/docs/plan-surgical-sync.feedback-4.md new file mode 100644 index 0000000..268cc28 --- /dev/null +++ b/docs/plan-surgical-sync.feedback-4.md @@ -0,0 +1,212 @@ +1. **Resolve the current contract contradictions (`preflight-only`, `dry-run`, `sync_runs`)** + +Why this improves the plan: +- Right now constraints conflict: “zero DB writes before commit” vs inserting `sync_runs` during preflight. +- This ambiguity will cause implementation drift and flaky acceptance tests. +- Splitting control-plane writes from content-plane writes keeps safety guarantees strict while preserving observability. + +```diff +@@ ## Design Constraints +-6. **Preflight-then-commit**: All remote fetches happen BEFORE any DB writes. If any IID fetch fails (404, network error), the entire operation aborts with zero DB mutations. ++6. **Preflight-then-commit (content-plane)**: All remote fetches happen BEFORE any writes to content tables (`issues`, `merge_requests`, `discussions`, `resource_events`, `documents`, `embeddings`). ++7. **Control-plane exception**: `sync_runs` / `sync_run_entities` writes are allowed during preflight for observability and crash diagnostics. +@@ +-11. **Preflight-only mode**: `--preflight-only` validates remote entity existence and permissions with zero DB writes. ++11. 
**Preflight-only mode**: `--preflight-only` performs zero content writes; control-plane run-ledger writes are allowed. +@@ ### For me to evaluate (functional): +-24. **Preflight-only mode** ... no DB mutations beyond the sync_runs ledger entry ++24. **Preflight-only mode** ... no content DB mutations; only run-ledger rows may be written +``` + +--- + +2. **Add stale-write protection to avoid TOCTOU regressions during unlocked preflight** + +Why this improves the plan: +- You intentionally preflight without lock; that’s good for throughput but introduces race risk. +- Without a guard, a slower surgical run can overwrite newer data ingested by a concurrent normal sync. +- This is a correctness bug under contention, not a nice-to-have. + +```diff +@@ ## Design Constraints ++12. **Stale-write protection**: Surgical ingest MUST NOT overwrite fresher local rows. If local `updated_at` is newer than the preflight payload’s `updated_at`, skip that entity and record `skipped_stale`. +@@ ## Step 7: Create `src/ingestion/surgical.rs` +- let labels_created = process_single_issue(conn, config, project_id, issue)?; ++ // Skip stale payloads to avoid TOCTOU overwrite after unlocked preflight. ++ if is_local_newer_issue(conn, project_id, issue.iid, issue.updated_at)? { ++ result.skipped_stale += 1; ++ return Ok(result); ++ } ++ let labels_created = process_single_issue(conn, config, project_id, issue)?; +@@ ++// same guard for MR path +@@ ## Step 15: Update `SyncResult` ++ /// Entities skipped because local row was newer than preflight payload ++ pub skipped_stale: usize, +@@ ### Edge cases to verify: ++38. **TOCTOU safety**: if a normal sync updates entity after preflight but before ingest, surgical run skips stale payload (no overwrite) +``` + +--- + +3. 
**Make dirty-source scoping exact (do not capture pre-existing rows for same entity)** + +Why this improves the plan: +- Current “query dirty rows by `source_id` after ingest” can accidentally include older dirty rows for the same entity. +- That silently violates strict run scoping and can delete unrelated backlog rows. +- You can fix this without adding `origin_run_id` to `dirty_sources` (which you already rejected). + +```diff +@@ ## Step 7: Create `src/ingestion/surgical.rs` +- // Collect dirty_source rows for this entity +- let mut stmt = conn.prepare( +- "SELECT id FROM dirty_sources WHERE source_type = 'issue' AND source_id = ?1" +- )?; ++ // Capture only rows inserted by THIS call using high-water mark. ++ let before_dirty_id: i64 = conn.query_row( ++ "SELECT COALESCE(MAX(id), 0) FROM dirty_sources", ++ [], |r| r.get(0), ++ )?; ++ // ... call process_single_issue ... ++ let mut stmt = conn.prepare( ++ "SELECT id FROM dirty_sources ++ WHERE id > ?1 AND source_type = 'issue' AND source_id = ?2" ++ )?; +@@ ++ // same pattern for MR +@@ ### 1d. Scoping invariant tests ++#[test] ++fn surgical_docs_scope_ignores_preexisting_dirty_rows_for_same_entity() { ++ // pre-insert dirty row for iid=7, then surgical ingest iid=7 ++ // assert result.dirty_source_ids only contains newly inserted rows ++} +``` + +--- + +4. **Fix embed-stage leakage when `--no-docs` is used in surgical mode** + +Why this improves the plan: +- Current design can run global embed even when docs stage is skipped, which may embed unrelated backlog docs. +- That breaks the surgical “scope only this run” promise. +- This is both correctness and operator-trust critical. + +```diff +@@ ## Step 9: Create `run_sync_surgical` +- if !options.no_embed { ++ // Surgical embed only runs when surgical docs actually regenerated docs in this run. 
++ if !options.no_embed && !options.no_docs && result.documents_regenerated > 0 { +@@ ## Step 4: Wire new fields in `handle_sync_cmd` ++ if options.is_surgical() && options.no_docs && !options.no_embed { ++ return Err(Box::new(LoreError::Other( ++ "In surgical mode, --no-docs requires --no-embed (to preserve scoping guarantees)".to_string() ++ ))); ++ } +@@ ### For me to evaluate ++39. **No embed leakage**: `sync --issue X --no-docs` never embeds unrelated unembedded docs +``` + +--- + +5. **Add queue-failure hygiene so scoped jobs do not leak forever** + +Why this improves the plan: +- Scoped drains prevent accidental processing, but failed runs can strand pending jobs permanently. +- You need explicit terminalization (`aborted`) and optional replay mechanics. +- Otherwise queue bloat and confusing diagnostics accumulate. + +```diff +@@ ## Step 8a: Add `sync_runs` table migration ++ALTER TABLE dependent_queue ADD COLUMN aborted_reason TEXT; ++-- status domain now includes: pending, claimed, done, failed, aborted +@@ ## Step 9: run_sync_surgical failure paths ++// On run failure/cancel: ++conn.execute( ++ "UPDATE dependent_queue ++ SET status='aborted', aborted_reason=?1 ++ WHERE project_id=?2 AND scope_run_id=?3 AND status='pending'", ++ rusqlite::params![failure_summary, project_id, run_id], ++)?; +@@ ## Acceptance Criteria ++40. **No stranded scoped jobs**: failed surgical runs leave no `pending` rows for their `scope_run_id` +``` + +--- + +6. **Persist per-entity lifecycle (`sync_run_entities`) for real observability and deterministic retry** + +Why this improves the plan: +- `sync_runs` alone gives aggregate counters but not which IID failed at which stage. +- Per-entity records make retries deterministic and robot output far more useful. 
+- This is the missing piece for your stated “deterministic retry decisions.” + +```diff +@@ ## Step 8a: Add `sync_runs` table migration ++CREATE TABLE IF NOT EXISTS sync_run_entities ( ++ id INTEGER PRIMARY KEY, ++ run_id TEXT NOT NULL REFERENCES sync_runs(run_id), ++ entity_type TEXT NOT NULL CHECK(entity_type IN ('issue','merge_request')), ++ iid INTEGER NOT NULL, ++ stage TEXT NOT NULL, ++ status TEXT NOT NULL CHECK(status IN ('ok','failed','skipped_stale')), ++ error_code TEXT, ++ error_message TEXT, ++ updated_at INTEGER NOT NULL ++); ++CREATE INDEX IF NOT EXISTS idx_sync_run_entities_run ON sync_run_entities(run_id, entity_type, iid); +@@ ## Step 15: Update `SyncResult` ++ pub failed_iids: Vec<(String, u64)>, ++ pub skipped_stale_iids: Vec<(String, u64)>, +@@ ## CLI Interface ++lore --robot sync-runs --run-id <run_id> ++lore --robot sync-runs --run-id <run_id> --retry-failed +``` + +--- + +7. **Use explicit error type for surgical preflight failures (not `LoreError::Other`)** + +Why this improves the plan: +- `Other(String)` loses machine semantics, weakens robot mode, and leads to bad exit-code behavior. +- A typed error preserves structured failures and enables actionable recovery commands. + +```diff +@@ ## Step 9: run_sync_surgical +- return Err(LoreError::Other( +- format!("Surgical preflight failed for {} of {} IIDs: {}", ...) +- ).into()); ++ return Err(LoreError::SurgicalPreflightFailed { ++ run_id: run_id.to_string(), ++ total: total_items, ++ failures: preflight.failures.clone(), ++ }.into()); +@@ ## Step 15: Update `SyncResult` ++ /// Machine-actionable error summary for robot mode ++ pub error_code: Option<String>, +@@ ## Acceptance Criteria ++41. **Typed failure**: preflight failures serialize structured errors (not generic `Other`) with machine-usable codes/actions +``` + +--- + +8.
**Strengthen tests for rollback, contention, and stale-skip guarantees** + +Why this improves the plan: +- Current tests cover many happy-paths and scoping invariants, but key race/rollback behaviors are still under-tested. +- These are exactly where regressions will appear first in production. + +```diff +@@ ## Step 1: TDD — Write Failing Tests First ++### 1f. Transactional rollback + TOCTOU tests ++1. `preflight_success_then_ingest_failure_rolls_back_all_content_writes` ++2. `stale_payload_is_skipped_when_local_updated_at_is_newer` ++3. `failed_run_aborts_pending_scoped_jobs` ++4. `surgical_no_docs_requires_no_embed` +@@ ### Automated scoping invariants +-38. **Scoped queue/docs invariants are enforced by automated tests** ++42. **Rollback and race invariants are enforced by automated tests** (no partial writes on ingest failure, no stale overwrite) +``` + +--- + +These eight revisions keep your core approach intact, avoid your explicitly rejected ideas, and close the biggest correctness/operability gaps before implementation. \ No newline at end of file diff --git a/docs/plan-surgical-sync.feedback-5.md b/docs/plan-surgical-sync.feedback-5.md new file mode 100644 index 0000000..a325b51 --- /dev/null +++ b/docs/plan-surgical-sync.feedback-5.md @@ -0,0 +1,130 @@ +**Critical Gaps In Current Plan** +1. `dirty_sources` scoping is based on `id`, but `dirty_sources` has no `id` column and uses `(source_type, source_id)` UPSERT semantics. +2. Plan assumes a new `dependent_queue` with `status`, but current code uses `pending_dependent_fetches` (delete-on-complete), so queue-scoping design conflicts with existing invariants. +3. Constraint 6 says all remote fetches happen before any content writes, but the proposed surgical flow fetches discussions/events/diffs after ingest writes. +4. `sync_runs` is already an existing table and already used by `SyncRunRecorder`; the plan currently treats it like a new table. + +**Best Revisions** + +1. 
**Fix dirty-source scoping to match real schema (queued-at watermark, not `id` high-water).** +Why this is better: This removes a correctness bug and makes same-entity re-ingest deterministic under UPSERT behavior. + +```diff +@@ Design Constraints +-2. Dirty queue scoping: ... capture MAX(id) FROM dirty_sources ... run_generate_docs_for_dirty_ids ... ++2. Dirty queue scoping: `dirty_sources` is keyed by `(source_type, source_id)` and updated via UPSERT. ++ Surgical scoping MUST use: ++ 1) a run-level `run_dirty_floor_ms` captured before surgical ingest, and ++ 2) explicit touched source keys from ingest (`(source_type, source_id)`). ++ Surgical docs MUST call a scoped API (e.g. `run_generate_docs_for_sources`) and MUST NOT drain global dirty queue. +@@ Step 9a +-pub fn run_generate_docs_for_dirty_ids(config: &Config, dirty_source_ids: &[i64]) -> Result ++pub fn run_generate_docs_for_sources(config: &Config, sources: &[(SourceType, i64)]) -> Result +``` + +2. **Bypass shared dependent queue in surgical mode; run dependents inline per target.** +Why this is better: Avoids queue migration churn, avoids run-scope conflicts with existing unique constraints, and removes orphan-job hygiene complexity entirely. + +```diff +@@ Design Constraints +-4. Dependent queue scoping: ... scope_run_id indexed column on dependent_queue ... ++4. Surgical dependent execution: surgical mode MUST bypass `pending_dependent_fetches`. ++ Dependents (resource_events, mr_closes_issues, mr_diffs) run inline for targeted entities only. ++ Global queue remains for normal sync only. +@@ Design Constraints +-14. Queue failure hygiene: ... pending scoped jobs ... terminalized to aborted ... ++14. Surgical failure hygiene: surgical mode MUST leave no queue artifacts because it does not enqueue dependent jobs. 
+@@ Step 9b / 9c / Step 13 +-Implement scoped drain helpers and enqueue_job scope_run_id plumbing ++Replace with direct per-entity helpers in ingestion layer: ++ - sync_issue_resource_events_direct(...) ++ - sync_mr_resource_events_direct(...) ++ - sync_mr_closes_issues_direct(...) ++ - sync_mr_diffs_direct(...) +``` + +3. **Clarify atomicity contract to “primary-entity atomicity” (remove contradiction).** +Why this is better: Keeps strong zero-write guarantees for missing IIDs while matching practical staged pipeline behavior. + +```diff +@@ Design Constraints +-6. Preflight-then-commit (content-plane): All remote fetches happen BEFORE any writes to content tables ... ++6. Primary-entity atomicity: all requested issue/MR payload fetches complete before first content write. ++ If any primary IID fetch fails, primary ingest does zero content writes. ++ Dependent stages (discussions/events/diffs/closes) are post-ingest and best-effort, with structured per-stage failure reporting. +``` + +4. **Extend existing `sync_runs` schema instead of redefining it.** +Why this is better: Preserves compatibility with current `SyncRunRecorder`, `sync_status`, and existing historical data. + +```diff +@@ Step 8a +-Add `sync_runs` table migration (CREATE TABLE sync_runs ...) ++Add migration 027 to extend existing `sync_runs` table: ++ - ADD COLUMN mode TEXT NULL -- 'standard' | 'surgical' ++ - ADD COLUMN phase TEXT NULL -- preflight|ingest|dependents|docs|embed|done|failed ++ - ADD COLUMN surgical_summary_json TEXT NULL ++Reuse `SyncRunRecorder` row lifecycle; do not introduce a parallel run-ledger model. +``` + +5. **Strengthen TOCTOU stale protection for equal timestamps.** +Why this is better: Prevents regressions when `updated_at` is equal but a fresher local fetch already happened. + +```diff +@@ Design Constraints +-13. ... If local `updated_at` is newer than preflight payload `updated_at`, skip ... ++13. ... 
Skip stale when: ++ a) local.updated_at > payload.updated_at, OR ++ b) local.updated_at == payload.updated_at AND local.last_seen_at > preflight_started_at_ms. ++ This prevents equal-timestamp regressions under concurrent sync. +@@ Step 1f tests ++Add test: `equal_updated_at_but_newer_last_seen_is_skipped`. +``` + +6. **Shrink lock window further: release `sync` lock before embed; use dedicated embed lock.** +Why this is better: Prevents long embedding from blocking unrelated syncs and avoids concurrent embed writers. + +```diff +@@ Design Constraints +-11. Lock ... held through all mutation stages. ++11. Lock ... held through ingest/dependents/docs only. ++ Release `AppLock("sync")` before embed. ++ Embed stage uses `AppLock("embed")` for single-flight embedding writes. +@@ Step 9 +-Embed runs inside the same sync lock window ++Embed runs after sync lock release, under dedicated embed lock +``` + +7. **Add the missing `sync-runs` robot read path (the plan references it but doesn’t define it).** +Why this is better: Makes durable run-state actually useful for recovery automation and observability. + +```diff +@@ Step 14 (new) ++## Step 14a: Add `sync-runs` read command ++ ++CLI: ++ lore --robot sync-runs --limit 20 ++ lore --robot sync-runs --run-id <run_id> ++ lore --robot sync-runs --state failed ++ ++Robot response fields: ++ run_id, mode, phase, status, started_at, finished_at, counters, failures, suggested_retry_command +``` + +8. **Add URL-native surgical targets (`--issue-url`, `--mr-url`) with project inference.** +Why this is better: Much more agent-friendly and reduces project-resolution errors from copy/paste workflows. + +```diff +@@ CLI Interface + lore sync --issue 123 --issue 456 -p myproject ++lore sync --issue-url https://gitlab.example.com/group/proj/-/issues/123 ++lore sync --mr-url https://gitlab.example.com/group/proj/-/merge_requests/789 +@@ Step 2 ++Add repeatable flags: ++ --issue-url <url> ++ --mr-url <url> ++Parse URL into (project_path, iid).
If all targets are URL-derived and same project, `-p` is optional. ++If mixed projects are provided in one command, reject with clear error. +``` + +If you want, I can produce a single consolidated patched version of your plan (iteration 5 draft) with these revisions already merged. \ No newline at end of file diff --git a/docs/plan-surgical-sync.feedback-6.md b/docs/plan-surgical-sync.feedback-6.md new file mode 100644 index 0000000..cd30444 --- /dev/null +++ b/docs/plan-surgical-sync.feedback-6.md @@ -0,0 +1,152 @@ +Highest-impact revisions after reviewing your v5 plan: + +1. **Fix a real scoping hole: embed can still process unrelated docs** +Rationale: Current plan assumes scoped docs implies scoped embed, but that only holds while no other run creates unembedded docs. You explicitly release sync lock before embed, so another sync can enqueue/regenerate docs in between, and `run_embed` may embed unrelated backlog. This breaks surgical isolation and can hide backlog debt. +```diff +diff --git a/plan.md b/plan.md +@@ Design Constraints +-3. Embed scoping: Embedding runs only for documents regenerated by this surgical run. Because `run_embed` processes only unembedded docs, scoping is automatic IF docs are scoped correctly... ++3. Embed scoping: Embedding MUST be explicitly scoped to documents regenerated by this surgical run. ++ `run_generate_docs_for_sources` returns regenerated `document_ids`; surgical mode calls ++ `run_embed_for_document_ids(document_ids)` and never global `run_embed`. ++ This remains true even after lock release and under concurrent normal sync activity. +@@ Step 9a: Implement `run_generate_docs_for_sources` +-pub fn run_generate_docs_for_sources(...) -> Result { ++pub fn run_generate_docs_for_sources(...) -> Result { ++ // Return regenerated document IDs for scoped embedding. 
++ // GenerateDocsResult { regenerated, errored, regenerated_document_ids: Vec<i64> } +@@ Step 9: Embed stage +- match run_embed(config, false, false, None, signal).await { ++ match run_embed_for_document_ids(config, &result.regenerated_document_ids, signal).await { +``` + +2. **Make run-ledger lifecycle actually durable (and consistent with your own constraint 10)** +Rationale: Plan text says “reuse `SyncRunRecorder`”, but Step 9 writes raw SQL directly. That creates lifecycle drift, missing heartbeats, and inconsistent failure handling as code evolves. +```diff +diff --git a/plan.md b/plan.md +@@ Design Constraints +-10. Durable run state: ... Reuses `SyncRunRecorder` row lifecycle ... ++10. Durable run state: surgical sync MUST use `SyncRunRecorder` end-to-end (no ad-hoc SQL updates). ++ Add recorder APIs for `set_mode`, `set_phase`, `set_counters`, `finish_succeeded`, ++ `finish_failed`, `finish_cancelled`, and periodic `heartbeat`. +@@ Step 9: Create `run_sync_surgical` +- conn.execute("INSERT INTO sync_runs ...") +- conn.execute("UPDATE sync_runs SET phase = ...") ++ let mut recorder = SyncRunRecorder::start_surgical(...)?; ++ recorder.set_phase("preflight")?; ++ recorder.heartbeat_if_due()?; ++ recorder.set_phase("ingest")?; ++ ... ++ recorder.finish_succeeded_with_warnings(...)?; +``` + +3. **Add explicit `cancelled` terminal state** +Rationale: Current early cancellation branches return `Ok(result)` without guaranteed run-row finalization. That leaves misleading `running` rows and weak crash diagnostics. +```diff +diff --git a/plan.md b/plan.md +@@ Design Constraints ++15. Cancellation semantics: If shutdown is observed after run start, phase is set to `cancelled`, ++ status is `cancelled`, `finished_at` is written, and lock is released before return. +@@ Step 8a migration ++ALTER TABLE sync_runs ADD COLUMN warnings_count INTEGER NOT NULL DEFAULT 0; ++ALTER TABLE sync_runs ADD COLUMN cancelled_at INTEGER; +@@ Acceptance Criteria ++47.
Cancellation durability: Ctrl+C during surgical sync records `status='cancelled'`, ++ `phase='cancelled'`, and `finished_at` in `sync_runs`. +``` + +4. **Reduce lock contention further by separating dependent fetch and dependent write** +Rationale: You currently hold lock through network-heavy dependent stages. That maximizes contention and increases lock timeout risk. Better: fetch dependents unlocked, write in short locked transactions with per-entity freshness guards. +```diff +diff --git a/plan.md b/plan.md +@@ Design Constraints +-11. Lock window minimization: ... held through ingest, dependents, and docs stages. ++11. Lock window minimization: lock is held only for DB mutation windows. ++ Dependents run in two phases: ++ (a) fetch from GitLab without lock, ++ (b) write results under lock in short transactions. ++ Apply per-entity freshness checks before dependent writes. +@@ Step 9: Dependent stages +- // All dependents run INLINE per-entity ... while lock is held ++ // Dependents fetch outside lock, then write under lock with CAS-style watermark guards. +``` + +5. **Introduce stage timeout budgets to prevent hung surgical runs** +Rationale: A single slow GitLab endpoint can stall the whole run and hold resources too long. Timeout budgets plus per-entity failure recording keep the run bounded and predictable. +```diff +diff --git a/plan.md b/plan.md +@@ Design Constraints ++16. Stage timeout budgets: each dependent fetch has a per-entity timeout and a global stage budget. ++ Timed-out entities are recorded in `entity_failures` with code `TIMEOUT` and run continues best-effort. +@@ Step 9 notes ++ - Wrap dependent network calls with `tokio::time::timeout`. ++ - Add config knobs: ++ `sync.surgical_entity_timeout_seconds` (default 20), ++ `sync.surgical_dependents_budget_seconds` (default 120). +``` + +6. **Add payload integrity checks (project mismatch hard-fail)** +Rationale: Surgical mode is precision tooling. 
If API/proxy misconfiguration returns payloads from wrong project, you should fail preflight loudly, not trust downstream assumptions. +```diff +diff --git a/plan.md b/plan.md +@@ Step 7: preflight_fetch ++ // Integrity check: payload.project_id must equal requested gitlab_project_id. ++ // On mismatch, record EntityFailure { code: "PROJECT_MISMATCH", stage: "fetch" }. +@@ Step 9d: error codes ++PROJECT_MISMATCH -> usage/config data integrity failure (typed, machine-readable) +@@ Acceptance Criteria ++48. Project integrity: payloads with unexpected `project_id` are rejected in preflight ++ and produce zero content writes. +``` + +7. **Upgrade robot output from aggregate-only to per-entity lifecycle** +Rationale: `entity_failures` alone is not enough for robust automation. Agents need a complete entity outcome map (fetched, ingested, stale-skipped, dependent failures) to retry deterministically. +```diff +diff --git a/plan.md b/plan.md +@@ Step 15: Update `SyncResult` ++pub struct EntityOutcome { ++ pub entity_type: String, ++ pub iid: u64, ++ pub fetched: bool, ++ pub ingested: bool, ++ pub stale_skipped: bool, ++ pub dependent_failures: Vec, ++} +@@ ++pub entity_outcomes: Vec, ++pub completion_status: String, // succeeded | succeeded_with_warnings | failed | cancelled +@@ Robot mode +- enables agents to detect partial failures via `entity_failures` ++ enables deterministic, per-IID retry and richer UI messaging. +``` + +8. **Index `sync_runs` for real observability at scale** +Rationale: You’re adding mode/phase/counters and then querying recent surgical runs. Without indexes, this degrades as run history grows. +```diff +diff --git a/plan.md b/plan.md +@@ Step 8a migration ++CREATE INDEX IF NOT EXISTS idx_sync_runs_mode_started ++ ON sync_runs(mode, started_at DESC); ++CREATE INDEX IF NOT EXISTS idx_sync_runs_status_phase_started ++ ON sync_runs(status, phase, started_at DESC); +``` + +9. 
**Add tests specifically for the new failure-prone paths** +Rationale: Current tests are strong on ingest and scoping, but still miss new high-risk runtime behavior (cancel state, timeout handling, scoped embed under concurrency). +```diff +diff --git a/plan.md b/plan.md +@@ Step 1f tests ++#[tokio::test] ++async fn cancellation_marks_sync_run_cancelled() { ... } ++ ++#[tokio::test] ++async fn dependent_timeout_records_entity_failure_and_continues() { ... } ++ ++#[tokio::test] ++async fn scoped_embed_does_not_embed_unrelated_docs_created_after_docs_stage() { ... } +@@ Acceptance Criteria ++49. Scoped embed isolation under concurrency is verified by automated test. ++50. Timeout path is verified (TIMEOUT code + continued processing). +``` + +These revisions keep your core direction intact, avoid every rejected recommendation, and materially improve correctness under concurrency, operational observability, and agent automation quality. \ No newline at end of file diff --git a/docs/plan-surgical-sync.md b/docs/plan-surgical-sync.md new file mode 100644 index 0000000..ff9f791 --- /dev/null +++ b/docs/plan-surgical-sync.md @@ -0,0 +1,2240 @@ +--- +plan: true +title: "" +status: iterating +iteration: 6 +target_iterations: 8 +beads_revision: 0 +related_plans: [] +created: 2026-02-16 +updated: 2026-02-17 +--- + +# Surgical Per-IID Sync + +## Context + +Agents working on active issues/MRs need to refresh data for specific entities without a full sync. Currently `lore sync` always paginates through ALL issues/MRs from the cursor forward. This adds `--issue` and `--mr` flags to `sync` that accept lists of IIDs, fetching only those entities from GitLab and running the full pipeline (ingest → discussions → events → generate-docs → embed) scoped to just those items. Status enrichment is skipped in surgical mode. + +### Design Constraints + +1. **Sync locking**: The surgical path acquires `AppLock("sync")` but ONLY for mutation phases. 
The preflight (network-only) phase runs WITHOUT the lock to minimize contention with concurrent normal syncs. +2. **Dirty queue scoping**: `dirty_sources` is keyed by `(source_type, source_id)` with UPSERT semantics (no autoincrement `id` column). Surgical scoping MUST use explicit touched source keys collected during ingest — each `ingest_*_by_iid_from_payload` returns the `(source_type, source_id)` pairs it touched. Surgical docs MUST call a scoped API (`run_generate_docs_for_sources`) filtering by these exact keys, and MUST NOT drain the global dirty queue. +3. **Embed scoping**: Embedding MUST be explicitly scoped to documents regenerated by this surgical run. `run_generate_docs_for_sources` returns regenerated `document_ids` via `GenerateDocsResult.regenerated_document_ids: Vec<i64>`; surgical mode calls `run_embed_for_document_ids(document_ids)` and never global `run_embed`. This remains correct even after lock release and under concurrent normal sync activity. **Guard condition**: embed stage MUST NOT run when `--no-docs` is used in surgical mode, as it would embed unrelated backlog docs. Validation: `--no-docs` without `--no-embed` is rejected in surgical mode. +4. **Surgical dependent execution**: Surgical mode MUST bypass `pending_dependent_fetches`. Dependents (resource_events, mr_closes_issues, mr_diffs) run inline for targeted entities only via direct per-entity helper functions. The global `pending_dependent_fetches` queue remains exclusively for normal sync. This eliminates queue-scoping complexity, orphaned job cleanup, and schema migration to the shared queue table. +5. **MR dependent stages**: Normal MR ingest runs closes_issues and diffs stages. The surgical path must also run these for MR entities. +6. **Primary-entity atomicity**: All requested issue/MR payload fetches complete before the first content write. If any primary IID fetch fails (404, network error), primary ingest does zero content writes.
Preflight **aggregates all failures** (does not fail-fast on the first error) so agents get a complete error report in one pass. Dependent stages (discussions, resource events, MR closes_issues, MR diffs) are post-ingest and best-effort — individual dependent stage failures are recorded per-entity but do not roll back the primary ingest. +7. **Control-plane exception**: `sync_runs` writes are allowed during preflight for observability and crash diagnostics. These are control-plane rows, not content data, and do not affect query results. +8. **Dry-run is zero-write**: The dry-run check MUST precede lock acquisition and DB connection. Dry-run produces zero side effects — no lock acquired, no DB connection opened, no network calls. +9. **defaultProject fallback**: When `-p` is omitted, fall back to `config.default_project` before erroring. +10. **Durable run state**: Surgical sync MUST use `SyncRunRecorder` end-to-end (no ad-hoc SQL updates to `sync_runs`). Extend `SyncRunRecorder` with APIs for surgical mode: `start_surgical(...)`, `set_phase(&str)`, `set_counters(SurgicalCounters)`, `finish_succeeded()`, `finish_succeeded_with_warnings(warnings_count)`, `finish_failed(error)`, `finish_cancelled()`, and periodic `heartbeat_if_due()`. Phase transitions (`preflight`, `ingest`, `dependents`, `docs`, `embed`, `done`, `failed`, `cancelled`) enable crash recovery and observability. +11. **Lock window minimization**: Preflight fetch runs WITHOUT the sync lock. Lock is acquired immediately before the first DB mutation (Stage 1) and held through ingest, dependents, and docs stages. Lock is RELEASED before the embed stage. Embed is naturally idempotent (processes only unembedded docs) and does not require the sync lock. +12. **Preflight-only mode**: `--preflight-only` performs zero content writes; control-plane run-ledger writes are allowed. Distinct from `--dry-run` (which is zero-network). Allows agents to verify IIDs are valid before committing to a full surgical run. +13. 
**Stale-write protection (TOCTOU)**: Because preflight runs WITHOUT the sync lock, a concurrent normal sync may update the same entity between preflight fetch and surgical ingest. Surgical ingest MUST NOT overwrite fresher local rows. Skip stale when: (a) `local.updated_at > payload.updated_at`, OR (b) `local.updated_at == payload.updated_at AND local.last_seen_at > preflight_started_at_ms`. This prevents equal-timestamp regressions under concurrent sync — the `last_seen_at` column acts as a monotonic tie-breaker when `updated_at` is identical. +14. **Surgical failure hygiene**: Surgical mode leaves no queue artifacts because it does not enqueue jobs into `pending_dependent_fetches`. Dependent stages execute inline and report failures per-entity in `SurgicalIngestResult.entity_failures`. No orphaned job cleanup is needed. +15. **Cancellation semantics**: If shutdown is observed after run start, the recorder finalizes with `finish_cancelled()`: phase is set to `cancelled`, status is `cancelled`, `finished_at` is written, and the lock is released before return. No silent `running` rows are left behind. +16. **Per-entity timeout**: Each dependent network fetch (discussions, resource events, MR dependents) is wrapped in `tokio::time::timeout` with a configurable per-entity budget. Timed-out entities are recorded in `entity_failures` with code `TIMEOUT` and the run continues best-effort. Config knob: `sync.surgical_entity_timeout_seconds` (default 30). +17. **Payload integrity**: Preflight validates that each returned payload's `project_id` matches the requested `gitlab_project_id`. On mismatch, the entity is recorded as `EntityFailure { code: "PROJECT_MISMATCH", stage: "fetch" }` and excluded from ingest. This catches API proxy misconfigurations that could silently corrupt data. 
+ +## CLI Interface + +```bash +lore sync --issue 123 --issue 456 -p myproject +lore sync --mr 789 --mr 101 -p myproject +lore sync --issue 123 --mr 789 -p myproject +lore --robot sync --issue 123 -p myproject + +# -p is optional if config.defaultProject is set +lore sync --issue 123 + +# dry-run shows what would be fetched without writes or network calls +lore sync --dry-run --issue 123 -p myproject + +# preflight-only validates entities exist on GitLab without any DB writes +lore sync --preflight-only --issue 123 -p myproject +``` + +--- + +## Step 1: TDD — Write Failing Tests First + +### 1a. Test helper: `test_config()` (`src/ingestion/surgical_tests.rs`) + +`Config` has no `Default` impl (fields like `gitlab.base_url` are required). All surgical tests need a minimal config helper: + +```rust +fn test_config() -> crate::Config { + serde_json::from_value(serde_json::json!({ + "gitlab": { + "baseUrl": "https://gitlab.example.com", + "projects": ["group/project"] + }, + "storage": {} + })).unwrap() +} +``` + +### 1b. 
Test: `GitLabClient::get_issue_by_iid` (`src/gitlab/client.rs`) + +Add to existing `#[cfg(test)] mod tests` at line 766: + +```rust +#[tokio::test] +async fn get_issue_by_iid_returns_issue() { + use wiremock::{MockServer, Mock, ResponseTemplate}; + use wiremock::matchers::{method, path}; + + let mock_server = MockServer::start().await; + let issue_json = serde_json::json!({ + "id": 42, + "iid": 7, + "project_id": 1, + "title": "Test issue", + "description": "desc", + "state": "opened", + "created_at": "2024-01-15T10:00:00.000Z", + "updated_at": "2024-01-16T10:00:00.000Z", + "web_url": "https://gitlab.example.com/issues/7", + "author": {"id": 1, "username": "testuser", "name": "Test User"}, + "assignees": [], + "labels": [], + "milestone": null, + "due_date": null, + "references": {"short": "#7", "full": "group/project#7"} + }); + + Mock::given(method("GET")) + .and(path("/api/v4/projects/1/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json)) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + let issue = client.get_issue_by_iid(1, 7).await.unwrap(); + assert_eq!(issue.iid, 7); + assert_eq!(issue.title, "Test issue"); +} + +#[tokio::test] +async fn get_issue_by_iid_returns_not_found() { + use wiremock::{MockServer, Mock, ResponseTemplate}; + use wiremock::matchers::{method, path}; + + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/api/v4/projects/1/issues/999")) + .respond_with(ResponseTemplate::new(404).set_body_json( + serde_json::json!({"message": "404 Not found"}) + )) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + let result = client.get_issue_by_iid(1, 999).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), LoreError::GitLabNotFound { .. })); +} +``` + +Same pattern for `get_mr_by_iid` with `GitLabMergeRequest` JSON. + +### 1c. 
Test: Surgical ingest functions (`src/ingestion/surgical_tests.rs`) + +Create `src/ingestion/surgical_tests.rs` (referenced from `surgical.rs` with `#[cfg(test)] #[path = "surgical_tests.rs"] mod tests;`): + +```rust +use std::path::Path; +use crate::core::db::{create_connection, run_migrations}; +use crate::ingestion::surgical::{ + ingest_issue_by_iid_from_payload, ingest_mr_by_iid_from_payload, + SurgicalIngestResult, +}; +use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest}; + +fn test_config() -> crate::Config { + serde_json::from_value(serde_json::json!({ + "gitlab": { + "baseUrl": "https://gitlab.example.com", + "projects": ["group/project"] + }, + "storage": {} + })).unwrap() +} + +fn setup_db() -> rusqlite::Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn.execute( + "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) + VALUES (100, 'group/project', 'https://gitlab.example.com/group/project')", + [], + ).unwrap(); + conn +} + +fn make_test_issue(iid: i64) -> GitLabIssue { + serde_json::from_value(serde_json::json!({ + "id": 1000 + iid, + "iid": iid, + "project_id": 100, + "title": format!("Issue {iid}"), + "description": "test description", + "state": "opened", + "created_at": "2024-01-15T10:00:00.000Z", + "updated_at": "2024-01-16T10:00:00.000Z", + "web_url": format!("https://gitlab.example.com/issues/{iid}"), + "author": {"id": 1, "username": "testuser", "name": "Test User"}, + "assignees": [], + "labels": [], + "milestone": null, + "due_date": null, + "references": {"short": format!("#{iid}"), "full": format!("group/project#{iid}")} + })).unwrap() +} + +#[test] +fn ingest_issue_by_iid_inserts_and_marks_dirty() { + let conn = setup_db(); + let project_id = 1i64; // auto-assigned by INSERT above + let config = test_config(); + let issue = make_test_issue(7); + + let result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + 
assert_eq!(result.upserted, 1); + + // Verify issue exists + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM issues WHERE project_id = ? AND iid = 7", + [project_id], |r| r.get(0), + ).unwrap(); + assert_eq!(count, 1); + + // Verify dirty marker exists + let dirty: i64 = conn.query_row( + "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'issue'", + [], |r| r.get(0), + ).unwrap(); + assert!(dirty >= 1); + + // Verify dirty_source_keys were collected + assert!(!result.dirty_source_keys.is_empty(), + "Should have collected dirty source keys for scoped doc regeneration"); +} + +#[test] +fn ingest_issue_by_iid_resets_discussion_watermark() { + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + let issue = make_test_issue(7); + + // First insert + ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + // Simulate previous discussion sync by setting watermark + conn.execute( + "UPDATE issues SET discussions_synced_for_updated_at = updated_at + WHERE project_id = ? AND iid = 7", + [project_id], + ).unwrap(); + + // Second surgical ingest should reset the watermark + ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + let watermark: Option = conn.query_row( + "SELECT discussions_synced_for_updated_at FROM issues WHERE project_id = ? AND iid = 7", + [project_id], |r| r.get(0), + ).unwrap(); + assert!(watermark.is_none(), "Surgical ingest should reset discussion watermark to NULL"); +} + +#[test] +fn ingest_issue_by_iid_resets_event_watermark() { + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + let issue = make_test_issue(7); + + ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + // Set event watermark + conn.execute( + "UPDATE issues SET resource_events_synced_for_updated_at = updated_at + WHERE project_id = ? 
AND iid = 7", + [project_id], + ).unwrap(); + + // Surgical re-ingest + ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + let watermark: Option = conn.query_row( + "SELECT resource_events_synced_for_updated_at FROM issues WHERE project_id = ? AND iid = 7", + [project_id], |r| r.get(0), + ).unwrap(); + assert!(watermark.is_none(), "Surgical ingest should reset event watermark to NULL"); +} + +#[test] +fn duplicate_iids_are_idempotent() { + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + let issue = make_test_issue(7); + + // Ingest same issue twice + ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + let result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + assert_eq!(result.upserted, 1); + + // Only one row + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM issues WHERE project_id = ? AND iid = 7", + [project_id], |r| r.get(0), + ).unwrap(); + assert_eq!(count, 1); +} +``` + +Same pattern for MR tests (with `make_test_mr` and `ingest_mr_by_iid_from_payload`). + +### 1d. 
Scoping invariant tests (`src/ingestion/surgical_tests.rs`) + +These tests enforce correctness of docs scoping — the most critical safety property of surgical sync: + +```rust +#[test] +fn surgical_docs_scope_ignores_preexisting_dirty_rows() { + // Setup: insert a dirty_sources row for a DIFFERENT entity (simulating prior failed sync) + // Run surgical ingest for a new entity + // Call run_generate_docs_for_sources with only the surgical run's dirty keys + // Assert: pre-existing dirty row is UNTOUCHED (still in dirty_sources) + // Assert: only the surgical entity's docs were regenerated + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + + // Pre-existing dirty row for issue iid=99 (from prior sync) + conn.execute( + "INSERT INTO issues (project_id, gitlab_issue_id, iid, title, state, created_at, updated_at, web_url, last_seen_at) + VALUES (?1, 999, 99, 'Old issue', 'opened', 1000, 1000, 'https://example.com/99', 1000)", + [project_id], + ).unwrap(); + let old_issue_id: i64 = conn.query_row( + "SELECT id FROM issues WHERE iid = 99", [], |r| r.get(0), + ).unwrap(); + conn.execute( + "INSERT INTO dirty_sources (source_type, source_id, queued_at) VALUES ('issue', ?1, 1000)", + [old_issue_id], + ).unwrap(); + + // Surgical ingest of issue iid=7 + let issue = make_test_issue(7); + let result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + // The pre-existing dirty row must still exist + let old_dirty_exists: bool = conn.query_row( + "SELECT COUNT(*) > 0 FROM dirty_sources WHERE source_type = 'issue' AND source_id = ?1", + [old_issue_id], |r| r.get(0), + ).unwrap(); + assert!(old_dirty_exists, "Pre-existing dirty rows must be preserved"); + + // The surgical result's dirty_source_keys should NOT include the pre-existing row's key + assert!(!result.dirty_source_keys.iter().any(|(st, sid)| st == "issue" && *sid == old_issue_id), + "Surgical dirty_source_keys must not include pre-existing dirty rows"); 
+} + +#[test] +fn surgical_docs_scope_ignores_preexisting_dirty_rows_for_same_entity() { + // Edge case: pre-existing dirty row for the SAME entity (iid=7) from a prior failed sync + // Surgical re-ingest of iid=7 should still collect the key (UPSERT updates queued_at) + // but scoped doc regen uses the collected keys, which correctly identifies this entity + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + + // First ingest creates dirty row + let issue = make_test_issue(7); + let first_result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + // Simulate: the dirty row from first ingest was never processed (orphaned) + // Under UPSERT semantics, the (source_type, source_id) key persists + + // Second surgical ingest of same entity — should still collect the key + let second_result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &issue, 0).unwrap(); + + // Result should contain the key for this entity (UPSERT touched the row) + assert!(!second_result.dirty_source_keys.is_empty(), + "Dirty source keys should be collected even for re-ingested entities"); +} + +#[tokio::test] +async fn preflight_aggregates_multiple_missing_iids() { + // Setup: mock server returns 404 for iids 888 and 999, 200 for iid 7 + // Call preflight_fetch with all three + // Assert: result contains 1 fetched issue AND 2 failures (not fail-fast) + use wiremock::{MockServer, Mock, ResponseTemplate}; + use wiremock::matchers::{method, path}; + use crate::ingestion::surgical::preflight_fetch; + + let mock_server = MockServer::start().await; + + // iid 7 succeeds + Mock::given(method("GET")) + .and(path("/api/v4/projects/1/issues/7")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": 42, "iid": 7, "project_id": 1, "title": "Good issue", + "description": "", "state": "opened", + "created_at": "2024-01-15T10:00:00.000Z", + "updated_at": "2024-01-16T10:00:00.000Z", + "web_url": 
"https://gitlab.example.com/issues/7", + "author": {"id": 1, "username": "u", "name": "U"}, + "assignees": [], "labels": [], "milestone": null, + "due_date": null, "references": {"short": "#7", "full": "g/p#7"} + }))) + .mount(&mock_server).await; + + // iid 888 and 999 return 404 + for iid in [888, 999] { + Mock::given(method("GET")) + .and(path(format!("/api/v4/projects/1/issues/{iid}"))) + .respond_with(ResponseTemplate::new(404).set_body_json( + serde_json::json!({"message": "404 Not found"}) + )) + .mount(&mock_server).await; + } + + let client = crate::gitlab::GitLabClient::new(&mock_server.uri(), "test-token", None); + let result = preflight_fetch( + &client, 1, &[7, 888, 999], &[], Duration::from_secs(30), + ).await.unwrap(); + + assert_eq!(result.issues.len(), 1, "Should have 1 successful fetch"); + assert_eq!(result.failures.len(), 2, "Should aggregate both 404 failures"); + assert!(result.failures.iter().all(|f| f.stage == "fetch")); +} +``` + +### 1e. Test: `sync_runs` table (`src/ingestion/surgical_tests.rs`) + +```rust +#[test] +fn sync_run_is_persisted_and_updated() { + let conn = setup_db(); + // Insert a sync_run row using existing table + new columns + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, phase, surgical_iids_json) + VALUES (strftime('%s','now') * 1000, strftime('%s','now') * 1000, 'running', 'sync', 'surgical', 'preflight', '[7]')", + [], + ).unwrap(); + + let run_id = conn.last_insert_rowid(); + + // Update phase + conn.execute( + "UPDATE sync_runs SET phase = 'ingest' WHERE id = ?1", + [run_id], + ).unwrap(); + + let phase: String = conn.query_row( + "SELECT phase FROM sync_runs WHERE id = ?1", + [run_id], |r| r.get(0), + ).unwrap(); + assert_eq!(phase, "ingest"); +} +``` + +### 1f. 
Transactional rollback, TOCTOU, and failure hygiene tests (`src/ingestion/surgical_tests.rs`) + +These tests cover race conditions, rollback guarantees, and failure cleanup — where regressions hit first in production: + +```rust +#[test] +fn stale_payload_is_skipped_when_local_updated_at_is_newer() { + // Setup: insert an issue with updated_at = 2000 + // Create a preflight payload with updated_at = 1000 (stale) + // Surgical ingest should skip the entity and record skipped_stale + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + + // Insert issue with newer timestamp + conn.execute( + "INSERT INTO issues (project_id, gitlab_issue_id, iid, title, state, created_at, updated_at, web_url, last_seen_at) + VALUES (?1, 1007, 7, 'Fresh issue', 'opened', 1000, 2000, 'https://example.com/7', 2000)", + [project_id], + ).unwrap(); + + // Stale payload from preflight (updated_at older than local) + let stale_issue = make_test_issue_with_updated_at(7, 1000); + let result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &stale_issue, 0).unwrap(); + + assert_eq!(result.skipped_stale, 1, "Stale payload should be skipped"); + assert_eq!(result.upserted, 0, "No upsert for stale payload"); + + // Local row should be unchanged + let local_updated: i64 = conn.query_row( + "SELECT updated_at FROM issues WHERE project_id = ?1 AND iid = 7", + [project_id], |r| r.get(0), + ).unwrap(); + assert_eq!(local_updated, 2000, "Local fresher row must not be overwritten"); +} + +#[test] +fn equal_updated_at_but_newer_last_seen_is_skipped() { + // TOCTOU edge case: updated_at is equal but local last_seen_at is newer than preflight start + // This catches concurrent normal sync that fetched the same data after our preflight + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + + // Insert issue where updated_at = 1500 and last_seen_at = 3000 (seen after preflight started) + conn.execute( + "INSERT INTO issues (project_id, 
gitlab_issue_id, iid, title, state, created_at, updated_at, web_url, last_seen_at) + VALUES (?1, 1007, 7, 'Concurrent issue', 'opened', 1000, 1500, 'https://example.com/7', 3000)", + [project_id], + ).unwrap(); + + // Payload has same updated_at=1500, but preflight started at time 2000 (before the concurrent sync at 3000) + let payload = make_test_issue_with_updated_at(7, 1500); + let preflight_started_at_ms = 2000i64; + let result = ingest_issue_by_iid_from_payload(&conn, &config, project_id, &payload, preflight_started_at_ms).unwrap(); + + assert_eq!(result.skipped_stale, 1, "Equal updated_at but newer last_seen_at should be skipped"); + assert_eq!(result.upserted, 0, "No upsert when local was refreshed after our preflight"); +} + +#[test] +fn preflight_success_then_ingest_failure_rolls_back_all_content_writes() { + // Setup: preflight succeeds for 2 issues + // First issue ingests fine, second causes an error (e.g., constraint violation) + // Assert: NEITHER issue is in the DB (transaction rolled back) + // Assert: sync_runs phase is 'failed' + let conn = setup_db(); + let project_id = 1i64; + let config = test_config(); + + let issue_ok = make_test_issue(7); + let issue_bad = make_test_issue_bad_data(8); // Missing required field, will fail ingest + + let preflight = PreflightResult { + issues: vec![issue_ok, issue_bad], + merge_requests: vec![], + failures: vec![], + }; + + let result = ingest_preflight_results(&conn, &config, project_id, &preflight, 0); + assert!(result.is_err(), "Ingest should fail on bad data"); + + // Verify rollback: no issues written + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM issues WHERE project_id = ?1", + [project_id], |r| r.get(0), + ).unwrap(); + assert_eq!(count, 0, "Transaction rollback should leave zero issues"); +} + +#[test] +fn surgical_no_docs_requires_no_embed_validation() { + // Verify that surgical mode with --no-docs but without --no-embed is rejected + let options = SyncOptions { + issue_iids: vec![7], + 
no_docs: true, + no_embed: false, + ..SyncOptions::default() + }; + assert!(options.is_surgical()); + // The validation logic in handle_sync_cmd should reject this combination + // (tested via integration or by calling the validation function directly) +} +``` + +### 1g. Cancellation, timeout, scoped embed isolation, and payload integrity tests (`src/ingestion/surgical_tests.rs`) + +These tests cover the new failure-prone runtime paths added by constraints 15-17: + +```rust +#[tokio::test] +async fn cancellation_marks_sync_run_cancelled() { + // Setup: start a surgical sync run, trigger cancellation signal after preflight + // Assert: sync_runs row has status='cancelled', phase='cancelled', finished_at populated + // Assert: no content writes occurred after cancellation + todo!("implement after SyncRunRecorder extensions in Step 8b") +} + +#[tokio::test] +async fn dependent_timeout_records_entity_failure_and_continues() { + // Setup: mock server with one fast-responding entity and one that hangs + // Set surgical_entity_timeout_seconds = 1 + // Assert: fast entity's dependents succeed + // Assert: slow entity has EntityFailure { code: "TIMEOUT", stage: "discussions" } + // Assert: run still completes (not aborted) + todo!("implement after inline dependent helpers in Step 9b") +} + +#[tokio::test] +async fn scoped_embed_does_not_embed_unrelated_docs_created_after_docs_stage() { + // Setup: surgical sync generates docs for entity A (collect document_ids) + // Between docs and embed stage, simulate another sync creating unembedded docs for entity B + // Call run_embed_for_document_ids with ONLY entity A's doc IDs + // Assert: entity B's docs remain unembedded + // Assert: entity A's docs are embedded + todo!("implement after run_embed_for_document_ids in Step 9a") +} + +#[test] +fn payload_project_id_mismatch_is_rejected_in_preflight() { + // Setup: create a payload where project_id != expected gitlab_project_id + // Assert: preflight records EntityFailure { code: 
"PROJECT_MISMATCH" } + // Assert: mismatched payload is NOT included in successful fetches + todo!("implement after preflight_fetch integrity check in Step 7") +} +``` + +--- + +## Step 2: Add `--issue`, `--mr`, `-p`, `--preflight-only` to `SyncArgs` + +**File: `src/cli/mod.rs`, struct `SyncArgs` (line 755)** + +Add after the existing `no_file_changes` field (around line 784): + +```rust + /// Surgically sync specific issues by IID (repeatable, must be positive) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)] + pub issue: Vec<u64>, + + /// Surgically sync specific merge requests by IID (repeatable, must be positive) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)] + pub mr: Vec<u64>, + + /// Scope to a single project (required when --issue or --mr is used, falls back to config.defaultProject) + #[arg(short = 'p', long)] + pub project: Option<String>, + + /// Validate remote entities exist and permissions are valid, without any DB writes. + /// Runs the preflight network fetch phase only. Useful for agents to verify IIDs before committing to a full surgical sync. + #[arg(long, default_value_t = false)] + pub preflight_only: bool, +``` + +**Why `u64` with `range(1..)`**: IIDs are always positive integers. Parse-time validation via clap gives immediate, clear error messages (e.g., `error: 0 is not in 1..`) with zero runtime plumbing. The `u64` type makes invalid states unrepresentable. 
+ +--- + +## Step 3: Extend `SyncOptions` + +**File: `src/cli/commands/sync.rs`, struct `SyncOptions` (line 20)** + +Add fields: + +```rust +#[derive(Debug, Default)] +pub struct SyncOptions { + pub full: bool, + pub force: bool, + pub no_embed: bool, + pub no_docs: bool, + pub no_events: bool, + pub robot_mode: bool, + pub dry_run: bool, + // NEW: + pub issue_iids: Vec<u64>, + pub mr_iids: Vec<u64>, + pub project: Option<String>, + pub preflight_only: bool, +} +``` + +Add helper method: + +```rust +impl SyncOptions { + /// Maximum combined IIDs allowed in a single surgical sync. + const MAX_SURGICAL_TARGETS: usize = 100; + + pub fn is_surgical(&self) -> bool { + !self.issue_iids.is_empty() || !self.mr_iids.is_empty() + } +} +``` + +--- + +## Step 4: Wire new fields in `handle_sync_cmd` + +**File: `src/main.rs`, function `handle_sync_cmd` (line 2034)** + +After line 2058 (existing `dry_run` field), add: + +```rust + // Deduplicate IIDs before constructing SyncOptions + let mut issue_iids = args.issue; + let mut mr_iids = args.mr; + issue_iids.sort_unstable(); + issue_iids.dedup(); + mr_iids.sort_unstable(); + mr_iids.dedup(); + + let options = SyncOptions { + full: args.full && !args.no_full, + force: args.force && !args.no_force, + no_embed: args.no_embed, + no_docs: args.no_docs, + no_events: args.no_events, + robot_mode, + dry_run, + // NEW: + issue_iids, + mr_iids, + project: args.project, + preflight_only: args.preflight_only, + }; +``` + +Add validation before recording starts (after options creation): + +```rust + // Validate surgical mode constraints + if options.is_surgical() { + // Enforce hard cap on combined target count + let total_targets = options.issue_iids.len() + options.mr_iids.len(); + if total_targets > SyncOptions::MAX_SURGICAL_TARGETS { + return Err(Box::new(LoreError::Other( + format!( + "Too many surgical targets ({total_targets}). 
Maximum is {}.", + SyncOptions::MAX_SURGICAL_TARGETS + ) + ))); + } + + // Fall back to config.defaultProject when -p is omitted + let project = options.project.clone().or_else(|| config.default_project.clone()); + if project.is_none() { + return Err(Box::new(LoreError::Other( + "The --issue and --mr flags require --project (-p) or config.defaultProject".to_string() + ))); + } + // (reassign resolved project back into options) + + // Reject incompatible flags + if options.full { + return Err(Box::new(LoreError::Other( + "--full is incompatible with surgical sync (--issue/--mr)".to_string() + ))); + } + + // Reject --no-docs without --no-embed in surgical mode (embed leakage prevention) + if options.no_docs && !options.no_embed { + return Err(Box::new(LoreError::Other( + "In surgical mode, --no-docs requires --no-embed (to prevent embedding unrelated backlog docs)".to_string() + ))); + } + } + + // Validate preflight_only requires surgical mode + if options.preflight_only && !options.is_surgical() { + return Err(Box::new(LoreError::Other( + "--preflight-only requires --issue or --mr".to_string() + ))); + } +``` + +--- + +## Step 5: Add `get_issue_by_iid` and `get_mr_by_iid` to `GitLabClient` + +**File: `src/gitlab/client.rs`** + +Add after `paginate_issues` (around line 330), before `paginate_merge_requests`: + +```rust + /// Fetch a single issue by its project-scoped IID. + /// Uses: GET /api/v4/projects/:id/issues/:iid + pub async fn get_issue_by_iid( + &self, + gitlab_project_id: i64, + iid: u64, + ) -> Result<GitLabIssue> { + let path = format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}"); + self.request(&path).await + } + + /// Fetch a single merge request by its project-scoped IID. 
+ /// Uses: GET /api/v4/projects/:id/merge_requests/:iid + pub async fn get_mr_by_iid( + &self, + gitlab_project_id: i64, + iid: u64, + ) -> Result<GitLabMergeRequest> { + let path = format!("/api/v4/projects/{gitlab_project_id}/merge_requests/{iid}"); + self.request(&path).await + } +``` + +These reuse the existing `request()` method (line 117) which handles: +- Auth via `PRIVATE-TOKEN` header +- Rate limiting via `RateLimiter` +- Retry on 429 with `retry_after` +- JSON deserialization via `handle_response` +- Error mapping (401 → `GitLabAuthFailed`, 404 → `GitLabNotFound`) + +--- + +## Step 6: Make `process_single_issue` and `process_single_mr` `pub(crate)` + +**File: `src/ingestion/issues.rs`, line 143** + +Change `fn process_single_issue` → `pub(crate) fn process_single_issue` + +**File: `src/ingestion/merge_requests.rs`, line 144** + +Change `fn process_single_mr` → `pub(crate) fn process_single_mr` + +Also add to `src/ingestion/mod.rs` exports (not pub, just ensure accessible via `crate::ingestion::issues::process_single_issue`). + +--- + +## Step 7: Create `src/ingestion/surgical.rs` + +This is the core new module. It provides three layers: +1. **Payload processing** (sync, no network — testable) — takes a `GitLabIssue`/`GitLabMergeRequest` already fetched +2. **Preflight fetch** (async, hits GitLab API) — fetches all entities BEFORE any DB writes, **aggregating all failures** instead of failing fast +3. 
**Transactional ingest** — applies all DB mutations inside a transaction after successful preflight + +```rust +use std::time::Duration; + +use rusqlite::{Connection, OptionalExtension}; +use tracing::debug; + +use crate::Config; +use crate::core::error::Result; +use crate::gitlab::GitLabClient; +use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest}; +use crate::ingestion::issues::IssueForDiscussionSync; +use crate::ingestion::merge_requests::MrForDiscussionSync; + +use super::issues::process_single_issue; +use super::merge_requests::process_single_mr; + +/// Per-entity failure info for robot mode structured error reporting. +#[derive(Debug, Clone)] +pub struct EntityFailure { + pub entity_type: &'static str, // "issue" or "merge_request" + pub iid: u64, + pub stage: &'static str, // "fetch", "ingest", "discussions", etc. + pub code: String, // e.g. "NOT_FOUND", "NETWORK_ERROR" + pub message: String, +} + +/// A dirty source key: (source_type, source_id) matching dirty_sources PK. +pub type DirtySourceKey = (String, i64); + +#[derive(Debug, Default)] +pub struct SurgicalIngestResult { + pub upserted: usize, + pub labels_created: usize, + pub issue_disc_sync: Vec<IssueForDiscussionSync>, + pub mr_disc_sync: Vec<MrForDiscussionSync>, + /// Dirty source keys touched during this surgical ingest (for scoped doc regeneration). + /// Each key is `(source_type, source_id)` matching `dirty_sources` PK. + pub dirty_source_keys: Vec<DirtySourceKey>, + /// Per-entity failures for structured error reporting in robot mode + pub entity_failures: Vec<EntityFailure>, + /// Entities skipped because local row was newer than preflight payload (TOCTOU protection) + pub skipped_stale: usize, +} + +/// Check whether the local issue row has a fresher state than the payload. +/// Returns true if local is newer (payload is stale and should be skipped). +/// Uses two-tier check: (a) updated_at strictly newer, or (b) equal updated_at +/// but last_seen_at is newer than preflight start (concurrent sync fetched same data). 
+fn is_local_newer_issue(conn: &Connection, project_id: i64, iid: i64, payload_updated_at: i64, preflight_started_at_ms: i64) -> Result<bool> { + let row: Option<(i64, i64)> = conn.query_row( + "SELECT updated_at, last_seen_at FROM issues WHERE project_id = ?1 AND iid = ?2", + (project_id, iid), + |row| Ok((row.get(0)?, row.get(1)?)), + ).optional()?; + Ok(row.map_or(false, |(local_ts, local_seen)| { + local_ts > payload_updated_at + || (local_ts == payload_updated_at && local_seen > preflight_started_at_ms) + })) +} + +/// Check whether the local MR row has a fresher state than the payload. +fn is_local_newer_mr(conn: &Connection, project_id: i64, iid: i64, payload_updated_at: i64, preflight_started_at_ms: i64) -> Result<bool> { + let row: Option<(i64, i64)> = conn.query_row( + "SELECT updated_at, last_seen_at FROM merge_requests WHERE project_id = ?1 AND iid = ?2", + (project_id, iid), + |row| Ok((row.get(0)?, row.get(1)?)), + ).optional()?; + Ok(row.map_or(false, |(local_ts, local_seen)| { + local_ts > payload_updated_at + || (local_ts == payload_updated_at && local_seen > preflight_started_at_ms) + })) +} + +/// Process a single issue that has already been fetched from GitLab. +/// Upserts into DB, resets discussion + event watermarks so dependents re-sync. +/// Tracks dirty source keys for scoped doc regeneration. +/// Skips stale payloads to avoid TOCTOU overwrite after unlocked preflight. +/// +/// `preflight_started_at_ms`: ms-epoch timestamp captured before preflight fetch began. +/// Used for equal-timestamp TOCTOU tie-breaking via `last_seen_at`. +pub fn ingest_issue_by_iid_from_payload( + conn: &Connection, + config: &Config, + project_id: i64, + issue: &GitLabIssue, + preflight_started_at_ms: i64, +) -> Result<SurgicalIngestResult> { + let mut result = SurgicalIngestResult::default(); + + // TOCTOU guard: skip if local row is fresher than preflight payload + if is_local_newer_issue(conn, project_id, issue.iid, issue.updated_at, preflight_started_at_ms)? 
{ + result.skipped_stale = 1; + debug!(iid = issue.iid, "Skipping stale payload (local is newer)"); + return Ok(result); + } + + let labels_created = process_single_issue(conn, config, project_id, issue)?; + result.upserted = 1; + result.labels_created = labels_created; + + // Reset watermarks so discussions + events re-sync for this issue + conn.execute( + "UPDATE issues SET + discussions_synced_for_updated_at = NULL, + resource_events_synced_for_updated_at = NULL + WHERE project_id = ?1 AND iid = ?2", + (project_id, issue.iid), + )?; + + // Collect dirty source key for scoped doc regeneration + let local_issue_id: i64 = conn.query_row( + "SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2", + (project_id, issue.iid), + |row| row.get(0), + )?; + result.dirty_source_keys.push(("issue".to_string(), local_issue_id)); + + // Build the discussion sync descriptor + let row = conn.query_row( + "SELECT id, iid, updated_at FROM issues WHERE project_id = ?1 AND iid = ?2", + (project_id, issue.iid), + |row| { + Ok(IssueForDiscussionSync { + local_issue_id: row.get(0)?, + iid: row.get(1)?, + updated_at: row.get(2)?, + }) + }, + )?; + result.issue_disc_sync.push(row); + + debug!(iid = issue.iid, "Surgical issue ingest complete"); + Ok(result) +} + +/// Process a single MR that has already been fetched from GitLab. +/// Skips stale payloads to avoid TOCTOU overwrite after unlocked preflight. +pub fn ingest_mr_by_iid_from_payload( + conn: &Connection, + config: &Config, + project_id: i64, + mr: &GitLabMergeRequest, + preflight_started_at_ms: i64, +) -> Result<SurgicalIngestResult> { + let mut result = SurgicalIngestResult::default(); + + // TOCTOU guard: skip if local row is fresher than preflight payload + if is_local_newer_mr(conn, project_id, mr.iid, mr.updated_at, preflight_started_at_ms)? 
{ + result.skipped_stale = 1; + debug!(iid = mr.iid, "Skipping stale MR payload (local is newer)"); + return Ok(result); + } + + let mr_result = process_single_mr(conn, config, project_id, mr)?; + result.upserted = 1; + result.labels_created = mr_result.labels_created; + + // Reset watermarks + conn.execute( + "UPDATE merge_requests SET + discussions_synced_for_updated_at = NULL, + resource_events_synced_for_updated_at = NULL + WHERE project_id = ?1 AND iid = ?2", + (project_id, mr.iid), + )?; + + // Collect dirty source key + let local_mr_id: i64 = conn.query_row( + "SELECT id FROM merge_requests WHERE project_id = ?1 AND iid = ?2", + (project_id, mr.iid), + |row| row.get(0), + )?; + result.dirty_source_keys.push(("merge_request".to_string(), local_mr_id)); + + let row = conn.query_row( + "SELECT id, iid, updated_at FROM merge_requests WHERE project_id = ?1 AND iid = ?2", + (project_id, mr.iid), + |row| { + Ok(MrForDiscussionSync { + local_mr_id: row.get(0)?, + iid: row.get(1)?, + updated_at: row.get(2)?, + }) + }, + )?; + result.mr_disc_sync.push(row); + + debug!(iid = mr.iid, "Surgical MR ingest complete"); + Ok(result) +} + +/// Preflight: fetch all requested entities from GitLab without any DB writes. +/// Aggregates ALL failures instead of failing fast — agents get a complete error report in one pass. +/// Returns the fetched payloads plus any per-IID failures. +/// Caller MUST check `result.failures` and abort writes if non-empty. 
+pub async fn preflight_fetch( + client: &GitLabClient, + gitlab_project_id: i64, + issue_iids: &[u64], + mr_iids: &[u64], + entity_timeout: Duration, +) -> Result<PreflightResult> { + let mut result = PreflightResult::default(); + + for &iid in issue_iids { + debug!(iid, "Preflight: fetching issue"); + match tokio::time::timeout(entity_timeout, client.get_issue_by_iid(gitlab_project_id, iid)).await { + Ok(Ok(issue)) => { + // Payload integrity check: project_id must match requested gitlab_project_id + if issue.project_id != gitlab_project_id { + result.failures.push(EntityFailure { + entity_type: "issue", + iid, + stage: "fetch", + code: "PROJECT_MISMATCH".to_string(), + message: format!( + "Payload project_id {} does not match requested {}", + issue.project_id, gitlab_project_id + ), + }); + } else { + result.issues.push(issue); + } + } + Ok(Err(e)) => { + let code = classify_error_code(&e); + result.failures.push(EntityFailure { + entity_type: "issue", + iid, + stage: "fetch", + code, + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.failures.push(EntityFailure { + entity_type: "issue", + iid, + stage: "fetch", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + + for &iid in mr_iids { + debug!(iid, "Preflight: fetching MR"); + match tokio::time::timeout(entity_timeout, client.get_mr_by_iid(gitlab_project_id, iid)).await { + Ok(Ok(mr)) => { + // Payload integrity check: project_id must match + if mr.target_project_id.unwrap_or(mr.project_id) != gitlab_project_id { + result.failures.push(EntityFailure { + entity_type: "merge_request", + iid, + stage: "fetch", + code: "PROJECT_MISMATCH".to_string(), + message: format!( + "Payload project_id does not match requested {}", + gitlab_project_id + ), + }); + } else { + result.merge_requests.push(mr); + } + } + Ok(Err(e)) => { + let code = classify_error_code(&e); + result.failures.push(EntityFailure { + entity_type: "merge_request", + iid, + stage: 
"fetch", + code, + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.failures.push(EntityFailure { + entity_type: "merge_request", + iid, + stage: "fetch", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + + Ok(result) +} + +/// Map a LoreError to a machine-readable error code string. +fn classify_error_code(e: &crate::core::error::LoreError) -> String { + match e { + crate::core::error::LoreError::GitLabNotFound { .. } => "NOT_FOUND".to_string(), + crate::core::error::LoreError::GitLabAuthFailed { .. } => "AUTH_FAILED".to_string(), + crate::core::error::LoreError::RateLimited { .. } => "RATE_LIMITED".to_string(), + _ => "FETCH_ERROR".to_string(), + } +} + +#[derive(Debug, Default)] +pub struct PreflightResult { + pub issues: Vec, + pub merge_requests: Vec, + /// Per-IID failures collected during preflight (empty = all succeeded) + pub failures: Vec, +} + +impl PreflightResult { + pub fn has_failures(&self) -> bool { + !self.failures.is_empty() + } +} + +/// Ingest all preflight-fetched entities into the DB inside a transaction. +/// Returns combined result with all dirty source keys and discussion sync descriptors. +/// +/// `preflight_started_at_ms`: ms-epoch timestamp captured before preflight began, +/// used for TOCTOU tie-breaking on equal `updated_at` timestamps. 
+pub fn ingest_preflight_results( + conn: &Connection, + config: &Config, + project_id: i64, + preflight: &PreflightResult, + preflight_started_at_ms: i64, +) -> Result { + let mut combined = SurgicalIngestResult::default(); + + // All writes happen inside a transaction — if any fails, all roll back + let tx = conn.unchecked_transaction()?; + + for issue in &preflight.issues { + let single = ingest_issue_by_iid_from_payload(&tx, config, project_id, issue, preflight_started_at_ms)?; + combined.upserted += single.upserted; + combined.skipped_stale += single.skipped_stale; + combined.labels_created += single.labels_created; + combined.issue_disc_sync.extend(single.issue_disc_sync); + combined.dirty_source_keys.extend(single.dirty_source_keys); + } + + for mr in &preflight.merge_requests { + let single = ingest_mr_by_iid_from_payload(&tx, config, project_id, mr, preflight_started_at_ms)?; + combined.upserted += single.upserted; + combined.skipped_stale += single.skipped_stale; + combined.labels_created += single.labels_created; + combined.mr_disc_sync.extend(single.mr_disc_sync); + combined.dirty_source_keys.extend(single.dirty_source_keys); + } + + tx.commit()?; + Ok(combined) +} + +#[cfg(test)] +#[path = "surgical_tests.rs"] +mod tests; +``` + +--- + +## Step 8: Register `surgical` module + +**File: `src/ingestion/mod.rs`** + +Add after line 8 (`pub mod orchestrator;`): + +```rust +pub mod surgical; +``` + +--- + +## Step 8a: Extend existing `sync_runs` table via migration + +**File: `src/core/db.rs`** + +The `sync_runs` table already exists (migration 001, enriched in migration 014). Add a new migration to extend it with surgical sync columns: + +```sql +-- Migration 027: Extend sync_runs for surgical sync observability +-- Adds mode/phase tracking and surgical-specific counters. +-- Reuses existing sync_runs row lifecycle (SyncRunRecorder). 
+ALTER TABLE sync_runs ADD COLUMN mode TEXT; -- 'standard' | 'surgical' (NULL for pre-existing rows) +ALTER TABLE sync_runs ADD COLUMN phase TEXT; -- preflight|ingest|dependents|docs|embed|done|failed +ALTER TABLE sync_runs ADD COLUMN surgical_iids_json TEXT; -- JSON: {"issues":[7,8],"mrs":[101]} +ALTER TABLE sync_runs ADD COLUMN issues_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN issues_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN skipped_stale INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_regenerated INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_embedded INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN warnings_count INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN cancelled_at INTEGER; + +-- Indexes for observability queries on surgical runs +CREATE INDEX IF NOT EXISTS idx_sync_runs_mode_started + ON sync_runs(mode, started_at DESC); +CREATE INDEX IF NOT EXISTS idx_sync_runs_status_phase_started + ON sync_runs(status, phase, started_at DESC); +``` + +**Note:** No changes to `pending_dependent_fetches` — surgical mode bypasses the queue entirely (see constraint 4). + +--- + +## Step 8b: Extend `SyncRunRecorder` for surgical mode + +**File: `src/core/sync.rs`** (or wherever `SyncRunRecorder` lives) + +Add methods for surgical mode lifecycle management. This replaces all ad-hoc SQL that Step 9 would otherwise need. The recorder owns the row ID and handles all `sync_runs` mutations: + +```rust +impl SyncRunRecorder { + /// Start a new surgical sync run. Records mode='surgical' and phase='preflight'. 
+ pub fn start_surgical( + conn: &Connection, + iids_json: &str, + ) -> Result { + // INSERT INTO sync_runs with mode='surgical', phase='preflight', surgical_iids_json + // Returns Self with the row id for subsequent updates + todo!() + } + + /// Update the current phase (preflight -> ingest -> dependents -> docs -> embed -> done) + pub fn set_phase(&self, conn: &Connection, phase: &str) -> Result<()> { + conn.execute( + "UPDATE sync_runs SET phase = ?1, heartbeat_at = strftime('%s','now') * 1000 WHERE id = ?2", + rusqlite::params![phase, self.row_id], + )?; + Ok(()) + } + + /// Update surgical-specific counters (issues_fetched, mrs_fetched, etc.) + pub fn set_counters(&self, conn: &Connection, counters: &SurgicalCounters) -> Result<()> { + // Single UPDATE with all counter fields + todo!() + } + + /// Finalize: succeeded + pub fn finish_succeeded(&self, conn: &Connection) -> Result<()> { + // SET phase='done', status='succeeded', finished_at=now + todo!() + } + + /// Finalize: succeeded with warnings (partial dependent failures) + pub fn finish_succeeded_with_warnings(&self, conn: &Connection, warnings_count: usize) -> Result<()> { + // SET phase='done', status='succeeded', warnings_count=N, finished_at=now + todo!() + } + + /// Finalize: failed + pub fn finish_failed(&self, conn: &Connection, error: &str) -> Result<()> { + // SET phase='failed', status='failed', error=msg, finished_at=now + todo!() + } + + /// Finalize: cancelled (shutdown signal received) + pub fn finish_cancelled(&self, conn: &Connection) -> Result<()> { + // SET phase='cancelled', status='cancelled', cancelled_at=now, finished_at=now + todo!() + } + + /// Heartbeat if enough time has elapsed since last heartbeat + pub fn heartbeat_if_due(&self, conn: &Connection) -> Result<()> { + // UPDATE heartbeat_at if interval exceeded + todo!() + } +} + +/// Counter snapshot for surgical sync run updates +#[derive(Debug, Default)] +pub struct SurgicalCounters { + pub issues_fetched: usize, + pub 
mrs_fetched: usize, + pub issues_ingested: usize, + pub mrs_ingested: usize, + pub skipped_stale: usize, + pub docs_regenerated: usize, + pub docs_embedded: usize, +} +``` + +--- + +## Step 9: Create `run_sync_surgical` in `src/cli/commands/sync.rs` + +This is the surgical variant of `run_sync`. It uses a preflight-then-commit pattern: all primary entity fetches happen first (WITHOUT the sync lock), then DB writes happen transactionally (WITH the sync lock). Dependent stages (discussions, resource events, MR closes_issues, MR diffs) run inline per-entity — they do NOT use `pending_dependent_fetches`. + +**IMPORTANT implementation notes:** +- **Dry-run is zero-write**: Check `options.dry_run` BEFORE lock acquisition and DB connection. No side effects at all. +- **Preflight-only**: Check `options.preflight_only` after preflight fetch completes. Return results (including any failures) with zero content DB writes. Control-plane run-ledger writes are allowed. +- **Lock window minimization**: Lock is NOT held during preflight network I/O. Acquired immediately before Stage 1 (first content DB mutation). Released before embed stage. +- **Aggregate preflight failures**: Preflight collects ALL per-IID failures (including payload integrity mismatches). If any failures exist, abort with zero content DB writes and return structured error report. +- **Durable run state via SyncRunRecorder**: Use `SyncRunRecorder::start_surgical(...)` at run start. Call `recorder.set_phase()` after each stage transition. On success call `recorder.finish_succeeded()` (or `finish_succeeded_with_warnings`). On failure call `recorder.finish_failed()`. On cancellation call `recorder.finish_cancelled()`. No raw SQL updates to `sync_runs`. +- **Sync lock**: Acquire `AppLock("sync")` using the same stale lock and heartbeat settings as `run_ingest`. Hold lock for ingest + dependents + docs only. Release before embed. 
+- **Transactional ingest**: After successful preflight, apply all DB mutations inside `unchecked_transaction()`. +- **Stale-write protection**: Each entity's ingest checks local `updated_at` (and `last_seen_at` for equal-timestamp tie-breaking) vs preflight payload. Stale payloads are skipped (not overwritten). +- **Inline dependents**: Discussions, resource events, MR closes_issues, and MR diffs are fetched and written inline per-entity. No jobs are enqueued to `pending_dependent_fetches`. Individual dependent stage failures are recorded per-entity in `entity_failures` but do not abort the run. +- **Per-entity timeout**: Each dependent network call is wrapped in `tokio::time::timeout(Duration::from_secs(config.sync.surgical_entity_timeout_seconds.unwrap_or(30)))`. Timeouts produce `EntityFailure { code: "TIMEOUT" }`. +- **Scoped doc regeneration**: Call `run_generate_docs_for_sources(config, &dirty_source_keys)` which returns `GenerateDocsResult` including `regenerated_document_ids`. +- **Scoped embed**: Call `run_embed_for_document_ids(config, ®enerated_document_ids, signal)` — NOT global `run_embed`. This ensures isolation even after lock release and under concurrent normal sync. +- **Embed guard**: Embed stage only runs when surgical docs actually regenerated documents in this run (`!options.no_docs && !regenerated_document_ids.is_empty()`). +- **prefetch_mr_discussions signature**: Takes `(client, gitlab_project_id, local_project_id, MrForDiscussionSync)` — NOT a slice. +- **Preflight timestamp**: Capture `preflight_started_at_ms` before the first network call and pass it through to ingest functions for TOCTOU tie-breaking. +- **Cancellation handling**: Every `signal.is_cancelled()` check MUST call `recorder.finish_cancelled()` before returning. No early returns without finalizing the run row. +- **Payload integrity**: Preflight validates `payload.project_id == gitlab_project_id` for every fetched entity. 
Mismatches become `EntityFailure { code: "PROJECT_MISMATCH" }`. + +Add this function after `run_sync` (around line 360): + +```rust +pub async fn run_sync_surgical( + config: &Config, + options: &SyncOptions, + run_id: &str, + signal: &ShutdownSignal, +) -> Result { + use crate::core::db::create_connection; + use crate::core::lock::{AppLock, LockOptions}; + use crate::core::paths::get_db_path; + use crate::core::project::resolve_project; + use crate::core::sync::{SyncRunRecorder, SurgicalCounters}; + use crate::gitlab::GitLabClient; + use crate::ingestion::discussions::ingest_issue_discussions; + use crate::ingestion::mr_discussions::prefetch_mr_discussions; + use crate::ingestion::mr_discussions::write_prefetched_mr_discussions; + use crate::ingestion::surgical::{preflight_fetch, ingest_preflight_results}; + + let span = tracing::info_span!("sync_surgical", %run_id); + + async move { + let mut result = SyncResult { + run_id: run_id.to_string(), + ..SyncResult::default() + }; + + // ── Dry-run: zero side effects (no lock, no DB, no network) ── + if options.dry_run { + let project_str = options.project.as_deref().expect("validated in handle_sync_cmd"); + info!( + issues = ?options.issue_iids, + mrs = ?options.mr_iids, + project = project_str, + "Surgical sync dry-run: would fetch these IIDs" + ); + return Ok(result); + } + + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let project_str = options.project.as_deref().expect("validated in handle_sync_cmd"); + let project_id = resolve_project(&conn, project_str)?; + let gitlab_project_id: i64 = conn.query_row( + "SELECT gitlab_project_id FROM projects WHERE id = ?1", + [project_id], + |r| r.get(0), + )?; + + let token = config.resolve_token()?; + let client = GitLabClient::new( + &config.gitlab.base_url, &token, config.sync.rate_limit_rps, + ); + + // ── Record sync run via SyncRunRecorder (not ad-hoc SQL) ── + let iids_json = 
serde_json::to_string(&serde_json::json!({ + "issues": options.issue_iids, + "mrs": options.mr_iids, + })).unwrap_or_default(); + let recorder = SyncRunRecorder::start_surgical(&conn, &iids_json)?; + + // ── Capture preflight start timestamp for TOCTOU tie-breaking ── + let preflight_started_at_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() as i64; + + // ── Stage 0: Preflight fetch (NO lock, NO content DB writes — network only) ── + // Lock is NOT held during preflight to minimize contention with normal syncs. + let stage_start = Instant::now(); + let total_items = options.issue_iids.len() + options.mr_iids.len(); + let entity_timeout = Duration::from_secs( + config.sync.surgical_entity_timeout_seconds.unwrap_or(30) as u64 + ); + let spinner = stage_spinner_v2( + Icons::sync(), + "Preflight", + &format!("fetching {} items...", total_items), + options.robot_mode, + ); + + let preflight = preflight_fetch( + &client, gitlab_project_id, &options.issue_iids, &options.mr_iids, entity_timeout, + ).await?; + + // Update run ledger via recorder + recorder.set_phase(&conn, "preflight")?; + recorder.set_counters(&conn, &SurgicalCounters { + issues_fetched: preflight.issues.len(), + mrs_fetched: preflight.merge_requests.len(), + ..Default::default() + })?; + + let preflight_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("preflight".to_string(), preflight_elapsed.as_millis() as u64); + + // Check for preflight failures — abort with structured error report if any + if preflight.has_failures() { + let failure_summary = preflight.failures.iter() + .map(|f| format!("{} #{}: {} ({})", f.entity_type, f.iid, f.code, f.message)) + .collect::>() + .join("; "); + + let fail_icon = color_icon(Icons::error(), true); + emit_stage_line(&spinner, &fail_icon, "Preflight", + &format!("{} of {} failed: {}", preflight.failures.len(), total_items, failure_summary), + preflight_elapsed); + + // Record failure via recorder + 
recorder.finish_failed(&conn, &failure_summary)?; + + result.entity_failures = preflight.failures.clone(); + return Err(LoreError::SurgicalPreflightFailed { + run_id: run_id.to_string(), + total: total_items, + failures: preflight.failures, + }.into()); + } + + let preflight_summary = format!( + "{} issues, {} MRs fetched", + preflight.issues.len(), preflight.merge_requests.len() + ); + let preflight_icon = color_icon(Icons::success(), false); + emit_stage_line(&spinner, &preflight_icon, "Preflight", &preflight_summary, preflight_elapsed); + + // ── Preflight-only mode: return after successful preflight, zero content DB writes ── + if options.preflight_only { + result.requested_count = total_items; + result.fetched_count = preflight.issues.len() + preflight.merge_requests.len(); + recorder.finish_succeeded(&conn)?; + return Ok(result); + } + + if signal.is_cancelled() { + recorder.finish_cancelled(&conn)?; + return Ok(result); + } + + // ── Acquire sync lock ONLY for mutation phases ── + // Lock is acquired here, AFTER preflight completes, to minimize contention. 
+ let lock_conn = create_connection(&db_path)?; + let mut lock = AppLock::new( + lock_conn, + LockOptions { + name: "sync".to_string(), + stale_lock_minutes: config.sync.stale_lock_minutes, + heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds, + }, + ); + lock.acquire(false)?; // not force — respect existing locks + + // ── Stage 1: Transactional ingest (all-or-nothing content DB writes) ── + let stage_start = Instant::now(); + let spinner = stage_spinner_v2( + Icons::sync(), + "Ingest", + "writing to database...", + options.robot_mode, + ); + + recorder.set_phase(&conn, "ingest")?; + + let ingest_result = match ingest_preflight_results( + &conn, config, project_id, &preflight, preflight_started_at_ms, + ) { + Ok(r) => r, + Err(e) => { + recorder.finish_failed(&conn, &e.to_string())?; + return Err(e); + } + }; + + result.issues_updated = preflight.issues.len(); + result.mrs_updated = preflight.merge_requests.len(); + result.skipped_stale = ingest_result.skipped_stale; + let all_dirty_source_keys = ingest_result.dirty_source_keys.clone(); + + // Update run ledger counters + recorder.set_counters(&conn, &SurgicalCounters { + issues_fetched: preflight.issues.len(), + mrs_fetched: preflight.merge_requests.len(), + issues_ingested: result.issues_updated, + mrs_ingested: result.mrs_updated, + skipped_stale: result.skipped_stale, + ..Default::default() + })?; + + let ingest_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("ingest".to_string(), ingest_elapsed.as_millis() as u64); + + let ingest_summary = format!( + "{} issues, {} MRs ingested{}", + result.issues_updated, result.mrs_updated, + if result.skipped_stale > 0 { format!(", {} skipped (stale)", result.skipped_stale) } else { String::new() } + ); + let ingest_icon = color_icon(Icons::success(), false); + emit_stage_line(&spinner, &ingest_icon, "Ingest", &ingest_summary, ingest_elapsed); + + if signal.is_cancelled() { + recorder.finish_cancelled(&conn)?; + return Ok(result); + } + + 
// ── Stage 2: Dependent stages (discussions, events, MR dependents) ── + // All dependents run INLINE per-entity — no jobs are enqueued to pending_dependent_fetches. + // Individual dependent failures are recorded per-entity but do not abort the run. + // Each network call is wrapped in tokio::time::timeout for bounded execution. + recorder.set_phase(&conn, "dependents")?; + + let stage_start = Instant::now(); + + // Stage 2a: Discussions for issues + if !ingest_result.issue_disc_sync.is_empty() { + for issue_info in &ingest_result.issue_disc_sync { + match tokio::time::timeout(entity_timeout, ingest_issue_discussions( + &conn, &client, config, gitlab_project_id, project_id, + std::slice::from_ref(issue_info), + )).await { + Ok(Ok(_)) => result.discussions_fetched += 1, + Ok(Err(e)) => { + result.entity_failures.push(EntityFailure { + entity_type: "issue", + iid: issue_info.iid as u64, + stage: "discussions", + code: "FETCH_ERROR".to_string(), + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "issue", + iid: issue_info.iid as u64, + stage: "discussions", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + } + + // Stage 2b: Resource events for issues (inline, no queue) + if !options.no_events && config.sync.fetch_resource_events { + for issue_info in &ingest_result.issue_disc_sync { + match tokio::time::timeout(entity_timeout, sync_issue_resource_events_direct( + &conn, &client, config, gitlab_project_id, project_id, issue_info, + )).await { + Ok(Ok(count)) => result.resource_events_fetched += count, + Ok(Err(e)) => { + result.entity_failures.push(EntityFailure { + entity_type: "issue", + iid: issue_info.iid as u64, + stage: "resource_events", + code: "FETCH_ERROR".to_string(), + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "issue", + iid: issue_info.iid as 
u64, + stage: "resource_events", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + } + + // Stage 2c: Discussions for MRs + if !ingest_result.mr_disc_sync.is_empty() { + for mr_info in &ingest_result.mr_disc_sync { + let prefetched = match tokio::time::timeout(entity_timeout, prefetch_mr_discussions( + &client, gitlab_project_id, project_id, mr_info.clone(), + )).await { + Ok(p) => p, + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "discussions", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + continue; + } + }; + match write_prefetched_mr_discussions( + &conn, config, project_id, &[prefetched], + ) { + Ok(disc_result) => result.discussions_fetched += disc_result.len(), + Err(e) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "discussions", + code: "WRITE_ERROR".to_string(), + message: e.to_string(), + }); + } + } + } + } + + // Stage 2d: Resource events for MRs (inline, no queue) + if !options.no_events && config.sync.fetch_resource_events && !ingest_result.mr_disc_sync.is_empty() { + for mr_info in &ingest_result.mr_disc_sync { + match tokio::time::timeout(entity_timeout, sync_mr_resource_events_direct( + &conn, &client, config, gitlab_project_id, project_id, mr_info, + )).await { + Ok(Ok(count)) => result.resource_events_fetched += count, + Ok(Err(e)) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "resource_events", + code: "FETCH_ERROR".to_string(), + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "resource_events", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", 
entity_timeout.as_secs()), + }); + } + } + } + } + + // Stage 2e: MR closes_issues (inline, no queue) + if config.sync.fetch_mr_closes_issues.unwrap_or(true) && !ingest_result.mr_disc_sync.is_empty() { + for mr_info in &ingest_result.mr_disc_sync { + match tokio::time::timeout(entity_timeout, sync_mr_closes_issues_direct( + &conn, &client, config, gitlab_project_id, project_id, mr_info, + )).await { + Ok(Ok(_)) => {}, + Ok(Err(e)) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "mr_closes_issues", + code: "FETCH_ERROR".to_string(), + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "mr_closes_issues", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + } + + // Stage 2f: MR diffs (inline, no queue) + if config.sync.fetch_mr_diffs.unwrap_or(true) && !ingest_result.mr_disc_sync.is_empty() { + for mr_info in &ingest_result.mr_disc_sync { + match tokio::time::timeout(entity_timeout, sync_mr_diffs_direct( + &conn, &client, config, gitlab_project_id, project_id, mr_info, + )).await { + Ok(Ok(_)) => {}, + Ok(Err(e)) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "mr_diffs", + code: "FETCH_ERROR".to_string(), + message: e.to_string(), + }); + } + Err(_elapsed) => { + result.entity_failures.push(EntityFailure { + entity_type: "merge_request", + iid: mr_info.iid as u64, + stage: "mr_diffs", + code: "TIMEOUT".to_string(), + message: format!("Timed out after {}s", entity_timeout.as_secs()), + }); + } + } + } + } + + let dependents_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("dependents".to_string(), dependents_elapsed.as_millis() as u64); + + if signal.is_cancelled() { + recorder.finish_cancelled(&conn)?; + return Ok(result); + } 
+ + // ── Stage 3: Docs ── (scoped to dirty sources from THIS surgical run only) + let mut regenerated_document_ids: Vec = Vec::new(); + if !options.no_docs && !all_dirty_source_keys.is_empty() { + recorder.set_phase(&conn, "docs")?; + + let stage_start = Instant::now(); + let spinner = stage_spinner_v2(Icons::sync(), "Docs", "generating...", options.robot_mode); + // Process ONLY the dirty sources touched by this surgical run. + // Uses run_generate_docs_for_sources which filters by (source_type, source_id) PK + // and deletes only those specific dirty_sources rows after processing. + // Returns regenerated_document_ids for scoped embedding. + let docs_result = run_generate_docs_for_sources(config, &all_dirty_source_keys)?; + result.documents_regenerated = docs_result.regenerated; + result.documents_errored = docs_result.errored; + regenerated_document_ids = docs_result.regenerated_document_ids; + + // Update run ledger + recorder.set_counters(&conn, &SurgicalCounters { + issues_fetched: preflight.issues.len(), + mrs_fetched: preflight.merge_requests.len(), + issues_ingested: result.issues_updated, + mrs_ingested: result.mrs_updated, + skipped_stale: result.skipped_stale, + docs_regenerated: result.documents_regenerated, + ..Default::default() + })?; + + let docs_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("docs".to_string(), docs_elapsed.as_millis() as u64); + + let docs_summary = format!("{} documents generated", result.documents_regenerated); + let docs_icon = color_icon( + if docs_result.errored > 0 { Icons::warning() } else { Icons::success() }, + docs_result.errored > 0, + ); + emit_stage_line(&spinner, &docs_icon, "Docs", &docs_summary, docs_elapsed); + } + + // ── Release sync lock before embed (embed is idempotent, doesn't need sync lock) ── + drop(lock); + + // ── Stage 4: Embed ── + // SCOPED: only embeds documents regenerated by THIS surgical run (via document_ids). 
+ // Uses run_embed_for_document_ids — NOT global run_embed — to ensure isolation + // even after lock release and under concurrent normal sync activity. + if !options.no_embed && !options.no_docs && !regenerated_document_ids.is_empty() { + recorder.set_phase(&conn, "embed")?; + + let stage_start = Instant::now(); + let spinner = stage_spinner_v2(Icons::sync(), "Embed", "preparing...", options.robot_mode); + match run_embed_for_document_ids(config, ®enerated_document_ids, signal).await { + Ok(embed_result) => { + result.documents_embedded = embed_result.docs_embedded; + result.embedding_failed = embed_result.failed; + + // Update run ledger + recorder.set_counters(&conn, &SurgicalCounters { + issues_fetched: preflight.issues.len(), + mrs_fetched: preflight.merge_requests.len(), + issues_ingested: result.issues_updated, + mrs_ingested: result.mrs_updated, + skipped_stale: result.skipped_stale, + docs_regenerated: result.documents_regenerated, + docs_embedded: result.documents_embedded, + })?; + + let embed_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("embed".to_string(), embed_elapsed.as_millis() as u64); + + let embed_summary = format!("{} chunks embedded", embed_result.chunks_embedded); + let embed_icon = color_icon( + if embed_result.failed > 0 { Icons::warning() } else { Icons::success() }, + embed_result.failed > 0, + ); + emit_stage_line(&spinner, &embed_icon, "Embed", &embed_summary, embed_elapsed); + } + Err(e) => { + let embed_elapsed = stage_start.elapsed(); + result.stage_timings_ms.insert("embed".to_string(), embed_elapsed.as_millis() as u64); + + let warn_icon = color_icon(Icons::warning(), true); + emit_stage_line(&spinner, &warn_icon, "Embed", &format!("skipped ({e})"), embed_elapsed); + warn!(error = %e, "Embedding stage failed, continuing"); + } + } + } + + // ── Mark run complete ── + let warnings_count = result.entity_failures.len(); + if warnings_count > 0 { + recorder.finish_succeeded_with_warnings(&conn, warnings_count)?; + 
} else { + recorder.finish_succeeded(&conn)?; + } + + result.requested_count = total_items; + result.fetched_count = preflight.issues.len() + preflight.merge_requests.len(); + result.processed_count = result.issues_updated + result.mrs_updated; + + Ok(result) + } + .instrument(span) + .await +} +``` + +### Step 9a: Implement `run_generate_docs_for_sources` and `run_embed_for_document_ids` + +**File: `src/documents/mod.rs`** (or wherever `run_generate_docs` lives) + +`run_generate_docs_for_sources` is a ~20-line variant of the existing `run_generate_docs` that filters by `(source_type, source_id)` primary key, processes only those docs, then deletes only those specific `dirty_sources` rows. It additionally returns the IDs of regenerated documents for scoped embedding: + +```rust +/// Scoped doc regeneration: process ONLY the specified dirty source keys. +/// Used by surgical sync to avoid draining the global dirty queue. +/// Each key is `(source_type, source_id)` matching `dirty_sources` PK. +/// Returns regenerated document IDs for use with scoped embedding. +pub fn run_generate_docs_for_sources( + config: &Config, + source_keys: &[(String, i64)], +) -> Result { + // Same as run_generate_docs but with: + // 1. SELECT ... FROM dirty_sources WHERE (source_type, source_id) IN (...) instead of full scan + // 2. DELETE FROM dirty_sources WHERE (source_type, source_id) IN (...) instead of full drain + // 3. Collect document IDs of regenerated rows into regenerated_document_ids + // All other logic (doc template rendering, indexing) is identical. 
+ todo!("implement scoped variant") +} +``` + +**`GenerateDocsResult`** must be extended with: +```rust +pub struct GenerateDocsResult { + pub regenerated: usize, + pub errored: usize, + /// Document IDs of all regenerated documents (for scoped embedding in surgical mode) + pub regenerated_document_ids: Vec, +} +``` + +**File: `src/embedding/mod.rs`** (or wherever `run_embed` lives) + +Add a scoped embedding function that processes only specific document IDs: + +```rust +/// Scoped embedding: embed ONLY the specified document IDs. +/// Used by surgical sync to ensure embed isolation after lock release. +/// Unlike global `run_embed` (which processes all unembedded docs), +/// this ensures only documents from the current surgical run are embedded. +pub async fn run_embed_for_document_ids( + config: &Config, + document_ids: &[i64], + signal: &ShutdownSignal, +) -> Result { + // Same as run_embed but with: + // 1. SELECT ... FROM documents WHERE id IN (...) AND embedding IS NULL + // instead of SELECT ... FROM documents WHERE embedding IS NULL + // 2. All other logic (chunking, batching, Ollama calls) is identical + todo!("implement scoped variant") +} +``` + +### Step 9b: Implement inline dependent helpers + +**File: `src/ingestion/surgical.rs`** (or a new `src/ingestion/surgical_dependents.rs`) + +These functions fetch and write dependent data inline for a single entity, bypassing `pending_dependent_fetches` entirely. They are thin wrappers around the existing fetch + write logic from `orchestrator.rs`, extracted to operate on a single entity: + +```rust +/// Fetch and write resource events for a single issue, inline (no queue). 
+pub(crate) async fn sync_issue_resource_events_direct( + conn: &Connection, + client: &GitLabClient, + config: &Config, + gitlab_project_id: i64, + project_id: i64, + issue_info: &IssueForDiscussionSync, +) -> Result { + // Fetch resource events for this issue from GitLab API + // Write to resource_state_events / resource_label_events / resource_milestone_events + // Return count of events written + todo!("extract from orchestrator drain logic") +} + +/// Fetch and write resource events for a single MR, inline (no queue). +pub(crate) async fn sync_mr_resource_events_direct( + conn: &Connection, + client: &GitLabClient, + config: &Config, + gitlab_project_id: i64, + project_id: i64, + mr_info: &MrForDiscussionSync, +) -> Result { + todo!("extract from orchestrator drain logic") +} + +/// Fetch and write closes_issues data for a single MR, inline (no queue). +pub(crate) async fn sync_mr_closes_issues_direct( + conn: &Connection, + client: &GitLabClient, + config: &Config, + gitlab_project_id: i64, + project_id: i64, + mr_info: &MrForDiscussionSync, +) -> Result<()> { + todo!("extract from orchestrator drain logic") +} + +/// Fetch and write diff data for a single MR, inline (no queue). +pub(crate) async fn sync_mr_diffs_direct( + conn: &Connection, + client: &GitLabClient, + config: &Config, + gitlab_project_id: i64, + project_id: i64, + mr_info: &MrForDiscussionSync, +) -> Result<()> { + todo!("extract from orchestrator drain logic") +} +``` + +These are extracted from the existing drain loop bodies in `orchestrator.rs`. The drain functions iterate over queued jobs and call per-entity logic — these helpers are that per-entity logic, made callable directly. + +--- + +## Step 9d: Add `LoreError::SurgicalPreflightFailed` variant + +**File: `src/core/error.rs`** + +Add a typed error variant for surgical preflight failures. 
This preserves machine semantics for robot mode and enables structured exit codes: + +```rust + /// Surgical sync preflight failed — one or more IIDs could not be fetched. + /// Contains structured per-entity failure details for robot mode output. + SurgicalPreflightFailed { + run_id: String, + total: usize, + failures: Vec<EntityFailure>, + }, +``` + +Map to exit code 6 (resource not found) or a new dedicated exit code. The `Display` impl should produce a human-readable summary: + +```rust +Self::SurgicalPreflightFailed { run_id, total, failures } => { + write!(f, "Surgical preflight failed for {} of {} IIDs (run {}): {}", + failures.len(), total, run_id, + failures.iter().map(|f| format!("{} #{}: {}", f.entity_type, f.iid, f.code)).collect::<Vec<_>>().join(", ")) +} +``` + +In robot mode, this serializes to structured JSON with per-entity details and actionable recovery commands, rather than a generic `Other` string. + +--- + +## Step 10: Add branch in `run_sync` for surgical mode + +**File: `src/cli/commands/sync.rs`, function `run_sync` (line 68)** + +**CRITICAL: dry_run must be checked BEFORE surgical to prevent accidental writes:** + +```rust + // Handle dry_run mode - must check BEFORE surgical to prevent writes + if options.dry_run { + if options.is_surgical() { + // Surgical dry-run is handled inside run_sync_surgical (returns before any writes/locks) + return run_sync_surgical(config, &options, run_id, signal).await; + } + return run_sync_dry_run(config, &options).await; + } + + // Handle preflight-only mode + if options.preflight_only { + return run_sync_surgical(config, &options, run_id, signal).await; + } + + // Handle surgical mode (specific IIDs) + if options.is_surgical() { + return run_sync_surgical(config, &options, run_id, signal).await; + } +``` + +Note: `run_sync_surgical` checks `options.dry_run` and `options.preflight_only` internally and returns early before ANY side effects. This avoids needing separate functions for each mode. 
+ +--- + +## Step 11: Make `GitLabClient` constructible from config + +Check if `GitLabClient::from_config` already exists. If not, the surgical path needs to construct the client. Currently `run_ingest` constructs it via: + +```rust +let token = config.resolve_token()?; +let client = GitLabClient::new(&config.gitlab.base_url, &token, config.sync.rate_limit_rps); +``` + +The surgical function uses the same 2-line inline construction (shown in Step 9). + +--- + +## Step 12: `ProcessMrResult` visibility + +`process_single_mr` returns `ProcessMrResult` which is currently private. Make it `pub(crate)`: + +**File: `src/ingestion/merge_requests.rs`, line 138** + +```rust +pub(crate) struct ProcessMrResult { +``` + +--- + +## Step 13: Make orchestrator per-entity logic extractable + +The surgical inline dependent helpers (Step 9b) need the per-entity fetch+write logic currently embedded in orchestrator drain loops. There are two approaches: + +**Option A (preferred):** Extract the per-entity body of each drain loop into a standalone function. The drain function then calls the extracted function in a loop, and the surgical helpers call it directly. This avoids code duplication. + +**Option B:** If the drain loop body is tightly coupled to queue state (locked_at, attempts, etc.), write standalone versions for surgical that call the same underlying GitLab API functions and DB write functions, skipping queue management. + +**Functions to make accessible or extract:** +- Resource events fetch + write for a single entity (issue or MR) +- MR closes_issues fetch + write for a single MR +- MR diffs fetch + write for a single MR + +The existing orchestrator functions should remain unchanged for normal sync — only surgical mode uses the extracted per-entity versions. + +--- + +## Step 14: Update `robot-docs` manifest + +The `robot-docs` command outputs a manifest of all commands and flags. 
If it's auto-generated from clap derive, the new `--issue`, `--mr`, `-p`, `--preflight-only` flags will appear automatically. If it's hardcoded, update the sync command entry. + +**File:** Check `src/cli/robot.rs` or wherever `robot-docs` content is defined. The new flags should appear in the sync command's schema. Document the `--preflight-only` flag's behavior and its distinction from `--dry-run`. + +--- + +## Step 15: Update `SyncResult` for robot mode structured output + +**File: `src/cli/commands/sync.rs`** (wherever `SyncResult` is defined) + +Add fields for surgical-specific structured output: + +```rust +pub struct SyncResult { + // ... existing fields ... + + // NEW: Surgical sync metadata for robot mode + /// Per-entity failures (empty on success) + pub entity_failures: Vec<EntityFailure>, + /// Per-entity outcome map for deterministic retry and richer UI messaging + pub entity_outcomes: Vec<EntityOutcome>, + /// Number of entities requested + pub requested_count: usize, + /// Number of entities successfully fetched in preflight + pub fetched_count: usize, + /// Number of entities successfully processed (ingested + dependents) + pub processed_count: usize, + /// Entities skipped because local row was newer than preflight payload (TOCTOU protection) + pub skipped_stale: usize, + /// Per-entity IIDs that were skipped as stale + pub skipped_stale_iids: Vec<(String, u64)>, + /// Per-stage elapsed ms for deterministic performance tracking + pub stage_timings_ms: std::collections::BTreeMap<String, u64>, + /// Suggested recovery commands when failures occur (robot ergonomics) + pub recovery_actions: Vec<String>, + /// Overall completion status: succeeded | succeeded_with_warnings | failed | cancelled + pub completion_status: String, +} + +/// Per-entity outcome for robot mode structured output. +/// Enables deterministic per-IID retry and richer UI messaging. 
+#[derive(Debug, Clone, serde::Serialize)] +pub struct EntityOutcome { + pub entity_type: String, // "issue" or "merge_request" + pub iid: u64, + pub fetched: bool, + pub ingested: bool, + pub stale_skipped: bool, + pub dependent_failures: Vec<String>, +} +``` + +Robot mode JSON output includes these fields when in surgical mode, enabling agents to: +- Programmatically detect partial failures via `entity_failures` +- Get complete per-entity lifecycle via `entity_outcomes` for deterministic retry +- Track performance regressions via `stage_timings_ms` +- Auto-recover via `recovery_actions` (e.g., `["lore sync --issue 888 -p myproject"]` for retry) +- Detect TOCTOU skips via `skipped_stale` and `skipped_stale_iids` +- Distinguish partial success via `completion_status` + +--- + +## Files Modified Summary + +| File | Change | Lines | +|------|--------|-------| +| `src/cli/mod.rs` | Add `issue: Vec<u64>`, `mr: Vec<u64>`, `project: Option<String>`, `preflight_only: bool` to `SyncArgs` | ~790 | +| `src/cli/commands/sync.rs` | Add fields to `SyncOptions`, `is_surgical()`, `MAX_SURGICAL_TARGETS`, dry-run/surgical/preflight-only branch ordering, `run_sync_surgical` fn, `SyncResult` fields, `EntityOutcome` struct | ~20-30, new ~350 lines | +| `src/main.rs` | Wire new fields in `handle_sync_cmd`, add validation + defaultProject fallback + dedup + hard cap + preflight_only + no-docs/no-embed guard validation | ~2052-2060 | +| `src/gitlab/client.rs` | Add `get_issue_by_iid()`, `get_mr_by_iid()` (~10 lines each, `u64` iid param), add tests (~60 lines) | ~330, ~770 | +| `src/ingestion/surgical.rs` | **New file**: `EntityFailure`, `DirtySourceKey`, `PreflightResult` (with `failures` vec, payload integrity checks), `preflight_fetch` (aggregate-failures, per-entity timeout, project_id validation), `classify_error_code`, TOCTOU guards (`is_local_newer_*` with `last_seen_at` tie-breaking), dirty source key collection, inline dependent helpers, payload processors, `ingest_preflight_results` (~450 lines) | new | 
+| `src/ingestion/surgical_tests.rs` | **New file**: unit tests with `test_config()` helper, scoping invariant tests (including same-entity), TOCTOU tests (including equal-timestamp), rollback tests, preflight aggregation test, cancellation test, timeout test, scoped embed isolation test, payload integrity test (~450 lines) | new | +| `src/ingestion/mod.rs` | Add `pub mod surgical;` | line 9 | +| `src/ingestion/issues.rs` | `fn process_single_issue` → `pub(crate) fn` | line 143 | +| `src/ingestion/merge_requests.rs` | `fn process_single_mr` → `pub(crate) fn`, `struct ProcessMrResult` → `pub(crate)` | lines 138, 144 | +| `src/ingestion/orchestrator.rs` | Extract per-entity fetch+write logic into standalone functions callable by surgical inline helpers | TBD | +| `src/documents/mod.rs` | Add `run_generate_docs_for_sources` scoped variant (filters by PK, returns `regenerated_document_ids`), extend `GenerateDocsResult` | new fn ~30 lines | +| `src/embedding/mod.rs` | Add `run_embed_for_document_ids` scoped variant (embeds only specified document IDs) | new fn ~30 lines | +| `src/core/db.rs` | New migration: extend `sync_runs` with `mode`, `phase`, `surgical_iids_json`, surgical counters, `warnings_count`, `cancelled_at`, indexes | new migration | +| `src/core/sync.rs` | Extend `SyncRunRecorder` with `start_surgical`, `set_phase`, `set_counters`, `finish_*`, `heartbeat_if_due`, add `SurgicalCounters` struct | ~80 lines | +| `src/core/error.rs` | Add `SurgicalPreflightFailed` variant with structured fields | ~15 lines | + +--- + +## Acceptance Criteria + +### For agents to verify (automated): + +1. **Compilation**: `cargo check --all-targets` passes with zero errors +2. **Clippy**: `cargo clippy --all-targets -- -D warnings` passes (pedantic + nursery enabled) +3. **Format**: `cargo fmt --check` passes +4. **Tests**: `cargo test` passes, including all new tests +5. 
**New tests exist**: + - `get_issue_by_iid_returns_issue` — wiremock test in `src/gitlab/client.rs` + - `get_issue_by_iid_returns_not_found` — 404 error handling + - `get_mr_by_iid_returns_mr` — wiremock test + - `get_mr_by_iid_returns_not_found` + - `ingest_issue_by_iid_inserts_and_marks_dirty` — in-memory DB test + - `ingest_issue_by_iid_resets_discussion_watermark` + - `ingest_issue_by_iid_resets_event_watermark` + - `ingest_mr_by_iid_inserts_and_marks_dirty` + - `ingest_mr_by_iid_resets_discussion_watermark` + - `ingest_mr_by_iid_resets_event_watermark` + - `duplicate_iids_are_idempotent` — dedup/upsert behavior + - `surgical_docs_scope_ignores_preexisting_dirty_rows` — scoping invariant + - `surgical_docs_scope_ignores_preexisting_dirty_rows_for_same_entity` — same-entity scoping edge case + - `preflight_aggregates_multiple_missing_iids` — aggregate-failures behavior + - `sync_run_is_persisted_and_updated` — durable run ledger (extends existing table) + - `stale_payload_is_skipped_when_local_updated_at_is_newer` — TOCTOU protection + - `equal_updated_at_but_newer_last_seen_is_skipped` — TOCTOU equal-timestamp tie-breaking + - `preflight_success_then_ingest_failure_rolls_back_all_content_writes` — transactional rollback + - `surgical_no_docs_requires_no_embed_validation` — embed leakage prevention + - `cancellation_marks_sync_run_cancelled` — cancellation durability + - `dependent_timeout_records_entity_failure_and_continues` — timeout handling + - `scoped_embed_does_not_embed_unrelated_docs_created_after_docs_stage` — embed isolation under concurrency + - `payload_project_id_mismatch_is_rejected_in_preflight` — payload integrity + +### For me to evaluate (functional): + +6. **Basic surgical sync works**: `cargo run --release -- sync --issue <iid> -p <project>` fetches the issue, syncs its discussions and events, regenerates docs, and embeds +7. **Multiple IIDs work**: `cargo run --release -- sync --issue 1 --issue 2 --mr 3 -p <project>` +8. 
**Robot mode works**: `cargo run --release -- --robot sync --issue <iid> -p <project>` returns `{"ok":true,"data":{...},"meta":{...}}` with `stage_timings_ms` and `entity_outcomes` populated +9. **Error: missing project**: `cargo run --release -- sync --issue 1` fails with clear error about `-p` or `defaultProject` being required +10. **Error: nonexistent IID**: `cargo run --release -- sync --issue 999999 -p <project>` returns a clear "not found" error with the IID in the message, and zero DB mutations +11. **Error: mixed valid+invalid IIDs**: `cargo run --release -- sync --issue <valid-iid> --issue 999999 -p <project>` reports BOTH results (1 success, 1 failure) in structured output, with zero DB mutations +12. **No status enrichment**: Surgical sync does NOT call GraphQL status enrichment +13. **--no-docs/--no-embed respected**: `cargo run --release -- sync --issue <iid> -p <project> --no-docs --no-embed` skips those stages +14. **Dirty queue scoping**: After surgical sync of 1 issue, only that issue's documents are regenerated (not all dirty docs from previous syncs — verify by checking the count in docs stage output) +15. **Watermark reset**: After surgical sync, the issue's discussions and events are re-fetched even if they were previously synced (verify via the discussions_fetched count > 0) +16. **Normal sync unaffected**: `cargo run --release -- sync` (without --issue/--mr) still works exactly as before +17. **`lore robot-docs`**: The new `--issue`, `--mr`, `-p`, `--preflight-only` flags appear in the sync command schema +18. **defaultProject fallback**: `cargo run --release -- sync --issue <iid>` works when config has `defaultProject` set +19. **Sync lock**: Surgical sync acquires lock — running two surgical syncs concurrently produces a lock error, not corruption +20. **--full incompatible**: `cargo run --release -- sync --full --issue 1 -p project` fails with clear error +21. **MR dependent stages**: Surgical MR sync fetches closes_issues and MR file diffs when enabled by config +22. 
**Parse-time validation**: `--issue 0` and `--issue -1` rejected by clap with clear error before any code runs +23. **Hard cap enforcement**: `--issue 1 --issue 2 ... --issue 101` (>100 combined targets) rejected with clear error +24. **Preflight-only mode**: `cargo run --release -- sync --preflight-only --issue -p ` validates the IID exists on GitLab and returns success with `fetched_count` populated, but no content DB mutations (only control-plane run-ledger entries) +25. **Preflight-only requires surgical**: `cargo run --release -- sync --preflight-only` (no --issue/--mr) fails with clear error +26. **Durable run ledger**: After surgical sync, `SELECT * FROM sync_runs WHERE mode = 'surgical' ORDER BY id DESC LIMIT 1` shows the correct phase (`done` on success, `failed` on error, `cancelled` on Ctrl+C) with counters populated +27. **Stage timings in robot output**: Robot mode JSON includes `stage_timings_ms` with entries for each completed stage +28. **Typed preflight error**: Preflight failures serialize as structured `SurgicalPreflightFailed` errors with per-entity details and machine-usable codes, not generic `Other` strings +29. **Dependent stage failures are best-effort**: A failing discussion fetch for one entity does not prevent other entities' dependents from running. Failures are recorded in `entity_failures`. + +### Edge cases to verify: + +30. **Duplicate IIDs**: `--issue 123 --issue 123` should work without error (deduplicated, idempotent) +31. **Mixed existing + new**: Syncing an IID that's already in the DB should update it +32. **Closed issues**: Surgical sync works for closed issues (state = "closed") +33. **Signal handling**: Ctrl+C during surgical sync records `status='cancelled'`, `phase='cancelled'`, and `finished_at` in `sync_runs` (no dangling `running` rows) +34. **Dry-run zero-write assertion**: `--dry-run --issue 123 -p project` produces zero side effects — no lock acquired, no DB connection opened, no network calls. 
DB file is byte-identical before and after. +35. **--no-docs without --no-embed rejected**: `--issue 1 -p project --no-docs` without `--no-embed` is rejected with clear error +36. **Lock released before embed**: A normal `lore sync` can acquire the sync lock while a surgical sync is in its embed phase. + +### Correctness under contention and rollback: + +37. **No partial writes on missing IID**: A mixed set of valid + invalid IIDs (e.g., `--issue 123 --issue 999999`) causes zero DB mutations — the preflight phase catches the 404 before any writes. The structured error report lists ALL failures, not just the first. +38. **Scoped docs**: Only documents tied to this surgical run's dirty source keys are regenerated. Other pending dirty_sources are untouched. +39. **Lock contention test**: A second concurrent surgical sync fails fast with a lock error and produces no writes. +40. **Lock not held during preflight**: Verify that a normal `lore sync` can acquire the lock while a surgical sync is in its preflight (network) phase. +41. **TOCTOU safety**: If a normal sync updates entity after preflight but before ingest, surgical run skips stale payload (no overwrite). Stale skips are reported in `skipped_stale` count. +42. **TOCTOU equal-timestamp safety**: If a concurrent sync fetches the same entity with identical `updated_at` but after our preflight started, surgical run detects the fresher `last_seen_at` and skips. +43. **No embed leakage**: Surgical embed uses `run_embed_for_document_ids` with explicit document IDs from the docs stage — never global `run_embed`. Even if another sync creates unembedded docs between lock release and embed, they are not processed. +44. **No queue artifacts**: Surgical sync creates zero rows in `pending_dependent_fetches`. Verify table is untouched after a surgical run. + +### Automated scoping invariants (covered by tests in 1d and 1f): + +45. **Scoped docs invariants are enforced by automated tests**, not manual-only verification. 
Tests `surgical_docs_scope_ignores_preexisting_dirty_rows`, `surgical_docs_scope_ignores_preexisting_dirty_rows_for_same_entity`, and `preflight_aggregates_multiple_missing_iids` prevent regressions. +46. **Rollback and race invariants are enforced by automated tests**: no partial writes on ingest failure, no stale overwrite (strict and equal-timestamp variants). +47. **Cancellation durability**: Ctrl+C during surgical sync records `status='cancelled'`, `phase='cancelled'`, and `finished_at` in `sync_runs`. Verified by `cancellation_marks_sync_run_cancelled` test. +48. **Payload integrity**: Payloads with unexpected `project_id` are rejected in preflight and produce zero content writes. Verified by `payload_project_id_mismatch_is_rejected_in_preflight` test. +49. **Scoped embed isolation under concurrency**: Verified by `scoped_embed_does_not_embed_unrelated_docs_created_after_docs_stage` test. +50. **Timeout path**: Verified by `dependent_timeout_records_entity_failure_and_continues` test (TIMEOUT code + continued processing). + +--- + +## Rejected Recommendations + +- **SyncMode enum replacing flat fields in SyncOptions** — rejected because it's overengineered for a boolean distinction (surgical vs standard). The `is_surgical()` helper is simpler, and the enum would require refactoring all existing SyncOptions construction sites. The flat fields approach is how the rest of the codebase works (see `full`, `force`, `dry_run`). +- **`value_delimiter = ','` on --issue/--mr** — rejected because `--issue 1,2,3` is non-idiomatic for this CLI (all other repeatable flags use `--flag val --flag val` pattern) and commas in shell can interact poorly with quoting. +- **Chunked `list_issues_by_iids` / `list_mrs_by_iids` batch fetch via `iids[]` query param** — rejected because surgical sync targets 1-5 IIDs typically (agent refreshing active work). Individual GET requests give precise per-IID error reporting (404 for missing IID) and add zero complexity. 
Batch optimization is premature for this use case. +- **`--strict` flag for fail-on-missing-IID behavior** — rejected for v1 because the default behavior (fail on 404) is already strict. A `--lenient` flag to skip missing IIDs would be the future extension if needed, not the other way around. Adding --strict now adds flag surface area for a mode that's already the default. +- **Separate `run_sync_surgical_dry_run` function** — rejected because the dry-run check is 5 lines inside `run_sync_surgical` (log + return early). A separate function would duplicate all the setup code (db connection, project resolution) just to print a message. +- **`SurgicalPlan` normalized object** — rejected because validation/dedup/project fallback is ~15 lines in one location (`handle_sync_cmd`). Introducing a dedicated struct with a builder method adds a new type, a new file, and indirection for code that runs exactly once. The flat validation approach is how all other SyncOptions constraints are checked in this codebase. +- **`--with-related` flag for auto-expanding related entities** — rejected for v1 as scope creep. Surgical sync's purpose is agent-driven refresh of known IIDs. Auto-expansion introduces unbounded work (related issues have related MRs have related issues...), requires expansion caps, and complicates the preflight-then-commit contract. This is a good v2 feature tracked separately. +- **Bounded concurrency (`buffer_unordered`) for IID fetches** — rejected for v1 because surgical sync targets 1-5 IIDs (agent refreshing active work). Sequential fetch of 5 items takes <2s. Adding `futures::stream`, `buffer_unordered`, config knobs (`surgical_max_in_flight`), and post-fetch sorting for determinism is premature complexity. If usage patterns show >10 IIDs becoming common, add concurrency then. +- **`origin_run_id` column on `dirty_sources` table** — rejected because it modifies shared schema (`dirty_sources`) for a surgical-only concern. 
The source-key-based scoping approach is self-contained and requires zero schema changes to `dirty_sources`. Adding a column would require updating all code paths that insert dirty rows. +- **Accept `&Transaction` instead of `&Connection` in surgical ingest functions** — rejected because the existing codebase uniformly uses `&Connection` (rusqlite's `Transaction` derefs to `Connection`). `process_single_issue` and all existing ingestion functions take `&Connection`. Changing surgical functions to `&Transaction` would create an inconsistency and require refactoring callers. The `unchecked_transaction` + `&Connection` pattern is used elsewhere (see `enrich_issue_statuses_txn`). +- **Transient retry policy (5xx/timeout with jittered backoff)** — rejected for surgical sync scope because the existing `request()` method already handles 429 retries. Adding 5xx retry applies to ALL GitLabClient methods, not just surgical. It should be a separate enhancement to the client layer, not coupled to surgical sync. +- **`sync_run_entities` per-entity lifecycle table** — rejected for v1 because it adds significant schema complexity (new table, FK, index, per-entity per-stage row inserts) for observability that can be achieved with simpler means: `SyncResult` already carries `skipped_stale_iids` and `entity_failures` for robot output, and `sync_runs` has aggregate counters. If retry-by-entity becomes a real need, this table is a clean v2 addition. +- **`--retry-failed` flag on `sync-runs` command** — rejected as scope creep for v1. Deterministic retry can be built on top of `sync_runs` data later. For now, agents can simply re-run `lore sync --issue ` based on the structured error output. +- **`--issue-url` / `--mr-url` URL-native surgical targets** — rejected for v1 because agents already have project + IID from their context (lore queries return both). 
Parsing GitLab URLs introduces URL format fragility (self-hosted instances vary), namespace disambiguation complexity, and additional validation code for marginal ergonomic gain. If copy-paste from browser becomes a common workflow, this is a clean v2 addition. +- **`sync-runs` read command (`lore --robot sync-runs`)** — rejected for v1 scope. The run ledger is useful for observability but the read path is not required for the surgical sync feature itself. Agents can query `sync_runs` directly via `sqlite3` or via a future `lore --robot sync-runs` command. Tracked as a v2 enhancement. +- **Scoped drain helpers with `scope_run_id` column on `pending_dependent_fetches`** — rejected because the inline execution approach (constraint 4) is strictly simpler. Adding `scope_run_id` + `aborted_reason` columns, a new covering index, scoped drain variants, and orphan cleanup logic to `pending_dependent_fetches` is significant schema and code churn for a table with an existing UNIQUE constraint and delete-on-complete semantics. The inline approach avoids all of this and eliminates an entire class of failure modes (orphaned jobs, queue scoping bugs). +- **`failed_run_aborts_pending_scoped_jobs` test** — rejected because surgical mode no longer enqueues jobs to any shared queue. The inline execution model eliminates the need for queue failure hygiene tests. Dependent stage failures are tracked per-entity in `entity_failures`. +- **Separate dependent fetch/write split with lock release between phases** — rejected because it significantly increases complexity for marginal contention reduction. Surgical sync targets 1-5 entities; dependent stage lock hold time is seconds. Splitting fetch (unlocked) and write (short locked transactions) per dependent type would require managing intermediate state, coordinating multiple lock acquire/release cycles, and applying per-entity freshness guards at the dependent level. The inline approach is simpler and adequate at our scale. 
+- **Global stage timeout budget (`surgical_dependents_budget_seconds`)** — rejected for v1 because per-entity timeout (constraint 16) provides sufficient bounding. A global budget across all entities adds complexity (tracking cumulative time, deciding which entities to skip when budget exhausted) for minimal incremental safety. If surgical sync scales to 50+ entities, this becomes worthwhile. diff --git a/docs/prd-per-note-search.feedback-1.md b/docs/prd-per-note-search.feedback-1.md deleted file mode 100644 index 74fb019..0000000 --- a/docs/prd-per-note-search.feedback-1.md +++ /dev/null @@ -1,174 +0,0 @@ -Highest-impact gaps I see in the current plan: - -1. `for-issue` / `for-mr` filtering is ambiguous across projects and can return incorrect rows. -2. `lore notes` has no pagination contract, so large exports and deterministic resumption are weak. -3. Migration `022` is high-risk (table rebuild + FTS + junction tables) without explicit integrity gates. -4. Note-doc freshness is incomplete for upstream note deletions and parent metadata changes (labels/title). - -Below are my best revisions, each with rationale and a git-diff-style plan edit. - ---- - -1. **Add gated rollout + rollback controls** -Rationale: You can still “ship together” while reducing blast radius. This makes recovery fast if note-doc generation causes DB/embedding pressure. - -```diff -@@ ## Design --Two phases, shipped together as one feature: -+Two phases, shipped together as one feature, but with runtime gates: -+ -+- `feature.notes_cli` (Phase 1 surface) -+- `feature.note_documents` (Phase 2 indexing/extraction path) -+ -+Rollout order: -+1) Enable `notes_cli` -+2) Run note-doc backfill in bounded batches -+3) Enable `note_documents` for continuous updates -+ -+Rollback: -+- Disabling `feature.note_documents` stops new note-doc generation without affecting issue/MR/discussion docs. -``` - -2. 
**Add keyset pagination + deterministic ordering** -Rationale: Needed for year-long reviewer analysis and reliable “continue where I left off” behavior under concurrent updates. - -```diff -@@ pub struct NoteListFilters<'a> { - pub limit: usize, -+ pub cursor: Option<&'a str>, // keyset token ":" -+ pub include_total_count: bool, // avoid COUNT(*) in hot paths -@@ -- pub sort: &'a str, // "created" (default) | "updated" -+ pub sort: &'a str, // "created" | "updated" -@@ query_notes SQL --ORDER BY {sort_column} {order} -+ORDER BY {sort_column} {order}, n.id {order} - LIMIT ? -``` - -3. **Make `for-issue` / `for-mr` project-scoped** -Rationale: IIDs are not globally unique. Requiring project avoids false positives and hard-to-debug cross-project leakage. - -```diff -@@ pub struct NotesArgs { -- #[arg(long = "for-issue", help_heading = "Filters", conflicts_with = "for_mr")] -+ #[arg(long = "for-issue", help_heading = "Filters", conflicts_with = "for_mr", requires = "project")] - pub for_issue: Option, -@@ -- #[arg(long = "for-mr", help_heading = "Filters", conflicts_with = "for_issue")] -+ #[arg(long = "for-mr", help_heading = "Filters", conflicts_with = "for_issue", requires = "project")] - pub for_mr: Option, -``` - -4. **Upgrade path filtering semantics** -Rationale: Review comments often reference renames/moves. Restricting to `position_new_path` misses relevant notes. - -```diff -@@ pub struct NotesArgs { -- /// Filter by file path (trailing / for prefix match) -+ /// Filter by file path - #[arg(long, help_heading = "Filters")] - pub path: Option, -+ /// Path mode: exact|prefix|glob -+ #[arg(long = "path-mode", value_parser = ["exact","prefix","glob"], default_value = "exact", help_heading = "Filters")] -+ pub path_mode: String, -+ /// Match against old path as well as new path -+ #[arg(long = "match-old-path", help_heading = "Filters")] -+ pub match_old_path: bool, -@@ query_notes filter mappings --- `path` ... n.position_new_path ... 
-+- `path` applies to `n.position_new_path` and optionally `n.position_old_path`. -+- `glob` mode translates `*`/`?` to SQL LIKE with escaping. -``` - -5. **Add explicit performance indexes (new migration)** -Rationale: `notes` becomes a first-class query surface; without indexes, filters degrade quickly at 10k+ note scale. - -```diff -@@ ## Phase 1: `lore notes` Command -+### Work Chunk 1E: Query Performance Indexes -+**Files:** `migrations/023_notes_query_indexes.sql`, `src/core/db.rs` -+ -+Add indexes: -+- `notes(project_id, created_at DESC, id DESC)` -+- `notes(author_username, created_at DESC, id DESC) WHERE is_system = 0` -+- `notes(discussion_id)` -+- `notes(position_new_path)` -+- `notes(position_old_path)` -+- `discussions(issue_id)` -+- `discussions(merge_request_id)` -``` - -6. **Harden migration 022 with transactional integrity checks** -Rationale: This is the riskiest part of the plan. Add hard fail-fast checks so corruption cannot silently pass. - -```diff -@@ ### Work Chunk 2A: Schema Migration (022) -+Migration safety requirements: -+- Execute in a single `BEGIN IMMEDIATE ... COMMIT` transaction. -+- Capture and compare pre/post row counts for `documents`, `document_labels`, `document_paths`, `dirty_sources`. -+- Run `PRAGMA foreign_key_check` and abort on any violation. -+- Run `PRAGMA integrity_check` and abort on non-`ok`. -+- Rebuild FTS and assert `documents_fts` rowcount equals `documents` rowcount. -``` - -7. **Add note deletion + parent-change propagation** -Rationale: Current plan handles create/update ingestion but not all staleness paths. Without this, note documents drift. - -```diff -@@ ## Phase 2: Per-Note Documents -+### Work Chunk 2G: Freshness Propagation -+**Files:** `src/ingestion/discussions.rs`, `src/ingestion/mr_discussions.rs`, `src/documents/regenerator.rs` -+ -+Rules: -+- If a previously stored note is missing from upstream payload, delete local note row and enqueue `(note, id)` for document deletion. 
-+- When parent issue/MR title or labels change, enqueue descendant note docs dirty (notes inherit parent metadata). -+- Keep idempotent behavior for repeated syncs. -``` - -8. **Separate FTS coverage from embedding coverage** -Rationale: Biggest cost/perf risk is embeddings. Index all notes in FTS, but embed selectively with policy knobs. - -```diff -@@ ## Estimated Document Volume Impact --FTS5 handles this comfortably. Embedding generation time scales linearly (~4x increase). -+FTS5 handles this comfortably. Embedding generation is policy-controlled: -+- FTS: index all non-system note docs -+- Embeddings default: only notes with body length >= 40 chars (configurable) -+- Add config: `documents.note_embeddings.min_chars`, `documents.note_embeddings.enabled` -+- Prioritize unresolved DiffNotes before other notes during embedding backfill -``` - -9. **Bring structured reviewer profiling into scope (not narrative reporting)** -Rationale: This directly serves the stated use case and makes the feature compelling immediately. - -```diff -@@ ## Non-Goals --- Adding a "reviewer profile" report command (that's a downstream use case built on this infrastructure) -+- Generating free-form narrative reviewer reports. -+ A structured profiling command is in scope. -+ -+## Phase 3: Structured Reviewer Profiling -+Add `lore notes profile --author --since ` returning: -+- top commented paths -+- top parent labels -+- unresolved-comment ratio -+- note-type distribution -+- median comment length -``` - -10. **Add operational SLOs + robot-mode status for note pipeline** -Rationale: Reliability improves when regressions are observable, not inferred from failures. 
- -```diff -@@ ## Verification Checklist -+Operational checks: -+- `lore -J stats` includes per-`source_type` document counts (including `note`) -+- Add queue lag metrics: oldest dirty note age, retry backlog size -+- Add extraction error breakdown by `source_type` -+- Add smoke assertion: disabling `feature.note_documents` leaves other source regeneration unaffected -``` - ---- - -If you want, I can produce a single consolidated revised PRD draft (fully merged text, not just diffs) as the next step. \ No newline at end of file diff --git a/docs/prd-per-note-search.feedback-2.md b/docs/prd-per-note-search.feedback-2.md deleted file mode 100644 index 9d99601..0000000 --- a/docs/prd-per-note-search.feedback-2.md +++ /dev/null @@ -1,200 +0,0 @@ -Below are the strongest revisions I’d make, excluding everything in your `## Rejected Recommendations` list. - -1. **Add a Phase 0 for stable note identity before any note-doc generation** -Rationale: your current plan still allows note document churn because Issue discussion ingestion is delete/reinsert-based. That makes local `notes.id` unstable, causing unnecessary dirtying/regeneration and potential stale-doc edge cases. Stabilizing identity first (upsert-by-GitLab-ID + sweep stale) improves correctness and cuts repeated work. 
- -```diff -@@ ## Design --Two phases, shipped together as one feature: -+Three phases, shipped together as one feature: -+- **Phase 0 (Foundation):** Stable note identity in local DB (upsert + sweep, no delete/reinsert churn) - - **Phase 1 (Option A):** `lore notes` command — direct SQL query over the `notes` table with rich filtering - - **Phase 2 (Option B):** Per-note documents — each non-system note becomes its own searchable document in the FTS/embedding pipeline -@@ -+## Phase 0: Stable Note Identity -+ -+### Work Chunk 0A: Upsert/Sweep for Issue Discussion Notes -+**Files:** `src/ingestion/discussions.rs`, `migrations/022_notes_identity_index.sql`, `src/core/db.rs` -+**Implementation:** -+- Add unique index: `UNIQUE(project_id, gitlab_id)` on `notes` -+- Replace delete/reinsert issue-note flow with upsert + `last_seen_at` sweep (same durability model as MR note sweep) -+- Ensure `insert_note/upsert_note` returns the stable local row id for both insert and update paths -``` - -2. **Replace `source_type` CHECK constraints with a registry table + FK in migration** -Rationale: table CHECKs force full table rebuild for every new source type forever. A `source_types` table with FK keeps DB-level integrity and future extensibility without rebuilding `documents`/`dirty_sources` every time. This is a major architecture hardening win. - -```diff -@@ ### Work Chunk 2A: Schema Migration (023) --Current migration ... CHECK constraints limiting `source_type` ... -+Current migration ... CHECK constraints limiting `source_type` ... -+Revision: migrate to `source_types` registry table + FK constraints. -@@ --1. `dirty_sources` — add `'note'` to source_type CHECK --2. `documents` — add `'note'` to source_type CHECK -+1. Create `source_types(name TEXT PRIMARY KEY)` and seed: `issue, merge_request, discussion, note` -+2. Rebuild `dirty_sources` and `documents` to replace CHECK with `REFERENCES source_types(name)` -+3. 
Future source-type additions become `INSERT INTO source_types(name) VALUES (?)` (no table rebuild) -@@ -+#### Additional integrity tests -+#[test] -+fn test_source_types_registry_contains_note() { ... } -+#[test] -+fn test_documents_source_type_fk_enforced() { ... } -+#[test] -+fn test_dirty_sources_source_type_fk_enforced() { ... } -``` - -3. **Mark note documents dirty only when note semantics actually changed** -Rationale: current loops mark every non-system note dirty every sync. With 8k+ notes this creates avoidable queue pressure and regeneration time. Change-aware dirtying (inserted/changed only) gives major performance and stability improvements. - -```diff -@@ ### Work Chunk 2D: Regenerator & Dirty Tracking Integration --for note in notes { -- let local_note_id = insert_note(&tx, local_discussion_id, ¬e, None)?; -- if !note.is_system { -- dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, local_note_id)?; -- } --} -+for note in notes { -+ let outcome = upsert_note(&tx, local_discussion_id, ¬e, None)?; -+ if !note.is_system && outcome.changed_semantics { -+ dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?; -+ } -+} -@@ -+// changed_semantics should include: body, note_type, path/line positions, resolvable/resolved/resolved_by, updated_at -``` - -4. **Expand filters to support real analysis windows and resolution state** -Rationale: reviewer profiling usually needs bounded windows and both resolved/unresolved views. Current `unresolved: bool` is too narrow and one-sided. Add `--until` and tri-state resolution filtering for better analytical power. 
- -```diff -@@ pub struct NoteListFilters<'a> { -- pub since: Option<&'a str>, -+ pub since: Option<&'a str>, -+ pub until: Option<&'a str>, -@@ -- pub unresolved: bool, -+ pub resolution: &'a str, // "any" (default) | "unresolved" | "resolved" -@@ -- pub author: Option<&'a str>, -+ pub author: Option<&'a str>, // case-insensitive match -@@ -- // Filter by time (7d, 2w, 1m, or YYYY-MM-DD) -+ // Filter by start time (7d, 2w, 1m, or YYYY-MM-DD) - pub since: Option, -+ /// Filter by end time (7d, 2w, 1m, or YYYY-MM-DD) -+ #[arg(long, help_heading = "Filters")] -+ pub until: Option, -@@ -- /// Only show unresolved review comments -- pub unresolved: bool, -+ /// Resolution filter: any, unresolved, resolved -+ #[arg(long, value_parser = ["any", "unresolved", "resolved"], default_value = "any", help_heading = "Filters")] -+ pub resolution: String, -``` - -5. **Broaden index strategy to match actual query shapes, not just author queries** -Rationale: `idx_notes_user_created` helps one path, but common usage also includes project+time scans and unresolved filters. Add two more partial composites for high-selectivity paths. - -```diff -@@ ### Work Chunk 1E: Composite Query Index - CREATE INDEX IF NOT EXISTS idx_notes_user_created - ON notes(project_id, author_username, created_at DESC, id DESC) - WHERE is_system = 0; -+ -+CREATE INDEX IF NOT EXISTS idx_notes_project_created -+ON notes(project_id, created_at DESC, id DESC) -+WHERE is_system = 0; -+ -+CREATE INDEX IF NOT EXISTS idx_notes_unresolved_project_created -+ON notes(project_id, created_at DESC, id DESC) -+WHERE is_system = 0 AND resolvable = 1 AND resolved = 0; -@@ -+#[test] -+fn test_notes_query_plan_uses_project_created_index_for_default_listing() { ... } -+#[test] -+fn test_notes_query_plan_uses_unresolved_index_when_resolution_unresolved() { ... } -``` - -6. **Improve per-note document payload with structured metadata header + minimal thread context** -Rationale: isolated single-note docs can lose meaning. 
A small structured header plus lightweight context (parent + one preceding note excerpt) improves semantic retrieval quality substantially without re-bundling full threads. - -```diff -@@ ### Work Chunk 2C: Note Document Extractor --// 6. Format content: --// [[Note]] {note_type or "Comment"} on {parent_type_prefix}: {parent_title} --// Project: {path_with_namespace} --// URL: {url} --// Author: @{author} --// Date: {format_date(created_at)} --// Labels: {labels_json} --// File: {position_new_path}:{position_new_line} (if DiffNote) --// --// --- Body --- --// --// {body} -+// 6. Format content with machine-readable header: -+// [[Note]] -+// source_type: note -+// note_gitlab_id: {gitlab_id} -+// project: {path_with_namespace} -+// parent_type: {Issue|MergeRequest} -+// parent_iid: {iid} -+// note_type: {DiffNote|DiscussionNote|Comment} -+// author: @{author} -+// created_at: {iso8601} -+// resolved: {true|false} -+// path: {position_new_path}:{position_new_line} -+// url: {url} -+// -+// --- Context --- -+// parent_title: {title} -+// previous_note_excerpt: {optional, max 200 chars} -+// -+// --- Body --- -+// {body} -``` - -7. **Add first-class export modes for downstream profiling pipelines** -Rationale: this makes the feature much more useful immediately (LLM prompts, notebook analysis, external scripts) without adding a profiling command. It stays within your non-goals and increases adoption. - -```diff -@@ pub struct NotesArgs { -+ /// Output format -+ #[arg(long, value_parser = ["table", "json", "jsonl", "csv"], default_value = "table", help_heading = "Output")] -+ pub format: String, -@@ -- if robot_mode { -+ if robot_mode || args.format == "json" || args.format == "jsonl" || args.format == "csv" { - print_list_notes_json(...) 
- } else { - print_list_notes(&result); - } -@@ ### Work Chunk 1C: Human & Robot Output Formatting -+Add `print_list_notes_csv()` and `print_list_notes_jsonl()`: -+- CSV columns mirror `NoteListRowJson` field names -+- JSONL emits one note object per line for streaming pipelines -``` - -8. **Strengthen verification with idempotence + migration data-preservation checks** -Rationale: this feature touches ingestion, migrations, indexing, and regeneration. Add explicit idempotence/perf checks so regressions surface early. - -```diff -@@ ## Verification Checklist - cargo test - cargo clippy --all-targets -- -D warnings - cargo fmt --check -+cargo test test_note_ingestion_idempotent_across_two_syncs -+cargo test test_note_document_count_stable_after_second_generate_docs_full -@@ -+lore sync -+lore generate-docs --full -+lore -J stats > /tmp/stats1.json -+lore generate-docs --full -+lore -J stats > /tmp/stats2.json -+# assert note doc count unchanged and dirty queue drains to zero -``` - -If you want, I can turn this into a fully rewritten PRD v2 draft with these changes merged in-place and renumbered work chunks end-to-end. \ No newline at end of file diff --git a/docs/prd-per-note-search.feedback-3.md b/docs/prd-per-note-search.feedback-3.md deleted file mode 100644 index f7f19b7..0000000 --- a/docs/prd-per-note-search.feedback-3.md +++ /dev/null @@ -1,162 +0,0 @@ -These are the highest-impact revisions I’d make. They avoid everything in your `## Rejected Recommendations` list. - -1. Add immediate note-document deletion propagation (don’t wait for `generate-docs --full`) -Why: right now, deleted notes can leave stale `source_type='note'` documents until a full rebuild. That creates incorrect search/reporting results and weakens trust in the dataset. -```diff -@@ Phase 0: Stable Note Identity -+### Work Chunk 0B: Immediate Deletion Propagation -+ -+When sweep deletes stale notes, propagate deletion to documents in the same transaction. 
-+Do not rely on eventual cleanup via `generate-docs --full`. -+ -+#### Tests to Write First -+#[test] -+fn test_issue_note_sweep_deletes_note_documents_immediately() { ... } -+#[test] -+fn test_mr_note_sweep_deletes_note_documents_immediately() { ... } -+ -+#### Implementation -+Use `DELETE ... RETURNING id, is_system` in note sweep functions. -+For returned non-system note ids: -+1) `DELETE FROM documents WHERE source_type='note' AND source_id=?` -+2) `DELETE FROM dirty_sources WHERE source_type='note' AND source_id=?` -``` - -2. Add one-time upgrade backfill for existing notes (migration 024) -Why: existing DBs will otherwise only get note-documents for changed/new notes. Historical notes remain invisible unless users manually run full rebuild. -```diff -@@ Phase 2: Per-Note Documents -+### Work Chunk 2H: Backfill Existing Notes After Upgrade (Migration 024) -+ -+Create migration `024_note_dirty_backfill.sql`: -+INSERT INTO dirty_sources (source_type, source_id, queued_at) -+SELECT 'note', n.id, unixepoch('now') * 1000 -+FROM notes n -+LEFT JOIN documents d -+ ON d.source_type='note' AND d.source_id=n.id -+WHERE n.is_system=0 AND d.id IS NULL -+ON CONFLICT(source_type, source_id) DO NOTHING; -+ -+Add migration test asserting idempotence and expected queue size. -``` - -3. Fix `--since/--until` semantics and validation -Why: reusing `parse_since` for `until` creates ambiguous windows and off-by-boundary behavior; your own example `--since 90d --until 180d` is chronologically reversed. -```diff -@@ Work Chunk 1A: Data Types & Query Layer -- since: parse_since(since_str) then n.created_at >= ? -- until: parse_since(until_str) then n.created_at <= ? -+ since: parse_since_start_bound(since_str) then n.created_at >= ? -+ until: parse_until_end_bound(until_str) then n.created_at <= ? -+ Validate since <= until; otherwise return a clear user error. -+ -+#### Tests to Write First -+#[test] fn test_query_notes_invalid_time_window_rejected() { ... 
} -+#[test] fn test_query_notes_until_date_is_end_of_day_inclusive() { ... } -``` - -4. Separate semantic-change detection from housekeeping updates -Why: current proposed `WHERE` includes `updated_at`, which will cause unnecessary dirty churn. You want `last_seen_at` to always refresh, but regeneration only when searchable semantics changed. -```diff -@@ Work Chunk 0A: Upsert/Sweep for Issue Discussion Notes -- OR notes.updated_at IS NOT excluded.updated_at -+ -- updated_at-only changes should not mark semantic dirty -+ -+Perform two-step logic: -+1) Upsert always updates persistence/housekeeping fields (`updated_at`, `last_seen_at`). -+2) `changed_semantics` is computed only from fields used by note documents/search filters -+ (body, note_type, resolved flags, paths, author, parent linkage). -+ -+#### Tests to Write First -+#[test] -+fn test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change() { ... } -``` - -5. Make indexes align with actual query collation and join strategy -Why: `author` uses `COLLATE NOCASE`; without collation-aware index, SQLite can skip index use. Also, IID filters via scalar subqueries are harder for planner than direct join predicates. -```diff -@@ Work Chunk 1E: Composite Query Index --CREATE INDEX ... ON notes(project_id, author_username, created_at DESC, id DESC) WHERE is_system = 0; -+CREATE INDEX ... ON notes(project_id, author_username COLLATE NOCASE, created_at DESC, id DESC) WHERE is_system = 0; -+ -+CREATE INDEX IF NOT EXISTS idx_discussions_issue_id ON discussions(issue_id); -+CREATE INDEX IF NOT EXISTS idx_discussions_mr_id ON discussions(merge_request_id); -``` - -```diff -@@ Work Chunk 1A: query_notes() -- d.issue_id = (SELECT id FROM issues WHERE iid = ? AND project_id = ?) -+ i.iid = ? AND i.project_id = ? -- d.merge_request_id = (SELECT id FROM merge_requests WHERE iid = ? AND project_id = ?) -+ m.iid = ? AND m.project_id = ? -``` - -6. 
Replace manual CSV escaping with `csv` crate -Why: manual RFC4180 escaping is fragile (quotes/newlines/multi-byte edge cases). This is exactly where a mature library reduces long-term bug risk. -```diff -@@ Work Chunk 1C: Human & Robot Output Formatting -- Uses a minimal CSV writer (no external dependency — the format is simple enough for manual escaping). -+ Uses `csv::Writer` for RFC4180-compliant escaping and stable output across edge cases. -+ -+#### Tests to Write First -+#[test] fn test_csv_output_multiline_and_quotes_roundtrip() { ... } -``` - -7. Add `--contains` lexical body filter to `lore notes` -Why: useful middle ground between metadata filtering and semantic search; great for reviewer-pattern mining without requiring FTS query syntax. -```diff -@@ Work Chunk 1B: CLI Arguments & Command Wiring -+/// Filter by case-insensitive substring in note body -+#[arg(long, help_heading = "Filters")] -+pub contains: Option; -``` - -```diff -@@ Work Chunk 1A: NoteListFilters -+ pub contains: Option<&'a str>, -@@ query_notes dynamic filters -+ if contains.is_some() { -+ where_clauses.push("n.body LIKE ? COLLATE NOCASE"); -+ params.push(format!("%{}%", escape_like(contains.unwrap()))); -+ } -``` - -8. Reduce note-document embedding noise by slimming metadata header -Why: current verbose key-value header repeats low-signal tokens and consumes embedding budget. Keep context, but bias tokens toward actual review text. -```diff -@@ Work Chunk 2C: Note Document Extractor -- Build content with structured metadata header: -- [[Note]] -- source_type: note -- note_gitlab_id: ... -- project: ... -- ... -- --- Body --- -- {body} -+ Build content with compact, high-signal layout: -+ [[Note]] -+ @{author} on {Issue#|MR!}{iid} in {project_path} -+ path: {path:line} (only when available) -+ state: {resolved|unresolved} (only when resolvable) -+ -+ {body} -+ -+Keep detailed metadata in structured document columns/labels/paths/url, -+not repeated in verbose text. -``` - -9. 
Add explicit performance regression checks for the new hot paths -Why: this feature increases document volume ~4x; you should pin acceptable query behavior now so future changes don’t silently degrade. -```diff -@@ Verification Checklist -+Performance/plan checks: -+1) `EXPLAIN QUERY PLAN` for: -+ - author+since query -+ - project+date query -+ - for-mr / for-issue query -+2) Seed 50k-note synthetic fixture and assert: -+ - `lore notes --author ... --limit 100` stays under agreed local threshold -+ - `lore search --type note ...` remains deterministic and completes successfully -``` - -If you want, I can also provide a fully merged “iteration 3” PRD text with these edits applied end-to-end so you can drop it in directly. \ No newline at end of file diff --git a/docs/prd-per-note-search.feedback-4.md b/docs/prd-per-note-search.feedback-4.md deleted file mode 100644 index d1016c7..0000000 --- a/docs/prd-per-note-search.feedback-4.md +++ /dev/null @@ -1,187 +0,0 @@ -1. **Canonical note identity for documents: use `notes.gitlab_id` as `source_id`** -Why this is better: the current plan still couples document identity to local row IDs. Even with upsert+sweep, local IDs are a storage artifact and can be reused in edge cases. Using GitLab note IDs as canonical document IDs makes regeneration, backfill, and deletion propagation more stable and portable. - -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Phase 0: Stable Note Identity --Phase 2 depends on `notes.id` as the `source_id` for note documents. -+Phase 2 uses `notes.gitlab_id` as the `source_id` for note documents. -+`notes.id` remains an internal relational key only. 
- -@@ Work Chunk 0A - pub struct NoteUpsertOutcome { - pub local_note_id: i64, -+ pub document_source_id: i64, // notes.gitlab_id - pub changed_semantics: bool, - } - -@@ Work Chunk 2D --if !note.is_system && outcome.changed_semantics { -- dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?; -+if !note.is_system && outcome.changed_semantics { -+ dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.document_source_id)?; - } - -@@ Work Chunk 2E --SELECT 'note', n.id, ?1 -+SELECT 'note', n.gitlab_id, ?1 - -@@ Work Chunk 2H --ON d.source_type = 'note' AND d.source_id = n.id -+ON d.source_type = 'note' AND d.source_id = n.gitlab_id -``` - -2. **Prevent false deletions on partial/incomplete syncs** -Why this is better: sweep-based deletion is correct only when a discussion’s notes were fully fetched. If a page fails mid-fetch, current logic can incorrectly delete valid notes. Add an explicit “fetch complete” guard before sweep. - -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Phase 0 -+### Work Chunk 0C: Sweep Safety Guard (Partial Fetch Protection) -+ -+Only run stale-note sweep when note pagination completed successfully for that discussion. -+If fetch is partial/interrupted, skip sweep and keep prior notes intact. - -+#### Tests to Write First -+#[test] -+fn test_partial_fetch_does_not_sweep_notes() { /* ... */ } -+ -+#[test] -+fn test_complete_fetch_runs_sweep_notes() { /* ... */ } - -+#### Implementation -+if discussion_fetch_complete { -+ sweep_stale_issue_notes(...)?; -+} else { -+ tracing::warn!("Skipping stale sweep for discussion {} due to partial fetch", discussion_gitlab_id); -+} -``` - -3. **Make deletion propagation set-based (not per-note loop)** -Why this is better: the current per-note DELETE loop is O(N) statements and gets slow on large threads. A temp-table/CTE set-based delete is faster, simpler to reason about, and remains atomic. 
- -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Work Chunk 0B Implementation -- for note_id in stale_note_ids { -- conn.execute("DELETE FROM documents WHERE source_type = 'note' AND source_id = ?", [note_id])?; -- conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id = ?", [note_id])?; -- } -+ CREATE TEMP TABLE _stale_note_source_ids(source_id INTEGER PRIMARY KEY) WITHOUT ROWID; -+ INSERT INTO _stale_note_source_ids -+ SELECT gitlab_id -+ FROM notes -+ WHERE discussion_id = ? AND last_seen_at < ? AND is_system = 0; -+ -+ DELETE FROM notes -+ WHERE discussion_id = ? AND last_seen_at < ?; -+ -+ DELETE FROM documents -+ WHERE source_type = 'note' -+ AND source_id IN (SELECT source_id FROM _stale_note_source_ids); -+ -+ DELETE FROM dirty_sources -+ WHERE source_type = 'note' -+ AND source_id IN (SELECT source_id FROM _stale_note_source_ids); -+ -+ DROP TABLE _stale_note_source_ids; -``` - -4. **Fix project-scoping and time-window semantics in `lore notes`** -Why this is better: the plan currently has a contradiction: clap `requires = "project"` blocks use of `defaultProject`, while query layer says default fallback is allowed. Also, `since/until` parsing should use one shared “now” to avoid subtle drift and inverted windows. - -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Work Chunk 1B NotesArgs --#[arg(long = "for-issue", ..., requires = "project")] -+#[arg(long = "for-issue", ...)] - pub for_issue: Option; - --#[arg(long = "for-mr", ..., requires = "project")] -+#[arg(long = "for-mr", ...)] - pub for_mr: Option; - -@@ Work Chunk 1A Query Notes --- `since`: `parse_since(since_str)` then `n.created_at >= ?` --- `until`: `parse_since(until_str)` then `n.created_at <= ?` -+- Parse `since` and `until` with a single anchored `now_ms` captured once per command. -+- If user supplies `YYYY-MM-DD` for `--until`, interpret as end-of-day (23:59:59.999 UTC). -+- Validate `since <= until` after both parse with same anchor. -``` - -5. 
**Add an analytics mode (not a profile command): `lore notes --aggregate`** -Why this is better: this directly supports the stated use case (review patterns) without introducing the rejected “profile report” command. It keeps scope narrow and reuses existing filters. - -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Phase 1 -+### Work Chunk 1F: Aggregation Mode for Notes Listing -+ -+Add optional aggregation on top of `lore notes`: -+- `--aggregate author|note_type|path|resolution` -+- `--top N` (default 20) -+ -+Behavior: -+- Reuses all existing filters (`--since`, `--project`, `--for-mr`, etc.) -+- Returns grouped counts (+ percentage of filtered corpus) -+- Works in table/json/jsonl/csv -+ -+Non-goal alignment: -+- This is not a narrative “reviewer profile” command. -+- It is a query primitive for downstream analysis. -``` - -6. **Prevent note backfill from starving other document regeneration** -Why this is better: after migration/backfill, note dirty entries can dominate the queue and delay issue/MR/discussion updates. Add source-type fairness in regenerator scheduling. - -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Work Chunk 2D -+#### Scheduling Revision -+Process dirty sources with weighted fairness instead of strict FIFO: -+- issue: 3 -+- merge_request: 3 -+- discussion: 2 -+- note: 1 -+ -+Implementation sketch: -+- fetch next batch by source_type buckets -+- interleave according to weights -+- preserve retry semantics per source - -+#### Tests to Write First -+#[test] -+fn test_note_backfill_does_not_starve_issue_and_mr_regeneration() { /* ... */ } -``` - -7. **Harden migration 023: remove invalid SQL assertions and move integrity checks to tests** -Why this is better: `RAISE(ABORT, ...)` in standalone `SELECT` is not valid SQLite usage outside triggers/check expressions. Keep migration SQL minimal/portable and enforce invariants in migration tests. 
- -```diff ---- a/PRD.md -+++ b/PRD.md -@@ Work Chunk 2A Migration SQL ---- Step 10: Integrity verification --SELECT CASE -- WHEN ... THEN RAISE(ABORT, '...') --END; -+-- Step 10 removed from SQL migration. -+-- Integrity verification is enforced in migration tests: -+-- 1) pre/post row-count equality -+-- 2) `PRAGMA foreign_key_check` is empty -+-- 3) documents_fts row count matches documents row count after rebuild - -@@ Work Chunk 2A Tests -+#[test] -+fn test_migration_023_integrity_checks_pass() { -+ // pre/post counts, foreign_key_check empty, fts parity -+} -``` - -These 7 revisions improve correctness under failure, reduce churn risk, improve large-sync performance, and make the feature materially more useful for reviewer-analysis workflows without reintroducing any rejected recommendations. \ No newline at end of file diff --git a/docs/prd-per-note-search.feedback-5.md b/docs/prd-per-note-search.feedback-5.md deleted file mode 100644 index 9c03177..0000000 --- a/docs/prd-per-note-search.feedback-5.md +++ /dev/null @@ -1,190 +0,0 @@ -Here are the highest-impact revisions I’d make. None of these repeat anything in your `## Rejected Recommendations`. - -1. **Add immutable reviewer identity (`author_id`) as a first-class key** -Why this improves the plan: the PRD’s core use case is year-scale reviewer profiling. Usernames are mutable in GitLab, so username-only filtering will fragment one reviewer into multiple identities over time. Adding `author_id` closes that correctness hole and makes historical analysis reliable. - -```diff -@@ Problem Statement --1. **Query individual notes by author** — the `--author` filter on `lore search` only matches the first note's author per discussion thread -+1. 
**Query individual notes by reviewer identity** — support both mutable username and immutable GitLab `author_id` for stable longitudinal analysis - -@@ Phase 0: Stable Note Identity -+### Work Chunk 0D: Immutable Author Identity Capture -+**Files:** `migrations/025_notes_author_id.sql`, `src/ingestion/discussions.rs`, `src/ingestion/mr_discussions.rs`, `src/cli/commands/list.rs` -+ -+#### Implementation -+- Add nullable `notes.author_id INTEGER` and backfill from future syncs. -+- Populate `author_id` from GitLab note payload (`note.author.id`) on both issue and MR note ingestion paths. -+- Add `--author-id ` filter to `lore notes`. -+- Keep `--author` for ergonomics; when both provided, require both to match. -+ -+#### Indexing -+- Add `idx_notes_author_id_created ON notes(project_id, author_id, created_at DESC, id DESC) WHERE is_system = 0;` -+ -+#### Tests -+- `test_query_notes_filter_author_id_survives_username_change` -+- `test_query_notes_author_and_author_id_intersection` -``` - -2. **Strengthen partial-fetch safety from a boolean to an explicit fetch state contract** -Why this improves the plan: `fetch_complete: bool` is easy to misuse and fragile under retries/crashes. A run-scoped state model makes sweep correctness auditable and prevents accidental deletions when ingestion aborts midway. - -```diff -@@ Phase 0: Stable Note Identity --### Work Chunk 0C: Sweep Safety Guard (Partial Fetch Protection) -+### Work Chunk 0C: Sweep Safety Guard with Run-Scoped Fetch State - -@@ Implementation --Add a `fetch_complete` parameter to the discussion ingestion functions. Only run the stale-note sweep when the fetch completed successfully: -+Add a run-scoped fetch state: -+- `FetchState::Complete` -+- `FetchState::Partial` -+- `FetchState::Failed` -+ -+Only run sweep on `FetchState::Complete`. -+Persist `run_seen_at` once per sync run and pass unchanged through all discussion/note upserts. 
-+Require `run_seen_at` monotonicity per discussion before sweep (skip and warn otherwise). - -@@ Tests to Write First -+#[test] -+fn test_failed_fetch_never_sweeps_even_after_partial_upserts() { ... } -+#[test] -+fn test_non_monotonic_run_seen_at_skips_sweep() { ... } -+#[test] -+fn test_retry_after_failed_fetch_then_complete_sweeps_correctly() { ... } -``` - -3. **Add DB-level cleanup triggers for note-document referential integrity** -Why this improves the plan: Work Chunk 0B handles the sweep path, but not every possible delete path. DB triggers give defense-in-depth so stale note docs cannot survive even if a future code path deletes notes differently. - -```diff -@@ Work Chunk 0B: Immediate Deletion Propagation --Update both sweep functions to propagate deletion to documents and dirty_sources using set-based SQL -+Keep set-based SQL in sweep functions, and add DB-level cleanup triggers as a safety net. - -@@ Work Chunk 2A: Schema Migration (023) -+-- Cleanup trigger: deleting a non-system note must delete note document + dirty queue row -+CREATE TRIGGER notes_ad_cleanup AFTER DELETE ON notes -+WHEN old.is_system = 0 -+BEGIN -+ DELETE FROM documents -+ WHERE source_type = 'note' AND source_id = old.id; -+ DELETE FROM dirty_sources -+ WHERE source_type = 'note' AND source_id = old.id; -+END; -+ -+-- Cleanup trigger: if note flips to system, remove its document artifacts -+CREATE TRIGGER notes_au_system_cleanup AFTER UPDATE OF is_system ON notes -+WHEN old.is_system = 0 AND new.is_system = 1 -+BEGIN -+ DELETE FROM documents -+ WHERE source_type = 'note' AND source_id = new.id; -+ DELETE FROM dirty_sources -+ WHERE source_type = 'note' AND source_id = new.id; -+END; -``` - -4. **Eliminate N+1 extraction cost with parent metadata caching in regeneration** -Why this improves the plan: backfilling ~8k notes with per-note parent/label lookups creates avoidable query amplification. 
Batch caching turns repeated joins into one-time lookups per parent entity and materially reduces rebuild time. - -```diff -@@ Phase 2: Per-Note Documents -+### Work Chunk 2I: Batch Parent Metadata Cache for Note Regeneration -+**Files:** `src/documents/regenerator.rs`, `src/documents/extractor.rs` -+ -+#### Implementation -+- Add `NoteExtractionContext` cache keyed by `(noteable_type, parent_id)` containing: -+ - parent iid/title/url -+ - parent labels -+ - project path -+- In batch regeneration, prefetch parent metadata for note IDs in the current chunk. -+- Use cached metadata in `extract_note_document()` to avoid repeated parent/label queries. -+ -+#### Tests -+- `test_note_regeneration_uses_parent_cache_consistently` -+- `test_note_regeneration_cache_hit_preserves_hash_determinism` -``` - -5. **Add embedding dedup cache keyed by semantic text hash** -Why this improves the plan: note docs will contain repeated short comments (“LGTM”, “nit: …”). Current doc-level hashing includes metadata, so identical semantic comments still re-embed many times. A semantic embedding hash cache cuts cost and speeds full rebuild/backfill without changing search behavior. - -```diff -@@ Phase 2: Per-Note Documents -+### Work Chunk 2J: Semantic Embedding Dedup for Notes -+**Files:** `migrations/026_embedding_cache.sql`, embedding pipeline module(s), `src/documents/extractor.rs` -+ -+#### Implementation -+- Compute `embedding_text` for notes as: normalized note body + compact stable context (`parent_type`, `path`, `resolution`), excluding volatile fields. -+- Compute `embedding_hash = sha256(embedding_text)`. -+- Before embedding generation, lookup existing vector by `(model, embedding_hash)`. -+- Reuse cached vector when present; only call embedding model on misses. -+ -+#### Tests -+- `test_identical_note_bodies_reuse_embedding_vector` -+- `test_embedding_hash_changes_when_semantic_context_changes` -``` - -6. 
**Add deterministic review-signal tags as derived labels** -Why this improves the plan: this makes output immediately more useful for reviewer-pattern analysis without adding a profile command (which is explicitly out of scope). It increases practical value of both `lore notes` and `lore search --type note` with low complexity. - -```diff -@@ Non-Goals --- Adding a "reviewer profile" report command (that's a downstream use case built on this infrastructure) -+- Adding a "reviewer profile" report command (downstream), while allowing low-level derived signal tags as indexing primitives - -@@ Phase 2: Per-Note Documents -+### Work Chunk 2K: Derived Review Signal Labels -+**Files:** `src/documents/extractor.rs` -+ -+#### Implementation -+- Derive deterministic labels from note text + metadata: -+ - `signal:nit` -+ - `signal:blocking` -+ - `signal:security` -+ - `signal:performance` -+ - `signal:testing` -+- Attach via existing `document_labels` flow for note documents. -+- No new CLI mode required; existing label filters can consume these labels. -+ -+#### Tests -+- `test_note_document_derives_signal_labels_nit` -+- `test_note_document_derives_signal_labels_security` -+- `test_signal_label_derivation_is_deterministic` -``` - -7. **Add high-precision note targeting filters (`--note-id`, `--gitlab-note-id`, `--discussion-id`)** -Why this improves the plan: debugging, incident response, and reproducibility all benefit from exact addressing. This is especially useful when validating sync correctness and cross-checking a specific note/document lifecycle. 
- -```diff -@@ Work Chunk 1B: CLI Arguments & Command Wiring - pub struct NotesArgs { -+ /// Filter by local note row id -+ #[arg(long = "note-id", help_heading = "Filters")] -+ pub note_id: Option, -+ -+ /// Filter by GitLab note id -+ #[arg(long = "gitlab-note-id", help_heading = "Filters")] -+ pub gitlab_note_id: Option, -+ -+ /// Filter by local discussion id -+ #[arg(long = "discussion-id", help_heading = "Filters")] -+ pub discussion_id: Option, - } - -@@ Work Chunk 1A: Filter struct - pub struct NoteListFilters<'a> { -+ pub note_id: Option, -+ pub gitlab_note_id: Option, -+ pub discussion_id: Option, - } - -@@ Tests to Write First -+#[test] -+fn test_query_notes_filter_note_id_exact() { ... } -+#[test] -+fn test_query_notes_filter_gitlab_note_id_exact() { ... } -+#[test] -+fn test_query_notes_filter_discussion_id_exact() { ... } -``` - -If you want, I can produce a single consolidated “iteration 5” PRD diff that merges these into your exact section ordering and updates the dependency graph/migration numbering end-to-end. \ No newline at end of file diff --git a/docs/who-command-design.feedback-1.md b/docs/who-command-design.feedback-1.md deleted file mode 100644 index b84728b..0000000 --- a/docs/who-command-design.feedback-1.md +++ /dev/null @@ -1,434 +0,0 @@ -Below are the highest-leverage revisions I’d make to this plan. I’m focusing on correctness pitfalls, SQLite gotchas, query performance on 280K notes, and reducing “dynamic SQL + param juggling” complexity—without turning this into a new ingestion project. - -Change 1 — Fix a hard SQLite bug in --active (GROUP_CONCAT DISTINCT + separator) -Why - -SQLite does not allow GROUP_CONCAT(DISTINCT x, sep). With DISTINCT, SQLite only permits a single argument (GROUP_CONCAT(DISTINCT x)). Your current query will error at runtime in many SQLite versions. - -Revision - -Use a subquery that selects distinct participants, then GROUP_CONCAT with your separator. 
- -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_active(...) -- (SELECT GROUP_CONCAT(DISTINCT n.author_username, X'1F') -- FROM notes n -- WHERE n.discussion_id = d.id -- AND n.is_system = 0 -- AND n.author_username IS NOT NULL) AS participants -+ (SELECT GROUP_CONCAT(username, X'1F') FROM ( -+ SELECT DISTINCT n.author_username AS username -+ FROM notes n -+ WHERE n.discussion_id = d.id -+ AND n.is_system = 0 -+ AND n.author_username IS NOT NULL -+ ORDER BY username -+ )) AS participants - -Change 2 — Replace “contains('.') => exact file match” with segment-aware path classification -Why - -path.contains('.') misclassifies directories like: - -.github/workflows/ - -src/v1.2/auth/ - -It also fails the “root file” case (README.md) because your mode discriminator only treats paths as paths if they contain /. - -Revision - -Add explicit --path to force Expert mode (covers root files cleanly). - -Classify file-vs-dir by checking last path segment for a dot, and whether the input ends with /. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ pub struct WhoArgs { -- /// Username or file path (path if contains /) -- pub target: Option, -+ /// Username or file path shorthand (ambiguous for root files like README.md) -+ pub target: Option, -+ -+ /// Force expert mode for a file/directory path (supports root files like README.md) -+ #[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])] -+ pub path: Option, -@@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { -- if let Some(target) = &args.target { -+ if let Some(p) = &args.path { -+ return Ok(WhoMode::Expert { path: p }); -+ } -+ if let Some(target) = &args.target { - let clean = target.strip_prefix('@').unwrap_or(target); - if args.reviews { - return Ok(WhoMode::Reviews { username: clean }); - } -- // Disambiguation: if target contains '/', it's a file path. -- // GitLab usernames never contain '/'. 
-- if target.contains('/') { -+ // Disambiguation: -+ // - treat as path if it contains '/' -+ // - otherwise treat as username (root files require --path) -+ if target.contains('/') { - return Ok(WhoMode::Expert { path: target }); - } - return Ok(WhoMode::Workload { username: clean }); - } - - -And update the path pattern logic used by Expert/Overlap: - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_expert(...) -- // Normalize path for LIKE matching: add trailing % if no extension -- let path_pattern = if path.contains('.') { -- path.to_string() // Exact file match -- } else { -- let trimmed = path.trim_end_matches('/'); -- format!("{trimmed}/%") -- }; -+ // Normalize: -+ // - if ends_with('/') => directory prefix -+ // - else if last segment contains '.' => file exact match -+ // - else => directory prefix -+ let trimmed = path.trim_end_matches('/'); -+ let last = trimmed.rsplit('/').next().unwrap_or(trimmed); -+ let is_file = !path.ends_with('/') && last.contains('.'); -+ let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; - -Change 3 — Stop building dynamic SQL strings for optional filters; always bind params -Why - -Right now you’re mixing: - -dynamic project_clause string fragments - -ad-hoc param vectors - -placeholder renumbering by branch - -That’s brittle and easy to regress (especially when you add more conditions later). SQLite/rusqlite can bind Option to NULL, which enables a simple pattern: - -sql -Copy code -AND (?3 IS NULL OR n.project_id = ?3) - -Revision (representative; apply to all queries) -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_expert(...) 
-- let project_clause = if project_id.is_some() { -- "AND n.project_id = ?3" -- } else { -- "" -- }; -- -- let sql = format!( -+ let sql = format!( - "SELECT username, role, activity_count, last_active_at FROM ( -@@ - FROM notes n - WHERE n.position_new_path LIKE ?1 - AND n.is_system = 0 - AND n.author_username IS NOT NULL - AND n.created_at >= ?2 -- {project_clause} -+ AND (?3 IS NULL OR n.project_id = ?3) -@@ - WHERE n.position_new_path LIKE ?1 - AND m.author_username IS NOT NULL - AND m.updated_at >= ?2 -- {project_clause} -+ AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY m.author_username -- )" -+ ) t" - ); -- -- let mut params: Vec> = Vec::new(); -- params.push(Box::new(path_pattern.clone())); -- params.push(Box::new(since_ms)); -- if let Some(pid) = project_id { -- params.push(Box::new(pid)); -- } -- let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); -+ let param_refs = rusqlite::params![path_pattern, since_ms, project_id]; - - -Notes: - -Adds required derived-table alias t (some SQLite configurations are stricter). - -Eliminates the dynamic param vector and placeholder gymnastics. - -Change 4 — Filter “path touch” queries to DiffNotes and escape LIKE properly -Why - -Only DiffNotes reliably have position_new_path; including other note types can skew counts and harm performance. - -LIKE treats % and _ as wildcards—rare in file paths, but not impossible (generated files, templates). Escaping is a low-cost robustness win. - -Revision - -Add note_type='DiffNote' and LIKE ... ESCAPE '\' plus a tiny escape helper. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_expert(...) 
-- FROM notes n -- WHERE n.position_new_path LIKE ?1 -+ FROM notes n -+ WHERE n.note_type = 'DiffNote' -+ AND n.position_new_path LIKE ?1 ESCAPE '\' - AND n.is_system = 0 -@@ -diff --git a/Plan.md b/Plan.md -@@ Helper Functions -+fn escape_like(input: &str) -> String { -+ input.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_") -+} - - -And when building patterns: - -diff -Copy code -- let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; -+ let base = escape_like(trimmed); -+ let path_pattern = if is_file { base } else { format!("{base}/%") }; - - -Apply the same changes to query_overlap and any other position_new_path LIKE .... - -Change 5 — Use note timestamps for “touch since” semantics (Expert/Overlap author branch) -Why - -In Expert/Overlap “author” branches you filter by m.updated_at >= since. That answers “MR updated recently” rather than “MR touched at this path recently”, which can surface stale ownership. - -Revision - -Filter by the note creation time (and use it for “last touch” where relevant). You can still compute author activity, but anchor it to note activity. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_overlap(...) -- WHERE n.position_new_path LIKE ?1 -+ WHERE n.note_type = 'DiffNote' -+ AND n.position_new_path LIKE ?1 ESCAPE '\' - AND m.state IN ('opened', 'merged') - AND m.author_username IS NOT NULL -- AND m.updated_at >= ?2 -+ AND n.created_at >= ?2 - AND (?3 IS NULL OR m.project_id = ?3) - - -Same idea in Expert mode’s “MR authors” branch. - -Change 6 — Workload mode: apply --since consistently to unresolved discussions -Why - -Workload’s unresolved discussions ignore since_ms. That makes --since partially misleading and can dump very old threads. - -Revision - -Filter on d.last_note_at when since_ms is set. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ fn query_workload(...) 
-- let disc_sql = format!( -+ let disc_since = if since_ms.is_some() { -+ "AND d.last_note_at >= ?2" -+ } else { "" }; -+ let disc_sql = format!( - "SELECT d.noteable_type, -@@ - WHERE d.resolvable = 1 AND d.resolved = 0 - AND EXISTS ( -@@ - ) - {disc_project_filter} -+ {disc_since} - ORDER BY d.last_note_at DESC - LIMIT {limit}" - ); -@@ -- // Rebuild params for discussion query (only username + optional project_id) -- let mut disc_params: Vec> = Vec::new(); -- disc_params.push(Box::new(username.to_string())); -- if let Some(pid) = project_id { -- disc_params.push(Box::new(pid)); -- } -+ // Params: username, since_ms, project_id (NULLs ok) -+ let disc_param_refs = rusqlite::params![username, since_ms, project_id]; - - -(If you adopt Change 3 fully, this becomes very clean.) - -Change 7 — Make Overlap results represent “both roles” instead of collapsing to one -Why - -Collapsing to a single role loses valuable info (“they authored and reviewed”). Also your current “prefer author” rule is arbitrary for the “who else is touching this” question. - -Revision - -Track role counts separately and render as A, R, or A+R. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ pub struct OverlapUser { - pub username: String, -- pub role: String, -- pub touch_count: u32, -+ pub author_touch_count: u32, -+ pub review_touch_count: u32, -+ pub touch_count: u32, - pub last_touch_at: i64, - pub mr_iids: Vec, - } -@@ fn query_overlap(...) 
-- let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { -+ let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { - username: username.clone(), -- role: role.clone(), -+ author_touch_count: 0, -+ review_touch_count: 0, - touch_count: 0, - last_touch_at: 0, - mr_iids: Vec::new(), - }); - entry.touch_count += count; -+ if role == "author" { entry.author_touch_count += count; } -+ if role == "reviewer" { entry.review_touch_count += count; } -@@ human output -- println!( -- " {:<16} {:<8} {:>7} {:<12} {}", -+ println!( -+ " {:<16} {:<6} {:>7} {:<12} {}", - ... - ); -@@ -- user.role, -+ format_roles(user.author_touch_count, user.review_touch_count), - -Change 8 — Add an “Index Audit + optional migration” step (big perf win, low blast radius) -Why - -With 280K notes, the path/timestamp queries will degrade quickly without indexes. This isn’t “scope creep”; it’s making the feature usable. - -Revision (plan-level) - -Add a non-breaking migration that only creates indexes if missing. - -Optionally add a runtime check: if EXPLAIN QUERY PLAN indicates full table scan on notes, print a dim warning in human mode. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ Implementation Order --| Step | What | Files | -+| Step | What | Files | - | 1 | CLI skeleton: `WhoArgs` + `Commands::Who` + dispatch + stub | `cli/mod.rs`, `commands/mod.rs`, `main.rs` | -+| 1.5 | Index audit + add `CREATE INDEX IF NOT EXISTS` migration for who hot paths | `migrations/0xx_who_indexes.sql` | -@@ - - -Suggested indexes (tune names to your conventions): - -notes(note_type, position_new_path, created_at) - -notes(discussion_id, is_system, author_username) - -discussions(resolvable, resolved, last_note_at, project_id) - -merge_requests(project_id, state, updated_at, author_username) - -issue_assignees(username, issue_id) - -Even if SQLite can’t perfectly index LIKE, these still help with join and timestamp filters. 
- -Change 9 — Make robot JSON reproducible by echoing the effective query inputs -Why - -Agent workflows benefit from a stable “query record”: what mode ran, what path/user, resolved project, effective since, limit. - -Revision - -Include an input object in JSON output. - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ struct WhoJsonData { - mode: String, -+ input: serde_json::Value, - #[serde(flatten)] - result: serde_json::Value, - } -@@ pub fn print_who_json(...) -- let output = WhoJsonEnvelope { -+ let input = serde_json::json!({ -+ "project": /* resolved or raw args.project */, -+ "since": /* resolved since ISO */, -+ "limit": /* args.limit */, -+ }); -+ let output = WhoJsonEnvelope { - ok: true, - data: WhoJsonData { - mode: mode.to_string(), -+ input, - result: data, - }, - meta: RobotMeta { elapsed_ms }, - }; - -Change 10 — Tighten clap constraints so invalid combinations never reach resolve_mode -Why - -Right now conflicts are enforced manually (or not at all). Clamp the invalid combos at the CLI layer: - ---active should conflict with target, --overlap, --reviews, --path - ---reviews should require a username (and should conflict with Expert path modes) - -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ pub struct WhoArgs { -- pub active: bool, -+ #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])] -+ pub active: bool, -@@ -- pub overlap: Option, -+ #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])] -+ pub overlap: Option, -@@ -- pub reviews: bool, -+ #[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])] -+ pub reviews: bool, - -Summary of what I’d definitely change - -If you do nothing else, do these first: - -Fix GROUP_CONCAT(DISTINCT ..., sep) in Active mode (runtime error). - -Path classification: add --path, and stop using contains('.') globally. 
- -Remove dynamic SQL + param vectors: always bind project_id as nullable and use (? IS NULL OR ...). - -Filter to DiffNotes + LIKE escaping for correctness and fewer rows scanned. - -Optional index migration: otherwise this will feel slow/non-deterministically slow depending on local DB state. - -If you want, I can also provide a consolidated “v2 plan” as a single unified patch (one diff) rather than per-change snippets. \ No newline at end of file diff --git a/docs/who-command-design.feedback-2.md b/docs/who-command-design.feedback-2.md deleted file mode 100644 index e708903..0000000 --- a/docs/who-command-design.feedback-2.md +++ /dev/null @@ -1,303 +0,0 @@ -Below are the highest-leverage revisions I’d make to iteration 1 to tighten correctness, performance, and “agent usefulness” without blowing up scope. For each change: (1) rationale, (2) a focused unified diff against the plan you pasted. - -Change 1 — Make robot “input echo” actually resolved (project_id, project_path, since_ms/iso, mode) -Why - -Your Design Principle #5 says the robot envelope should echo resolved inputs (“effective since, resolved project”), but the current input object echoes only raw CLI strings. Agents can’t reliably reproduce or compare runs (e.g., fuzzy project resolution may map differently over time). - -This is also a reliability improvement: “what ran” should be computed once and propagated, not recomputed in output. - -Plan diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ --5. **Robot-first reproducibility.** Robot JSON output includes an `input` object echoing the resolved query parameters (effective since, resolved project, limit) so agents can trace exactly what ran. -+5. **Robot-first reproducibility.** Robot JSON output includes a `resolved_input` object (mode, since_ms + since_iso, resolved project_id + project_path, limit, db_path) so agents can trace exactly what ran. - -@@ --/// Main entry point. Resolves mode from args and dispatches. 
--pub fn run_who(config: &Config, args: &WhoArgs) -> Result { -+/// Main entry point. Resolves mode + resolved inputs once, then dispatches. -+pub fn run_who(config: &Config, args: &WhoArgs) -> Result { - let db_path = get_db_path(config.storage.db_path.as_deref()); - let conn = create_connection(&db_path)?; - -- let project_id = args -+ let project_id = args - .project - .as_deref() - .map(|p| resolve_project(&conn, p)) - .transpose()?; -+ let project_path = project_id -+ .map(|id| lookup_project_path(&conn, id)) -+ .transpose()?; - - let mode = resolve_mode(args)?; - - match mode { - WhoMode::Expert { path } => { - let since_ms = resolve_since(args.since.as_deref(), "6m")?; - let result = query_expert(&conn, path, project_id, since_ms, args.limit)?; -- Ok(WhoResult::Expert(result)) -+ Ok(WhoRun::new("expert", &db_path, project_id, project_path, since_ms, args.limit, WhoResult::Expert(result))) - } -@@ - } - } -+ -+/// Wrapper that carries resolved inputs for reproducible output. -+pub struct WhoRun { -+ pub mode: String, -+ pub resolved_input: WhoResolvedInput, -+ pub result: WhoResult, -+} -+ -+pub struct WhoResolvedInput { -+ pub db_path: String, -+ pub project_id: Option, -+ pub project_path: Option, -+ pub since_ms: i64, -+ pub since_iso: String, -+ pub limit: usize, -+} -@@ --pub fn print_who_json(result: &WhoResult, args: &WhoArgs, elapsed_ms: u64) { -- let (mode, data) = match result { -+pub fn print_who_json(run: &WhoRun, args: &WhoArgs, elapsed_ms: u64) { -+ let (mode, data) = match &run.result { - WhoResult::Expert(r) => ("expert", expert_to_json(r)), -@@ -- let input = serde_json::json!({ -+ let input = serde_json::json!({ - "target": args.target, - "path": args.path, - "project": args.project, - "since": args.since, - "limit": args.limit, - }); -+ -+ let resolved_input = serde_json::json!({ -+ "mode": run.mode, -+ "db_path": run.resolved_input.db_path, -+ "project_id": run.resolved_input.project_id, -+ "project_path": run.resolved_input.project_path, 
-+ "since_ms": run.resolved_input.since_ms, -+ "since_iso": run.resolved_input.since_iso, -+ "limit": run.resolved_input.limit, -+ }); -@@ -- data: WhoJsonData { -- mode: mode.to_string(), -- input, -- result: data, -- }, -+ data: WhoJsonData { mode: mode.to_string(), input, resolved_input, result: data }, - meta: RobotMeta { elapsed_ms }, - }; -@@ - struct WhoJsonData { - mode: String, - input: serde_json::Value, -+ resolved_input: serde_json::Value, - #[serde(flatten)] - result: serde_json::Value, - } - -Change 2 — Remove dynamic SQL format!(..LIMIT {limit}) and parameterize LIMIT everywhere -Why - -You explicitly prefer static SQL ((?N IS NULL OR ...)) to avoid subtle bugs; but Workload/Active use format! for LIMIT. Even though limit is typed, it’s an inconsistency that complicates statement caching and encourages future string assembly creep. - -SQLite supports LIMIT ? with bound parameters; rusqlite can bind an i64. - -Plan diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ -- let issues_sql = format!( -- "SELECT ... -- ORDER BY i.updated_at DESC -- LIMIT {limit}" -- ); -- let mut stmt = conn.prepare(&issues_sql)?; -+ let issues_sql = -+ "SELECT ... -+ ORDER BY i.updated_at DESC -+ LIMIT ?4"; -+ let mut stmt = conn.prepare(issues_sql)?; - let assigned_issues: Vec = stmt -- .query_map(rusqlite::params![username, project_id, since_ms], |row| { -+ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { -@@ -- let authored_sql = format!( -- "SELECT ... -- ORDER BY m.updated_at DESC -- LIMIT {limit}" -- ); -- let mut stmt = conn.prepare(&authored_sql)?; -+ let authored_sql = -+ "SELECT ... -+ ORDER BY m.updated_at DESC -+ LIMIT ?4"; -+ let mut stmt = conn.prepare(authored_sql)?; -@@ -- .query_map(rusqlite::params![username, project_id, since_ms], |row| { -+ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { -@@ -- let reviewing_sql = format!( -- "SELECT ... 
-- ORDER BY m.updated_at DESC -- LIMIT {limit}" -- ); -- let mut stmt = conn.prepare(&reviewing_sql)?; -+ let reviewing_sql = -+ "SELECT ... -+ ORDER BY m.updated_at DESC -+ LIMIT ?4"; -+ let mut stmt = conn.prepare(reviewing_sql)?; -@@ -- .query_map(rusqlite::params![username, project_id, since_ms], |row| { -+ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { -@@ -- let disc_sql = format!( -- "SELECT ... -- ORDER BY d.last_note_at DESC -- LIMIT {limit}" -- ); -- let mut stmt = conn.prepare(&disc_sql)?; -+ let disc_sql = -+ "SELECT ... -+ ORDER BY d.last_note_at DESC -+ LIMIT ?4"; -+ let mut stmt = conn.prepare(disc_sql)?; -@@ -- .query_map(rusqlite::params![username, project_id, since_ms], |row| { -+ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { -@@ -- let sql = format!( -- "SELECT ... -- ORDER BY d.last_note_at DESC -- LIMIT {limit}" -- ); -- let mut stmt = conn.prepare(&sql)?; -+ let sql = -+ "SELECT ... -+ ORDER BY d.last_note_at DESC -+ LIMIT ?3"; -+ let mut stmt = conn.prepare(sql)?; -@@ -- .query_map(rusqlite::params![since_ms, project_id], |row| { -+ .query_map(rusqlite::params![since_ms, project_id, limit as i64], |row| { - -Change 3 — Fix path matching for dotless files (LICENSE/Makefile) via “exact OR prefix” (no new flags) -Why - -Your improved “dot only in last segment” heuristic still fails on dotless files (LICENSE, Makefile, Dockerfile) which are common, especially at repo root. Right now they’ll be treated as directories (LICENSE/%) and silently return nothing. - -Best minimal UX: if user provides a path that’s ambiguous (no trailing slash), match either exact file OR directory prefix. - -Plan diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ --/// Build a LIKE pattern from a user-supplied path, with proper LIKE escaping. 
--/// --/// Rules: --/// - If the path ends with `/`, it's a directory prefix → `escaped_path%` --/// - If the last path segment contains `.`, it's a file → exact match --/// - Otherwise, it's a directory prefix → `escaped_path/%` -+/// Build an exact + prefix match from a user-supplied path, with proper LIKE escaping. -+/// -+/// Rules: -+/// - If the path ends with `/`, treat as directory-only (prefix match) -+/// - Otherwise, treat as ambiguous: exact match OR directory prefix -+/// (fixes dotless files like LICENSE/Makefile without requiring new flags) -@@ --fn build_path_pattern(path: &str) -> String { -+struct PathMatch { -+ exact: String, -+ prefix: String, -+ dir_only: bool, -+} -+ -+fn build_path_match(path: &str) -> PathMatch { - let trimmed = path.trim_end_matches('/'); -- let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); -- let is_file = !path.ends_with('/') && last_segment.contains('.'); - let escaped = escape_like(trimmed); -- -- if is_file { -- escaped -- } else { -- format!("{escaped}/%") -- } -+ PathMatch { -+ exact: escaped.clone(), -+ prefix: format!("{escaped}/%"), -+ dir_only: path.ends_with('/'), -+ } - } -@@ -- let path_pattern = build_path_pattern(path); -+ let pm = build_path_match(path); -@@ -- AND n.position_new_path LIKE ?1 ESCAPE '\\' -+ AND ( -+ (?4 = 1 AND n.position_new_path LIKE ?2 ESCAPE '\\') -+ OR (?4 = 0 AND (n.position_new_path = ?1 OR n.position_new_path LIKE ?2 ESCAPE '\\')) -+ ) -@@ -- let rows: Vec<(String, String, u32, i64)> = stmt -- .query_map(rusqlite::params![path_pattern, since_ms, project_id], |row| { -+ let rows: Vec<(String, String, u32, i64)> = stmt -+ .query_map(rusqlite::params![pm.exact, pm.prefix, since_ms, i32::from(pm.dir_only), project_id], |row| { - Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) - })? - - -(Apply the same pattern to Overlap mode.) 
- -Change 4 — Consistently exclude system notes in all DiffNote-based branches (Expert/Overlap author branches currently don’t) -Why - -You filter n.is_system = 0 for reviewer branches, but not in the author branches of Expert/Overlap. That can skew “author touch” via system-generated diff notes or bot activity. - -Consistency here improves correctness and also enables more aggressive partial indexing. - -Plan diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ -- WHERE n.note_type = 'DiffNote' -+ WHERE n.note_type = 'DiffNote' - AND n.position_new_path LIKE ?1 ESCAPE '\\' -+ AND n.is_system = 0 - AND m.author_username IS NOT NULL - AND n.created_at >= ?2 - AND (?3 IS NULL OR m.project_id = ?3) -@@ -- WHERE n.note_type = 'DiffNote' -+ WHERE n.note_type = 'DiffNote' - AND n.position_new_path LIKE ?1 ESCAPE '\\' -+ AND n.is_system = 0 - AND m.state IN ('opened', 'merged') - AND m.author_username IS NOT NULL - AND n.created_at >= ?2 - AND (?3 IS NULL OR m.project_id = ?3) - -Change 5 — Rework Migration 017 indexes to match real predicates + add one critical notes index for discussion participation -Why - -(a) idx_notes_diffnote_path_created currently leads with note_type even though it’s constant via partial index. You want the leading columns to match your most selective predicates: position_new_path prefix + created_at range, with optional project_id. - -(b) Active + Workload discussion participation repeatedly hits notes by (discussion_id, author_username); you only guarantee notes(discussion_id) is indexed. Adding a narrow partial composite index pays off immediately for both “participants” and “EXISTS user participated” checks. 
- -(c) The discussions index should focus on (project_id, last_note_at) with a partial predicate; resolvable/resolved a_ \ No newline at end of file diff --git a/docs/who-command-design.feedback-3.md b/docs/who-command-design.feedback-3.md deleted file mode 100644 index 5d03da0..0000000 --- a/docs/who-command-design.feedback-3.md +++ /dev/null @@ -1,471 +0,0 @@ -Below are the revisions I’d make to iteration 2 to improve correctness, determinism, query-plan quality, and multi-project usability without turning this into a bigger product. - -I’m treating your plan as the “source of truth” and showing git-diff style patches against the plan text/code blocks you included. - -Change 1 — Fix project scoping to hit the right index (DiffNote branches) -Why - -Your hot-path index is: - -idx_notes_diffnote_path_created ON notes(position_new_path, created_at, project_id) WHERE note_type='DiffNote' AND is_system=0 - -But in Expert/Overlap you sometimes scope by m.project_id = ?3 (MR table), not n.project_id = ?3 (notes table). That weakens the optimizer’s ability to use the composite notes index (and can force broader joins before filtering). - -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Query: Expert Mode @@ -- AND (?3 IS NULL OR m.project_id = ?3) -+ -- IMPORTANT: scope on notes.project_id to maximize use of -+ -- idx_notes_diffnote_path_created (notes is the selective table) -+ AND (?3 IS NULL OR n.project_id = ?3) - -@@ Query: Overlap Mode @@ -- AND (?3 IS NULL OR m.project_id = ?3) -+ AND (?3 IS NULL OR n.project_id = ?3) - -@@ Query: Overlap Mode (author branch) @@ -- AND (?3 IS NULL OR m.project_id = ?3) -+ AND (?3 IS NULL OR n.project_id = ?3) - -Change 2 — Introduce a “prefix vs exact” path query to avoid LIKE when you don’t need it -Why - -For exact file paths (e.g. 
src/auth/login.rs), you currently do: - -position_new_path LIKE ?1 ESCAPE '\' where ?1 has no wildcard - -That’s logically fine, but it’s a worse signal to the planner than = and can degrade performance depending on collation/case settings. - -This doesn’t violate “static SQL” — you can pick between two static query strings. - -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Helper: Path Pattern Construction @@ --fn build_path_pattern(path: &str) -> String { -+struct PathQuery { -+ /// The parameter value to bind. -+ value: String, -+ /// If true: use LIKE value || '%'. If false: use '='. -+ is_prefix: bool, -+} -+ -+fn build_path_query(path: &str) -> PathQuery { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); - let is_file = !path.ends_with('/') && last_segment.contains('.'); - let escaped = escape_like(trimmed); - - if is_file { -- escaped -+ PathQuery { value: escaped, is_prefix: false } - } else { -- format!("{escaped}/%") -+ PathQuery { value: format!("{escaped}/%"), is_prefix: true } - } - } - - -And then (example for DiffNote predicates): - -diff -Copy code -@@ Query: Expert Mode @@ -- let path_pattern = build_path_pattern(path); -+ let pq = build_path_query(path); - -- let sql = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; -+ let sql_prefix = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; -+ let sql_exact = " ... n.position_new_path = ?1 ... "; - -- let mut stmt = conn.prepare(sql)?; -+ let mut stmt = if pq.is_prefix { conn.prepare_cached(sql_prefix)? } -+ else { conn.prepare_cached(sql_exact)? }; - let rows = stmt.query_map(params![... pq.value ...], ...); - -Change 3 — Push Expert aggregation into SQL (less Rust, fewer rows, SQL-level LIMIT) -Why - -Right now Expert does: - -UNION ALL - -return per-role rows - -HashMap merge - -score compute - -sort/truncate - -You can do all of that in SQL deterministically, then LIMIT ?N actually works. 
- -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Query: Expert Mode @@ -- let sql = "SELECT username, role, activity_count, last_active_at FROM ( -- ... -- )"; -+ let sql = " -+ WITH activity AS ( -+ SELECT -+ n.author_username AS username, -+ 'reviewer' AS role, -+ COUNT(*) AS cnt, -+ MAX(n.created_at) AS last_active_at -+ FROM notes n -+ WHERE n.note_type = 'DiffNote' -+ AND n.is_system = 0 -+ AND n.author_username IS NOT NULL -+ AND n.created_at >= ?2 -+ AND (?3 IS NULL OR n.project_id = ?3) -+ AND ( -+ (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR -+ (?4 = 0 AND n.position_new_path = ?1) -+ ) -+ GROUP BY n.author_username -+ -+ UNION ALL -+ -+ SELECT -+ m.author_username AS username, -+ 'author' AS role, -+ COUNT(DISTINCT m.id) AS cnt, -+ MAX(n.created_at) AS last_active_at -+ FROM merge_requests m -+ JOIN discussions d ON d.merge_request_id = m.id -+ JOIN notes n ON n.discussion_id = d.id -+ WHERE n.note_type = 'DiffNote' -+ AND n.is_system = 0 -+ AND m.author_username IS NOT NULL -+ AND n.created_at >= ?2 -+ AND (?3 IS NULL OR n.project_id = ?3) -+ AND ( -+ (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR -+ (?4 = 0 AND n.position_new_path = ?1) -+ ) -+ GROUP BY m.author_username -+ ) -+ SELECT -+ username, -+ SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) AS review_count, -+ SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) AS author_count, -+ MAX(last_active_at) AS last_active_at, -+ (SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) * 3.0) + -+ (SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) * 2.0) AS score -+ FROM activity -+ GROUP BY username -+ ORDER BY score DESC, last_active_at DESC, username ASC -+ LIMIT ?5 -+ "; - -- // Aggregate by username: combine reviewer + author counts -- let mut user_map: HashMap<...> = HashMap::new(); -- ... -- experts.sort_by(...); experts.truncate(limit); -+ // No Rust-side merge/sort needed; SQL already returns final rows. 
- -Change 4 — Overlap output is ambiguous across projects: include stable MR refs (project_path!iid) -Why - -mr_iids: Vec is ambiguous in a multi-project DB. !123 only means something with a project. - -Also: your MR IID dedup is currently Vec.contains() inside a loop (O(n²)). Use a HashSet. - -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ OverlapResult @@ - pub struct OverlapUser { - pub username: String, -@@ -- pub mr_iids: Vec, -+ /// Stable MR references like "group/project!123" -+ pub mr_refs: Vec, - } - -@@ Query: Overlap Mode (SQL) @@ -- GROUP_CONCAT(DISTINCT m.iid) AS mr_iids -+ GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM notes n - JOIN discussions d ON n.discussion_id = d.id - JOIN merge_requests m ON d.merge_request_id = m.id -+ JOIN projects p ON m.project_id = p.id -@@ -- GROUP_CONCAT(DISTINCT m.iid) AS mr_iids -+ GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM merge_requests m - JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n ON n.discussion_id = d.id -+ JOIN projects p ON m.project_id = p.id - -@@ Query: Overlap Mode (Rust merge) @@ -- let mr_iids: Vec = mr_iids_csv ... 
-+ let mr_refs: Vec = mr_refs_csv -+ .as_deref() -+ .map(|csv| csv.split(',').map(|s| s.trim().to_string()).collect()) -+ .unwrap_or_default(); -@@ -- // Merge MR IIDs, deduplicate -- for iid in &mr_iids { -- if !entry.mr_iids.contains(iid) { -- entry.mr_iids.push(*iid); -- } -- } -+ // Merge MR refs, deduplicate -+ use std::collections::HashSet; -+ let mut set: HashSet = entry.mr_refs.drain(..).collect(); -+ for r in mr_refs { set.insert(r); } -+ entry.mr_refs = set.into_iter().collect(); - -Change 5 — Active mode: avoid correlated subqueries by preselecting discussions, then aggregating notes once -Why - -Your Active query does two correlated subqueries per discussion row: - -note_count - -participants - -With LIMIT 20 it’s not catastrophic, but it is still unnecessary work and creates “spiky” behavior if the planner chooses poorly. - -Pattern to use: - -CTE selects the limited set of discussions - -Join notes once, aggregate with GROUP BY - -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Query: Active Mode @@ -- let sql = -- "SELECT -- d.noteable_type, -- ... -- (SELECT COUNT(*) FROM notes n -- WHERE n.discussion_id = d.id AND n.is_system = 0) AS note_count, -- (SELECT GROUP_CONCAT(username, X'1F') FROM ( -- SELECT DISTINCT n.author_username AS username -- FROM notes n -- WHERE n.discussion_id = d.id -- AND n.is_system = 0 -- AND n.author_username IS NOT NULL -- ORDER BY username -- )) AS participants -- FROM discussions d -- ... 
-- LIMIT ?3"; -+ let sql = " -+ WITH picked AS ( -+ SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, d.project_id, d.last_note_at -+ FROM discussions d -+ WHERE d.resolvable = 1 AND d.resolved = 0 -+ AND d.last_note_at >= ?1 -+ AND (?2 IS NULL OR d.project_id = ?2) -+ ORDER BY d.last_note_at DESC -+ LIMIT ?3 -+ ), -+ note_agg AS ( -+ SELECT -+ n.discussion_id, -+ COUNT(*) AS note_count, -+ GROUP_CONCAT(n.author_username, X'1F') AS participants -+ FROM ( -+ SELECT DISTINCT discussion_id, author_username -+ FROM notes -+ WHERE is_system = 0 AND author_username IS NOT NULL -+ ) n -+ JOIN picked p ON p.id = n.discussion_id -+ GROUP BY n.discussion_id -+ ) -+ SELECT -+ p.noteable_type, -+ COALESCE(i.iid, m.iid) AS entity_iid, -+ COALESCE(i.title, m.title) AS entity_title, -+ proj.path_with_namespace, -+ p.last_note_at, -+ COALESCE(na.note_count, 0) AS note_count, -+ COALESCE(na.participants, '') AS participants -+ FROM picked p -+ JOIN projects proj ON p.project_id = proj.id -+ LEFT JOIN issues i ON p.issue_id = i.id -+ LEFT JOIN merge_requests m ON p.merge_request_id = m.id -+ LEFT JOIN note_agg na ON na.discussion_id = p.id -+ ORDER BY p.last_note_at DESC -+ "; - -Change 6 — Use prepare_cached() everywhere (cheap perf win, no scope creep) -Why - -You already worked hard to keep SQL static. Taking advantage of sqlite statement caching completes the loop. - -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Query functions @@ -- let mut stmt = conn.prepare(sql)?; -+ let mut stmt = conn.prepare_cached(sql)?; - - -Apply in all query fns (query_workload, query_reviews, query_active, query_expert, query_overlap, lookup_project_path). - -Change 7 — Human output: show project_path where ambiguity exists (Workload + Overlap) -Why - -When not project-scoped, #42 and !100 aren’t unique. You already have project paths in the query results — you’re just not printing them. 
- -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ print_workload_human @@ -- println!( -- " {} {} {}", -+ println!( -+ " {} {} {} {}", - style(format!("#{:<5}", item.iid)).cyan(), - truncate_str(&item.title, 45), - style(format_relative_time(item.updated_at)).dim(), -+ style(&item.project_path).dim(), - ); - -@@ print_workload_human (MRs) @@ -- println!( -- " {} {}{} {}", -+ println!( -+ " {} {}{} {} {}", - style(format!("!{:<5}", mr.iid)).cyan(), - truncate_str(&mr.title, 40), - style(draft).dim(), - style(format_relative_time(mr.updated_at)).dim(), -+ style(&mr.project_path).dim(), - ); - -@@ print_overlap_human @@ -- let mr_str = user.mr_iids.iter().take(5).map(|iid| format!("!{iid}")).collect::>().join(", "); -+ let mr_str = user.mr_refs.iter().take(5).cloned().collect::>().join(", "); - -Change 8 — Robot JSON: add stable IDs + “defaulted” flags for reproducibility -Why - -You already added resolved_input — good. Two more reproducibility gaps remain: - -Agents can’t reliably “open” an entity without IDs (discussion_id, mr_id, issue_id). - -Agents can’t tell whether since was user-provided vs defaulted (important when replaying intent). 
- -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ WhoResolvedInput @@ - pub struct WhoResolvedInput { -@@ - pub since_ms: Option, - pub since_iso: Option, -+ pub since_was_default: bool, - pub limit: usize, - } - -@@ run_who @@ -- let since_ms = resolve_since(args.since.as_deref(), "6m")?; -+ let since_was_default = args.since.is_none(); -+ let since_ms = resolve_since(args.since.as_deref(), "6m")?; - Ok(WhoRun { - resolved_input: WhoResolvedInput { -@@ - since_ms: Some(since_ms), - since_iso: Some(ms_to_iso(since_ms)), -+ since_was_default, - limit: args.limit, - }, - -@@ print_who_json resolved_input @@ - let resolved_input = serde_json::json!({ -@@ - "since_ms": run.resolved_input.since_ms, - "since_iso": run.resolved_input.since_iso, -+ "since_was_default": run.resolved_input.since_was_default, - "limit": run.resolved_input.limit, - }); - - -And for Active/Workload discussion items, add IDs in SQL and JSON: - -diff -Copy code -@@ ActiveDiscussion @@ - pub struct ActiveDiscussion { -+ pub discussion_id: i64, -@@ - } - -@@ query_active SELECT @@ -- SELECT -- p.noteable_type, -+ SELECT -+ p.id AS discussion_id, -+ p.noteable_type, - -@@ active_to_json @@ -- "discussions": r.discussions.iter().map(|d| json!({ -+ "discussions": r.discussions.iter().map(|d| json!({ -+ "discussion_id": d.discussion_id, - ... - })) - -Change 9 — Make performance verification explicit: require EXPLAIN QUERY PLAN checks for each mode -Why - -You’re adding indexes specifically for these queries. The only way to ensure the planner is doing what you think is to lock in a short perf checklist (especially after schema drift or SQLite version differences). 
- -Diff -diff -Copy code ---- a/who-command-design.md -+++ b/who-command-design.md -@@ Verification @@ - # Manual verification against real data - cargo run --release -- who src/features/global-search/ -@@ - cargo run --release -- who src/features/global-search/ -p typescript # project scoped -+ -+# Perf verification (required before merge): -+# Confirm idx_notes_diffnote_path_created is used for Expert/Overlap and -+# idx_discussions_unresolved_recent is used for Active. -+sqlite3 path/to/db.sqlite " -+ EXPLAIN QUERY PLAN -+ SELECT ... -- paste final Expert SQL with representative bindings -+"; - - -(Keep it lightweight: one representative query per mode is enough.) - -Net effect - -Correctness: project scoping hits the notes index; IDs added for agent workflows. - -Performance: fewer rows/materialization in Expert; statement caching everywhere; Active avoids correlated subqueries. - -UX: human output no longer ambiguous across projects; Overlap MR references become actionable. - -Reproducibility: agents can distinguish defaults vs explicit inputs; can dereference entities reliably. - -If you want one “highest ROI” subset to implement first: Change 1 + Change 4 + Change 6 + Change 7. That’s where the real operational value lands. \ No newline at end of file diff --git a/docs/who-command-design.feedback-4.md b/docs/who-command-design.feedback-4.md deleted file mode 100644 index 802fb1e..0000000 --- a/docs/who-command-design.feedback-4.md +++ /dev/null @@ -1,3 +0,0 @@ -ChatGPT said: -Proposing code revisions for performance and determinism -Answer now \ No newline at end of file diff --git a/docs/who-command-design.feedback-5.md b/docs/who-command-design.feedback-5.md deleted file mode 100644 index 6686dbf..0000000 --- a/docs/who-command-design.feedback-5.md +++ /dev/null @@ -1,356 +0,0 @@ -Below are the highest-leverage revisions I’d make. They’re tightly scoped (no new tables/APIs), but fix a few real correctness issues and make the outputs more actionable. 
- -1) Fix a correctness bug in PathQuery: don’t escape for =, and make --path Makefile actually work -Why - -Bug: build_path_query() currently runs escape_like() even when is_prefix = false (exact match). That will break exact matches for paths containing _, %, or \ because = does not treat those as metacharacters (so the escaped string won’t equal the stored path). - -UX mismatch: The plan says --path handles dotless root files (Makefile/LICENSE), but the current logic still treats them as directory prefixes (Makefile/%) → zero results. - -Change - -Only escape for LIKE. - -Treat root paths (no /) passed via --path as exact matches by default (unless they end with /). - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ --/// Build a path query from a user-supplied path. --/// --/// Rules: --/// - If the path ends with `/`, it's a directory prefix -> `escaped_path%` (LIKE) --/// - If the last path segment contains `.`, it's a file -> exact match (=) --/// - Otherwise, it's a directory prefix -> `escaped_path/%` (LIKE) -+/// Build a path query from a user-supplied path. 
-+/// -+/// Rules: -+/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE) -+/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=) -+/// (this makes `--path Makefile` and `--path LICENSE` work as intended) -+/// - Else if the last path segment contains `.`, treat as exact (=) -+/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE) -@@ --fn build_path_query(path: &str) -> PathQuery { -+fn build_path_query(path: &str) -> PathQuery { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); -- let is_file = !path.ends_with('/') && last_segment.contains('.'); -- let escaped = escape_like(trimmed); -+ let is_root = !trimmed.contains('/'); -+ let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.')); - - if is_file { - PathQuery { -- value: escaped, -+ // IMPORTANT: do NOT escape for exact match (=) -+ value: trimmed.to_string(), - is_prefix: false, - } - } else { -+ let escaped = escape_like(trimmed); - PathQuery { - value: format!("{escaped}/%"), - is_prefix: true, - } - } - } -@@ --/// **Known limitation:** Dotless root files (LICENSE, Makefile, Dockerfile) --/// without a trailing `/` will be treated as directory prefixes. Use `--path` --/// for these — the `--path` flag passes through to Expert mode directly, --/// and the `build_path_query` output for "LICENSE" is a prefix `LICENSE/%` --/// which will simply return zero results (a safe, obvious failure mode that the --/// help text addresses). -+/// Note: Root file paths passed via `--path` (including dotless files like Makefile/LICENSE) -+/// are treated as exact matches unless they end with `/`. 
- - -Also update the --path help text to be explicit: - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ -- /// Force expert mode for a file/directory path (handles root files like -- /// README.md, LICENSE, Makefile that lack a / and can't be auto-detected) -+ /// Force expert mode for a file/directory path. -+ /// Root files (README.md, LICENSE, Makefile) are treated as exact matches. -+ /// Use a trailing `/` to force directory-prefix matching. - -2) Fix Active mode: your note_count is currently counting participants, and the CTE scans too broadly -Why - -In note_agg, you do SELECT DISTINCT discussion_id, author_username and then COUNT(*) AS note_count. That’s participant count, not note count. - -The current note_agg also builds the DISTINCT set from all notes then joins to picked. It’s avoidable work. - -Change - -Split into two aggregations scoped to picked: - -note_counts: counts non-system notes per picked discussion. - -participants: distinct usernames per picked discussion, then GROUP_CONCAT. 
- -diff -Copy code -diff --git a/plan.md b/plan.md -@@ -- note_agg AS ( -- SELECT -- n.discussion_id, -- COUNT(*) AS note_count, -- GROUP_CONCAT(n.author_username, X'1F') AS participants -- FROM ( -- SELECT DISTINCT discussion_id, author_username -- FROM notes -- WHERE is_system = 0 AND author_username IS NOT NULL -- ) n -- JOIN picked p ON p.id = n.discussion_id -- GROUP BY n.discussion_id -- ) -+ note_counts AS ( -+ SELECT -+ n.discussion_id, -+ COUNT(*) AS note_count -+ FROM notes n -+ JOIN picked p ON p.id = n.discussion_id -+ WHERE n.is_system = 0 -+ GROUP BY n.discussion_id -+ ), -+ participants AS ( -+ SELECT -+ x.discussion_id, -+ GROUP_CONCAT(x.author_username, X'1F') AS participants -+ FROM ( -+ SELECT DISTINCT n.discussion_id, n.author_username -+ FROM notes n -+ JOIN picked p ON p.id = n.discussion_id -+ WHERE n.is_system = 0 AND n.author_username IS NOT NULL -+ ) x -+ GROUP BY x.discussion_id -+ ) -@@ -- LEFT JOIN note_agg na ON na.discussion_id = p.id -+ LEFT JOIN note_counts nc ON nc.discussion_id = p.id -+ LEFT JOIN participants pa ON pa.discussion_id = p.id -@@ -- COALESCE(na.note_count, 0) AS note_count, -- COALESCE(na.participants, '') AS participants -+ COALESCE(nc.note_count, 0) AS note_count, -+ COALESCE(pa.participants, '') AS participants - - -Net effect: correctness fix + more predictable perf. 
- -Add a test that would have failed before: - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ - #[test] - fn test_active_query() { -@@ -- insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work"); -+ insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work"); -+ insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/foo.rs", "follow-up"); -@@ -- assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]); -+ assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]); -+ assert_eq!(result.discussions[0].note_count, 2); - -3) Index fix: idx_discussions_unresolved_recent won’t help global --active ordering -Why - -Your index is (project_id, last_note_at) with WHERE resolvable=1 AND resolved=0. - -When --active is not project-scoped (common default), SQLite can’t use (project_id, last_note_at) to satisfy ORDER BY last_note_at DESC efficiently because project_id isn’t constrained. - -This can turn into a scan+sort over potentially large unresolved sets. - -Change - -Keep the project-scoped index, but add a global ordering index (partial, still small): - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ - CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent - ON discussions(project_id, last_note_at) - WHERE resolvable = 1 AND resolved = 0; -+ -+-- Active (global): unresolved discussions by recency (no project scope). -+-- Supports ORDER BY last_note_at DESC LIMIT N when project_id is unconstrained. -+CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global -+ ON discussions(last_note_at) -+ WHERE resolvable = 1 AND resolved = 0; - -4) Make Overlap “touches” coherent: count MRs for reviewers, not DiffNotes -Why - -Overlap’s question is “Who else has MRs touching my files?” but: - -reviewer branch uses COUNT(*) (DiffNotes) - -author branch uses COUNT(DISTINCT m.id) (MRs) - -Those are different units; summing them into touch_count is misleading. 
- -Change - -Count distinct MRs on the reviewer branch too: - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ -- COUNT(*) AS touch_count, -+ COUNT(DISTINCT m.id) AS touch_count, - MAX(n.created_at) AS last_touch_at, - GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - - -Also update human output labeling: - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ -- style("Touches").bold(), -+ style("MRs").bold(), - - -(You still preserve “strength” via mr_refs and last_touch_at.) - -5) Make outputs more actionable: add a canonical ref field (group/project!iid, group/project#iid) -Why - -You already do this for Overlap (mr_refs). Doing the same for Workload and Active reduces friction for both humans and agents: - -humans can copy/paste a single token - -robots don’t need to stitch project_path + iid + prefix - -Change (Workload structs + SQL) -diff -Copy code -diff --git a/plan.md b/plan.md -@@ - pub struct WorkloadIssue { - pub iid: i64, -+ pub ref_: String, - pub title: String, - pub project_path: String, - pub updated_at: i64, - } -@@ - pub struct WorkloadMr { - pub iid: i64, -+ pub ref_: String, - pub title: String, - pub draft: bool, - pub project_path: String, -@@ -- let issues_sql = -- "SELECT i.iid, i.title, p.path_with_namespace, i.updated_at -+ let issues_sql = -+ "SELECT i.iid, -+ (p.path_with_namespace || '#' || i.iid) AS ref, -+ i.title, p.path_with_namespace, i.updated_at -@@ -- iid: row.get(0)?, -- title: row.get(1)?, -- project_path: row.get(2)?, -- updated_at: row.get(3)?, -+ iid: row.get(0)?, -+ ref_: row.get(1)?, -+ title: row.get(2)?, -+ project_path: row.get(3)?, -+ updated_at: row.get(4)?, - }) -@@ -- let authored_sql = -- "SELECT m.iid, m.title, m.draft, p.path_with_namespace, m.updated_at -+ let authored_sql = -+ "SELECT m.iid, -+ (p.path_with_namespace || '!' || m.iid) AS ref, -+ m.title, m.draft, p.path_with_namespace, m.updated_at -@@ -- iid: row.get(0)?, -- title: row.get(1)?, -- draft: row.get::<_, i32>(2)? 
!= 0, -- project_path: row.get(3)?, -+ iid: row.get(0)?, -+ ref_: row.get(1)?, -+ title: row.get(2)?, -+ draft: row.get::<_, i32>(3)? != 0, -+ project_path: row.get(4)?, - author_username: None, -- updated_at: row.get(4)?, -+ updated_at: row.get(5)?, - }) - - -Then use ref_ in human output + robot JSON. - -6) Reviews mode: tolerate leading whitespace before **prefix** -Why - -Many people write " **suggestion**: ...". Current LIKE '**%**%' misses that. - -Change - -Use ltrim(n.body) consistently: - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ -- AND n.body LIKE '**%**%' -+ AND ltrim(n.body) LIKE '**%**%' -@@ -- SUBSTR(n.body, 3, INSTR(SUBSTR(n.body, 3), '**') - 1) AS raw_prefix, -+ SUBSTR(ltrim(n.body), 3, INSTR(SUBSTR(ltrim(n.body), 3), '**') - 1) AS raw_prefix, - -7) Add two small tests that catch the above regressions -Why - -These are exactly the kind of issues that slip through without targeted tests. - -diff -Copy code -diff --git a/plan.md b/plan.md -@@ - #[test] - fn test_escape_like() { -@@ - } -+ -+ #[test] -+ fn test_build_path_query_exact_does_not_escape() { -+ // '_' must not be escaped for '=' -+ let pq = build_path_query("README_with_underscore.md"); -+ assert_eq!(pq.value, "README_with_underscore.md"); -+ assert!(!pq.is_prefix); -+ } -+ -+ #[test] -+ fn test_path_flag_dotless_root_file_is_exact() { -+ let pq = build_path_query("Makefile"); -+ assert_eq!(pq.value, "Makefile"); -+ assert!(!pq.is_prefix); -+ } - -Summary of net effect - -Correctness fixes: exact-path escaping bug; Active.note_count bug. - -Perf fixes: global --active index; avoid broad note scans in Active. - -Usefulness upgrades: coherent overlap “touch” metric; canonical refs everywhere; reviews prefix more robust. - -If you want one extra “stretch” that still isn’t scope creep: add an unscoped warning line in human output when project_id == None (e.g., “Aggregated across projects; use -p to scope”) for Expert/Overlap/Active. 
That’s pure presentation, but prevents misinterpretation in multi-project DBs. \ No newline at end of file diff --git a/docs/who-command-design.feedback-6.md b/docs/who-command-design.feedback-6.md deleted file mode 100644 index 5876662..0000000 --- a/docs/who-command-design.feedback-6.md +++ /dev/null @@ -1,471 +0,0 @@ -Proposed revisions (Iteration 6) - -Below are the highest-leverage changes I’d make on top of your current Iteration 5 plan, with rationale and git-diff style edits to the plan text/snippets. - -1) Fix a real edge case: dotless non-root files (src/Dockerfile, infra/Makefile, etc.) -Why - -Your current build_path_query() treats dotless last segments as directories (prefix match) unless the path is root. That misclassifies legitimate dotless files inside directories and silently produces path/% (zero hits or wrong hits). - -Best minimal fix: keep your static SQL approach, but add a DB existence probe (static SQL) for path queries: - -If user didn’t force directory (/), and exact path exists in DiffNotes, treat as exact =. - -Otherwise use prefix LIKE 'dir/%'. - -This avoids new CLI flags, avoids heuristics lists, and uses your existing partial index (idx_notes_diffnote_path_created) efficiently. - -Diff -diff -Copy code -diff --git a/Plan.md b/Plan.md -@@ --struct PathQuery { -+struct PathQuery { - /// The parameter value to bind. - value: String, - /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. - is_prefix: bool, - } - --/// Build a path query from a user-supplied path. -+/// Build a path query from a user-supplied path, with a DB probe for dotless files. 
-@@ --fn build_path_query(path: &str) -> PathQuery { -+fn build_path_query(conn: &Connection, path: &str) -> Result { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); - let is_root = !trimmed.contains('/'); -- let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.')); -+ let forced_dir = path.ends_with('/'); -+ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); -+ -+ // If it doesn't "look like a file" but the exact path exists in DiffNotes, -+ // treat as exact (handles src/Dockerfile, infra/Makefile, etc.). -+ let exact_exists = if !looks_like_file && !forced_dir { -+ conn.query_row( -+ "SELECT 1 -+ FROM notes -+ WHERE note_type = 'DiffNote' -+ AND is_system = 0 -+ AND position_new_path = ?1 -+ LIMIT 1", -+ rusqlite::params![trimmed], -+ |_| Ok(()), -+ ).is_ok() -+ } else { -+ false -+ }; -+ -+ let is_file = looks_like_file || exact_exists; - - if is_file { - PathQuery { - value: trimmed.to_string(), - is_prefix: false, - } - } else { - let escaped = escape_like(trimmed); - PathQuery { - value: format!("{escaped}/%"), - is_prefix: true, - } - } - } - - -Also update callers: - -diff -Copy code -@@ -- let pq = build_path_query(path); -+ let pq = build_path_query(conn, path)?; -@@ -- let pq = build_path_query(path); -+ let pq = build_path_query(conn, path)?; - - -And tests: - -diff -Copy code -@@ -- fn test_build_path_query() { -+ fn test_build_path_query() { -@@ -- // Dotless root file -> exact match (root path without '/') -+ // Dotless root file -> exact match (root path without '/') - let pq = build_path_query("Makefile"); - assert_eq!(pq.value, "Makefile"); - assert!(!pq.is_prefix); -+ -+ // Dotless file in subdir should become exact if DB contains it (probe) -+ // (set up: insert one DiffNote with position_new_path = "src/Dockerfile") - -2) Make “reviewer” semantics correct: exclude MR authors commenting on their own diffs -Why - -Right now, Overlap 
(and Expert reviewer branch) will count MR authors as “reviewers” if they leave DiffNotes in their own MR (clarifications / replies), inflating A+R and contaminating “who reviewed here” signals. - -You already enforce this in --reviews mode (m.author_username != ?1). Apply the same principle consistently: - -Reviewer branch: only count notes where n.author_username != m.author_username (when both non-NULL). - -Diff (Overlap reviewer branch) -diff -Copy code -@@ -- WHERE n.note_type = 'DiffNote' -+ WHERE n.note_type = 'DiffNote' - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.is_system = 0 - AND n.author_username IS NOT NULL -+ AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - - -Same change for sql_exact. - -3) Expert mode scoring: align units + reduce single-MR “comment storms” -Why - -Expert currently mixes units: - -reviewer side: DiffNote count - -author side: distinct MR count - -That makes score noisy and can crown “someone who wrote 30 comments on one MR” as top expert. - -Fix: make both sides primarily MR-breadth: - -reviewer: COUNT(DISTINCT m.id) as review_mr_count - -author: COUNT(DISTINCT m.id) as author_mr_count -Optionally keep review_note_count as a secondary intensity signal (but not the main driver). 
- -Diff (types + SQL) -diff -Copy code -@@ - pub struct Expert { - pub username: String, -- pub score: f64, -- pub review_count: u32, -- pub author_count: u32, -+ pub score: i64, -+ pub review_mr_count: u32, -+ pub review_note_count: u32, -+ pub author_mr_count: u32, - pub last_active_ms: i64, - } - - -Reviewer branch now joins to MR so it can count distinct MRs and exclude self-comments: - -diff -Copy code -@@ -- SELECT -- n.author_username AS username, -- 'reviewer' AS role, -- COUNT(*) AS cnt, -- MAX(n.created_at) AS last_active_at -- FROM notes n -+ SELECT -+ n.author_username AS username, -+ 'reviewer' AS role, -+ COUNT(DISTINCT m.id) AS mr_cnt, -+ COUNT(*) AS note_cnt, -+ MAX(n.created_at) AS last_active_at -+ FROM notes n -+ JOIN discussions d ON n.discussion_id = d.id -+ JOIN merge_requests m ON d.merge_request_id = m.id - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 - AND n.author_username IS NOT NULL -+ AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY n.author_username - - -Update author branch payload to match shape: - -diff -Copy code -@@ - SELECT - m.author_username AS username, - 'author' AS role, -- COUNT(DISTINCT m.id) AS cnt, -+ COUNT(DISTINCT m.id) AS mr_cnt, -+ 0 AS note_cnt, - MAX(n.created_at) AS last_active_at - - -Aggregate: - -diff -Copy code -@@ - SELECT - username, -- SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) AS review_count, -- SUM(CASE WHEN role = 'author' THEN cnt ELSE 0 END) AS author_count, -+ SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, -+ SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, -+ SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, - MAX(last_active_at) AS last_active_at, -- (SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) * 3.0) + -- (SUM(CASE WHEN role = 'author' THEN cnt 
ELSE 0 END) * 2.0) AS score -+ ( -+ (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + -+ (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + -+ (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) -+ ) AS score - - -Human header: - -diff -Copy code -@@ -- style("Reviews").bold(), -- style("Authored").bold(), -+ style("Reviewed(MRs)").bold(), -+ style("Notes").bold(), -+ style("Authored(MRs)").bold(), - -4) Deterministic output: participants + MR refs + tie-breakers -Why - -You’ve correctly focused on reproducibility (resolved_input), but you still have nondeterministic lists: - -participants: GROUP_CONCAT order is undefined → vector order changes run-to-run. - -mr_refs: you dedup via HashSet then iterate → undefined order. - -user sorting in overlap is missing stable tie-breakers. - -This is a real “robot mode flake” source. - -Diff (Active participants sort) -diff -Copy code -@@ -- let participants: Vec = participants_csv -+ let mut participants: Vec = participants_csv - .as_deref() - .filter(|s| !s.is_empty()) - .map(|csv| csv.split('\x1F').map(String::from).collect()) - .unwrap_or_default(); -+ participants.sort(); // stable, deterministic - -Diff (Overlap MR refs sort + stable user sort) -diff -Copy code -@@ -- users.sort_by(|a, b| b.touch_count.cmp(&a.touch_count)); -+ users.sort_by(|a, b| { -+ b.touch_count.cmp(&a.touch_count) -+ .then_with(|| b.last_touch_at.cmp(&a.last_touch_at)) -+ .then_with(|| a.username.cmp(&b.username)) -+ }); -@@ -- entry.mr_refs = set.into_iter().collect(); -+ let mut v: Vec = set.into_iter().collect(); -+ v.sort(); -+ entry.mr_refs = v; - -5) Make --limit actionable: surface truncation explicitly (human + robot) -Why - -Agents (and humans) need to know if results were cut off so they can rerun with a bigger -n. -Right now there’s no signal. - -Minimal pattern: query limit + 1, set truncated = true if you got > limit, then truncate. 
- -Diff (result types) -diff -Copy code -@@ - pub struct ExpertResult { - pub path_query: String, - pub experts: Vec, -+ pub truncated: bool, - } -@@ - pub struct ActiveResult { - pub discussions: Vec, - pub total_unresolved: u32, -+ pub truncated: bool, - } -@@ - pub struct OverlapResult { - pub path_query: String, - pub users: Vec, -+ pub truncated: bool, - } - -Diff (query pattern example) -diff -Copy code -@@ -- let limit_i64 = limit as i64; -+ let limit_plus_one = (limit + 1) as i64; -@@ -- LIMIT ?4 -+ LIMIT ?4 -@@ -- rusqlite::params![pq.value, since_ms, project_id, limit_i64], -+ rusqlite::params![pq.value, since_ms, project_id, limit_plus_one], -@@ -- Ok(ExpertResult { -+ let truncated = experts.len() > limit; -+ let experts = experts.into_iter().take(limit).collect(); -+ Ok(ExpertResult { - path_query: path.to_string(), - experts, -+ truncated, - }) - - -Human output hint: - -diff -Copy code -@@ - if r.experts.is_empty() { ... } -+ if r.truncated { -+ println!(" {}", style("(showing first -n; rerun with a higher --limit)").dim()); -+ } - - -Robot output field: - -diff -Copy code -@@ - fn expert_to_json(r: &ExpertResult) -> serde_json::Value { - serde_json::json!({ - "path_query": r.path_query, -+ "truncated": r.truncated, - "experts": ... - }) - } - -6) Overlap merge hot loop: avoid repeated HashSet rebuild per row -Why - -This line is expensive in a UNION result with many rows: - -rust -Copy code -let mut set: HashSet = entry.mr_refs.drain(..).collect(); - - -It reallocates and rehashes every time. - -Fix: store an accumulator with HashSet during merge, convert once at end. 
- -Diff (internal accumulator) -diff -Copy code -@@ -- let mut user_map: HashMap = HashMap::new(); -+ struct OverlapAcc { -+ username: String, -+ author_touch_count: u32, -+ review_touch_count: u32, -+ touch_count: u32, -+ last_touch_at: i64, -+ mr_refs: HashSet, -+ } -+ let mut user_map: HashMap = HashMap::new(); -@@ -- let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { -+ let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapAcc { - username: username.clone(), - author_touch_count: 0, - review_touch_count: 0, - touch_count: 0, - last_touch_at: 0, -- mr_refs: Vec::new(), -+ mr_refs: HashSet::new(), - }); -@@ -- let mut set: HashSet = entry.mr_refs.drain(..).collect(); -- for r in mr_refs { set.insert(r); } -- entry.mr_refs = set.into_iter().collect(); -+ for r in mr_refs { entry.mr_refs.insert(r); } -@@ -- let mut users: Vec = user_map.into_values().collect(); -+ let mut users: Vec = user_map.into_values().map(|a| { -+ let mut mr_refs: Vec = a.mr_refs.into_iter().collect(); -+ mr_refs.sort(); -+ OverlapUser { -+ username: a.username, -+ author_touch_count: a.author_touch_count, -+ review_touch_count: a.review_touch_count, -+ touch_count: a.touch_count, -+ last_touch_at: a.last_touch_at, -+ mr_refs, -+ } -+ }).collect(); - -7) Tests to lock these behaviors -Add tests (high value) - -dotless subdir file uses DB probe → exact match - -self-review exclusion prevents MR author showing up as reviewer - -deterministic ordering for participants and mr_refs (sort) - -Diff (test additions outline) -diff -Copy code -@@ - #[test] -+ fn test_build_path_query_dotless_subdir_file_uses_probe() { -+ let conn = setup_test_db(); -+ insert_project(&conn, 1, "team/backend"); -+ insert_mr(&conn, 1, 1, 100, "author_a", "opened"); -+ insert_discussion(&conn, 1, 1, Some(1), None, true, false); -+ insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/Dockerfile", "note"); -+ -+ let pq = build_path_query(&conn, "src/Dockerfile").unwrap(); -+ 
assert_eq!(pq.value, "src/Dockerfile"); -+ assert!(!pq.is_prefix); -+ } -+ -+ #[test] -+ fn test_overlap_excludes_self_review_notes() { -+ let conn = setup_test_db(); -+ insert_project(&conn, 1, "team/backend"); -+ insert_mr(&conn, 1, 1, 100, "author_a", "opened"); -+ insert_discussion(&conn, 1, 1, Some(1), None, true, false); -+ // author_a comments on their own MR diff -+ insert_diffnote(&conn, 1, 1, 1, "author_a", "src/auth/login.rs", "clarification"); -+ -+ let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); -+ let u = result.users.iter().find(|u| u.username == "author_a"); -+ // should not be credited as reviewer touch -+ assert!(u.map(|x| x.review_touch_count).unwrap_or(0) == 0); -+ } - -Net effect - -Correctness: fixes dotless subdir files + self-review pollution. - -Signal quality: Expert ranking becomes harder to game by comment volume. - -Robot reproducibility: deterministic ordering + explicit truncation. - -Performance: avoids rehash loops in overlap merges; path probe uses indexed equality. - -If you want one “single best” change: #1 (DB probe exact-match) is the most likely to prevent confusing “why is this empty?” behavior without adding any user-facing complexity. \ No newline at end of file diff --git a/docs/who-command-design.feedback-7.md b/docs/who-command-design.feedback-7.md deleted file mode 100644 index de68fec..0000000 --- a/docs/who-command-design.feedback-7.md +++ /dev/null @@ -1,353 +0,0 @@ -Below are the highest-leverage revisions I’d make to iteration 6 to improve correctness (multi-project edge cases), robot-mode reliability (bounded payloads + truncation), and signal quality—without changing the fundamental scope (still pure SQL over existing tables). 
- -1) Make build_path_query project-aware and two-way probe (exact and prefix) -Why - -Your DB probe currently answers: “does this exact file exist anywhere in DiffNotes?” That can misclassify in a project-scoped run: - -Path exists as a dotless file in Project A → probe returns true - -User runs -p Project B where the path is a directory (or different shape) → you switch to exact, return empty, and miss valid prefix hits. - -Also, you still have a minor heuristic fragility for dot directories when the user omits trailing / (e.g., .github/workflows): last segment has a dot → you treat as file unless forced dir. - -Revision - -Thread project_id into build_path_query(conn, path, project_id) - -Probe exact first (scoped), then probe prefix (scoped) - -Only fall back to heuristics if both probes fail - -This keeps “static SQL, no dynamic assembly,” and costs at most 2 indexed existence queries per invocation. - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ -- fn build_path_query(conn: &Connection, path: &str) -> Result { -+ fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> Result { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); - let is_root = !trimmed.contains('/'); - let forced_dir = path.ends_with('/'); -- let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); -+ // Heuristic is now only a fallback; probes decide first. 
-+ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); - -- let exact_exists = if !looks_like_file && !forced_dir { -- conn.query_row( -- "SELECT 1 FROM notes -- WHERE note_type = 'DiffNote' -- AND is_system = 0 -- AND position_new_path = ?1 -- LIMIT 1", -- rusqlite::params![trimmed], -- |_| Ok(()), -- ) -- .is_ok() -- } else { -- false -- }; -+ // Probe 1: exact file exists (scoped) -+ let exact_exists = conn.query_row( -+ "SELECT 1 FROM notes -+ WHERE note_type = 'DiffNote' -+ AND is_system = 0 -+ AND position_new_path = ?1 -+ AND (?2 IS NULL OR project_id = ?2) -+ LIMIT 1", -+ rusqlite::params![trimmed, project_id], -+ |_| Ok(()), -+ ).is_ok(); -+ -+ // Probe 2: directory prefix exists (scoped) -+ let prefix_exists = if !forced_dir { -+ let escaped = escape_like(trimmed); -+ let pat = format!("{escaped}/%"); -+ conn.query_row( -+ "SELECT 1 FROM notes -+ WHERE note_type = 'DiffNote' -+ AND is_system = 0 -+ AND position_new_path LIKE ?1 ESCAPE '\\' -+ AND (?2 IS NULL OR project_id = ?2) -+ LIMIT 1", -+ rusqlite::params![pat, project_id], -+ |_| Ok(()), -+ ).is_ok() -+ } else { false }; - -- let is_file = looks_like_file || exact_exists; -+ // Forced directory always wins; otherwise: exact > prefix > heuristic -+ let is_file = if forced_dir { false } -+ else if exact_exists { true } -+ else if prefix_exists { false } -+ else { looks_like_file }; - - if is_file { - Ok(PathQuery { value: trimmed.to_string(), is_prefix: false }) - } else { - let escaped = escape_like(trimmed); - Ok(PathQuery { value: format!("{escaped}/%"), is_prefix: true }) - } - } -@@ -- let pq = build_path_query(conn, path)?; -+ let pq = build_path_query(conn, path, project_id)?; - - -Add test coverage for the multi-project misclassification case: - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - #[test] - fn test_build_path_query_dotless_subdir_file_uses_db_probe() { -@@ -- let pq = build_path_query(&conn, "src/Dockerfile").unwrap(); -+ 
let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); -@@ -- let pq2 = build_path_query(&conn2, "src/Dockerfile").unwrap(); -+ let pq2 = build_path_query(&conn2, "src/Dockerfile", None).unwrap(); - } -+ -+ #[test] -+ fn test_build_path_query_probe_is_project_scoped() { -+ // Path exists as a dotless file in project 1; project 2 should not -+ // treat it as an exact file unless it exists there too. -+ let conn = setup_test_db(); -+ insert_project(&conn, 1, "team/a"); -+ insert_project(&conn, 2, "team/b"); -+ insert_mr(&conn, 1, 1, 10, "author_a", "opened"); -+ insert_discussion(&conn, 1, 1, Some(1), None, true, false); -+ insert_diffnote(&conn, 1, 1, 1, "rev", "infra/Makefile", "note"); -+ -+ let pq_scoped = build_path_query(&conn, "infra/Makefile", Some(2)).unwrap(); -+ assert!(pq_scoped.is_prefix); // should fall back to prefix in project 2 -+ } - -2) Bound robot payload sizes for participants and mr_refs (with totals + truncation) -Why - -mr_refs and participants can become unbounded arrays in robot mode, which is a real operational hazard: - -huge JSON → slow, noisy diffs, brittle downstream pipelines - -potential SQLite group_concat truncation becomes invisible (and you can’t distinguish “no refs” vs “refs truncated”) - -Revision - -Introduce hard caps and explicit metadata: - -participants_total, participants_truncated - -mr_refs_total, mr_refs_truncated - -This is not scope creep—it’s defensive output hygiene. - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - pub struct ActiveDiscussion { -@@ - pub participants: Vec, -+ pub participants_total: u32, -+ pub participants_truncated: bool, - } -@@ - pub struct OverlapUser { -@@ - pub mr_refs: Vec, -+ pub mr_refs_total: u32, -+ pub mr_refs_truncated: bool, - } - - -Implementation sketch (Rust-side, deterministic): - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - fn query_active(...) 
-> Result { -+ const MAX_PARTICIPANTS: usize = 50; -@@ -- participants.sort(); -+ participants.sort(); -+ let participants_total = participants.len() as u32; -+ let participants_truncated = participants.len() > MAX_PARTICIPANTS; -+ if participants_truncated { -+ participants.truncate(MAX_PARTICIPANTS); -+ } -@@ - Ok(ActiveDiscussion { -@@ - participants, -+ participants_total, -+ participants_truncated, - }) -@@ - fn query_overlap(...) -> Result { -+ const MAX_MR_REFS_PER_USER: usize = 50; -@@ - .map(|a| { - let mut mr_refs: Vec = a.mr_refs.into_iter().collect(); - mr_refs.sort(); -+ let mr_refs_total = mr_refs.len() as u32; -+ let mr_refs_truncated = mr_refs.len() > MAX_MR_REFS_PER_USER; -+ if mr_refs_truncated { -+ mr_refs.truncate(MAX_MR_REFS_PER_USER); -+ } - OverlapUser { -@@ - mr_refs, -+ mr_refs_total, -+ mr_refs_truncated, - } - }) - - -Update robot JSON accordingly: - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - fn active_to_json(r: &ActiveResult) -> serde_json::Value { -@@ - "participants": d.participants, -+ "participants_total": d.participants_total, -+ "participants_truncated": d.participants_truncated, - })) -@@ - fn overlap_to_json(r: &OverlapResult) -> serde_json::Value { -@@ - "mr_refs": u.mr_refs, -+ "mr_refs_total": u.mr_refs_total, -+ "mr_refs_truncated": u.mr_refs_truncated, - })) - - -Also update robot-docs manifest schema snippet for who.active.discussions[] and who.overlap.users[]. - -3) Add truncation metadata to Workload sections (same LIMIT+1 pattern) -Why - -Workload is the mode most likely to be consumed by agents, and right now it has silent truncation (each section is LIMIT N with no signal). Your plan already treats truncation as a first-class contract elsewhere; Workload should match. 
- -Revision - -For each workload query: - -request LIMIT + 1 - -set *_truncated booleans - -trim to requested limit - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - pub struct WorkloadResult { - pub username: String, - pub assigned_issues: Vec, - pub authored_mrs: Vec, - pub reviewing_mrs: Vec, - pub unresolved_discussions: Vec, -+ pub assigned_issues_truncated: bool, -+ pub authored_mrs_truncated: bool, -+ pub reviewing_mrs_truncated: bool, -+ pub unresolved_discussions_truncated: bool, - } - - -And in JSON include the booleans (plus you already have summary.counts). - -This is mechanically repetitive but extremely valuable for automation. - -4) Rename “Last Active” → “Last Seen” for Expert/Overlap -Why - -For “author” rows, the timestamp is derived from review activity on their MR (via MAX(n.created_at)), not necessarily that person’s direct action. Calling that “active” is semantically misleading. “Last seen” is accurate across both reviewer+author branches. - -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - pub struct Expert { -@@ -- pub last_active_ms: i64, -+ pub last_seen_ms: i64, - } -@@ - pub struct OverlapUser { -@@ -- pub last_touch_at: i64, -+ pub last_seen_at: i64, -@@ - fn print_expert_human(...) { -@@ -- style("Last Active").bold(), -+ style("Last Seen").bold(), -@@ -- style(format_relative_time(expert.last_active_ms)).dim(), -+ style(format_relative_time(expert.last_seen_ms)).dim(), - - -(Keep internal SQL aliases consistent: last_seen_at everywhere.) - -5) Make MR state filtering consistent in Expert/Overlap reviewer branches -Why - -You already restrict Overlap author branch to opened|merged, but reviewer branches can include closed/unmerged noise. Consistency improves signal quality and can reduce scan churn. - -Low-risk revision: apply the same state filter to reviewer branches (Expert + Overlap). You can keep “closed” excluded by default without adding new flags. 
- -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 -@@ -- AND n.created_at >= ?2 -+ AND m.state IN ('opened','merged') -+ AND n.created_at >= ?2 - - -This is a semantic choice; if you later want archaeology across closed/unmerged, that belongs in a separate mode/flag, but I would not add it now. - -6) Add a design principle for bounded outputs (aligns with robot-first reproducibility) -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ - 10. **Truncation transparency.** Result types carry a `truncated: bool` flag... -+11. **Bounded payloads.** Robot JSON must never emit unbounded arrays (participants, refs). -+ Large list fields are capped with `*_total` + `*_truncated` so agents can page/retry. - -Consolidated plan metadata bump (Iteration 7) -diff -Copy code -diff --git a/who-command-design.md b/who-command-design.md -@@ --iteration: 6 -+iteration: 7 - updated: 2026-02-07 - -Net effect (what you get) - -Correct path classification under -p scoping (no cross-project probe leakage) - -Deterministic + bounded robot payloads (no giant JSON surprises) - -Uniform truncation contract across all modes (Workload no longer silently truncates) - -Clearer semantics (“Last Seen” avoids misinterpretation) - -Cleaner signals (reviewer branches ignore closed/unmerged by default) - -If you want, I can also produce a second diff that updates the robot-docs schema block and the Verification EXPLAIN expectations to reflect the new probe queries and the state filter. 
\ No newline at end of file diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs index 253f310..e5cba14 100644 --- a/src/cli/autocorrect.rs +++ b/src/cli/autocorrect.rs @@ -124,6 +124,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--no-docs", "--no-events", "--no-file-changes", + "--no-status", "--dry-run", "--no-dry-run", "--timings", @@ -232,6 +233,16 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--default-project", ], ), + ( + "file-history", + &[ + "--project", + "--discussions", + "--no-follow-renames", + "--merged", + "--limit", + ], + ), ("generate-docs", &["--full", "--project"]), ("completions", &[]), ("robot-docs", &["--brief"]), @@ -296,6 +307,8 @@ const SUBCOMMAND_ALIASES: &[(&str, &str)] = &[ ("syncstatus", "status"), ("auth_test", "auth"), ("authtest", "auth"), + ("file_history", "file-history"), + ("filehistory", "file-history"), ]; // --------------------------------------------------------------------------- diff --git a/src/cli/commands/file_history.rs b/src/cli/commands/file_history.rs new file mode 100644 index 0000000..a3894cd --- /dev/null +++ b/src/cli/commands/file_history.rs @@ -0,0 +1,334 @@ +use serde::Serialize; + +use crate::Config; +use crate::cli::render::{self, Icons, Theme}; +use crate::core::db::create_connection; +use crate::core::error::Result; +use crate::core::file_history::resolve_rename_chain; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::core::time::ms_to_iso; + +/// Maximum rename chain BFS depth. +const MAX_RENAME_HOPS: usize = 10; + +/// A single MR that touched the file. +#[derive(Debug, Serialize)] +pub struct FileHistoryMr { + pub iid: i64, + pub title: String, + pub state: String, + pub author_username: String, + pub change_type: String, + pub merged_at_iso: Option, + pub updated_at_iso: String, + pub merge_commit_sha: Option, + pub web_url: Option, +} + +/// A DiffNote discussion snippet on the file. 
+#[derive(Debug, Serialize)] +pub struct FileDiscussion { + pub discussion_id: String, + pub author_username: String, + pub body_snippet: String, + pub path: String, + pub created_at_iso: String, +} + +/// Full result of a file-history query. +#[derive(Debug, Serialize)] +pub struct FileHistoryResult { + pub path: String, + pub rename_chain: Vec, + pub renames_followed: bool, + pub merge_requests: Vec, + pub discussions: Vec, + pub total_mrs: usize, + pub paths_searched: usize, +} + +/// Run the file-history query. +pub fn run_file_history( + config: &Config, + path: &str, + project: Option<&str>, + no_follow_renames: bool, + merged_only: bool, + include_discussions: bool, + limit: usize, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let project_id = project.map(|p| resolve_project(&conn, p)).transpose()?; + + // Resolve rename chain unless disabled + let (all_paths, renames_followed) = if no_follow_renames { + (vec![path.to_string()], false) + } else if let Some(pid) = project_id { + let chain = resolve_rename_chain(&conn, pid, path, MAX_RENAME_HOPS)?; + let followed = chain.len() > 1; + (chain, followed) + } else { + // Without a project scope, can't resolve renames (need project_id) + (vec![path.to_string()], false) + }; + + let paths_searched = all_paths.len(); + + // Build placeholders for IN clause + let placeholders: Vec = (0..all_paths.len()) + .map(|i| format!("?{}", i + 2)) + .collect(); + let in_clause = placeholders.join(", "); + + let merged_filter = if merged_only { + " AND mr.state = 'merged'" + } else { + "" + }; + + let project_filter = if project_id.is_some() { + "AND mfc.project_id = ?1" + } else { + "" + }; + + let sql = format!( + "SELECT DISTINCT \ + mr.iid, mr.title, mr.state, mr.author_username, \ + mfc.change_type, mr.merged_at, mr.updated_at, mr.merge_commit_sha, mr.web_url \ + FROM mr_file_changes mfc \ + JOIN merge_requests mr ON mr.id = mfc.merge_request_id 
\ + WHERE mfc.new_path IN ({in_clause}) {project_filter} {merged_filter} \ + ORDER BY COALESCE(mr.merged_at, mr.updated_at) DESC \ + LIMIT ?{}", + all_paths.len() + 2 + ); + + let mut stmt = conn.prepare(&sql)?; + + // Bind parameters: ?1 = project_id (or 0 placeholder), ?2..?N+1 = paths, ?N+2 = limit + let mut params: Vec> = Vec::new(); + params.push(Box::new(project_id.unwrap_or(0))); + for p in &all_paths { + params.push(Box::new(p.clone())); + } + params.push(Box::new(limit as i64)); + + let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let merge_requests: Vec = stmt + .query_map(param_refs.as_slice(), |row| { + let merged_at: Option = row.get(5)?; + let updated_at: i64 = row.get(6)?; + Ok(FileHistoryMr { + iid: row.get(0)?, + title: row.get(1)?, + state: row.get(2)?, + author_username: row.get(3)?, + change_type: row.get(4)?, + merged_at_iso: merged_at.map(ms_to_iso), + updated_at_iso: ms_to_iso(updated_at), + merge_commit_sha: row.get(7)?, + web_url: row.get(8)?, + }) + })? + .filter_map(std::result::Result::ok) + .collect(); + + let total_mrs = merge_requests.len(); + + // Optionally fetch DiffNote discussions on this file + let discussions = if include_discussions && !merge_requests.is_empty() { + fetch_file_discussions(&conn, &all_paths, project_id)? + } else { + Vec::new() + }; + + Ok(FileHistoryResult { + path: path.to_string(), + rename_chain: all_paths, + renames_followed, + merge_requests, + discussions, + total_mrs, + paths_searched, + }) +} + +/// Fetch DiffNote discussions that reference the given file paths. 
+fn fetch_file_discussions( + conn: &rusqlite::Connection, + paths: &[String], + project_id: Option, +) -> Result> { + let placeholders: Vec = (0..paths.len()).map(|i| format!("?{}", i + 2)).collect(); + let in_clause = placeholders.join(", "); + + let project_filter = if project_id.is_some() { + "AND d.project_id = ?1" + } else { + "" + }; + + let sql = format!( + "SELECT d.gitlab_discussion_id, n.author_username, n.body, n.position_new_path, n.created_at \ + FROM notes n \ + JOIN discussions d ON d.id = n.discussion_id \ + WHERE n.position_new_path IN ({in_clause}) {project_filter} \ + AND n.is_system = 0 \ + ORDER BY n.created_at DESC \ + LIMIT 50" + ); + + let mut stmt = conn.prepare(&sql)?; + + let mut params: Vec> = Vec::new(); + params.push(Box::new(project_id.unwrap_or(0))); + for p in paths { + params.push(Box::new(p.clone())); + } + + let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let discussions: Vec = stmt + .query_map(param_refs.as_slice(), |row| { + let body: String = row.get(2)?; + let snippet = if body.len() > 200 { + format!("{}...", &body[..body.floor_char_boundary(200)]) + } else { + body + }; + let created_at: i64 = row.get(4)?; + Ok(FileDiscussion { + discussion_id: row.get(0)?, + author_username: row.get(1)?, + body_snippet: snippet, + path: row.get(3)?, + created_at_iso: ms_to_iso(created_at), + }) + })? 
+ .filter_map(std::result::Result::ok) + .collect(); + + Ok(discussions) +} + +// ── Human output ──────────────────────────────────────────────────────────── + +pub fn print_file_history(result: &FileHistoryResult) { + // Header + let paths_info = if result.paths_searched > 1 { + format!( + " (via {} paths, {} MRs)", + result.paths_searched, result.total_mrs + ) + } else { + format!(" ({} MRs)", result.total_mrs) + }; + + println!(); + println!( + "{}", + Theme::bold().render(&format!("File History: {}{}", result.path, paths_info)) + ); + + // Rename chain + if result.renames_followed && result.rename_chain.len() > 1 { + let chain_str: Vec<&str> = result.rename_chain.iter().map(String::as_str).collect(); + println!( + " Rename chain: {}", + Theme::dim().render(&chain_str.join(" -> ")) + ); + } + + if result.merge_requests.is_empty() { + println!( + "\n {} {}", + Icons::info(), + Theme::dim().render("No merge requests found touching this file.") + ); + println!( + " {}", + Theme::dim().render("Hint: Run 'lore sync' to fetch MR file changes.") + ); + println!(); + return; + } + + println!(); + + for mr in &result.merge_requests { + let (icon, state_style) = match mr.state.as_str() { + "merged" => (Icons::mr_merged(), Theme::accent()), + "opened" => (Icons::mr_opened(), Theme::success()), + "closed" => (Icons::mr_closed(), Theme::warning()), + _ => (Icons::mr_opened(), Theme::dim()), + }; + + let date = mr + .merged_at_iso + .as_deref() + .or(Some(mr.updated_at_iso.as_str())) + .unwrap_or("") + .split('T') + .next() + .unwrap_or(""); + + println!( + " {} {} {} {} @{} {} {}", + icon, + Theme::accent().render(&format!("!{}", mr.iid)), + render::truncate(&mr.title, 50), + state_style.render(&mr.state), + mr.author_username, + date, + Theme::dim().render(&mr.change_type), + ); + } + + // Discussions + if !result.discussions.is_empty() { + println!( + "\n {} File discussions ({}):", + Icons::note(), + result.discussions.len() + ); + for d in &result.discussions { + let 
date = d.created_at_iso.split('T').next().unwrap_or(""); + println!( + " @{} ({}) [{}]: {}", + d.author_username, + date, + Theme::dim().render(&d.path), + d.body_snippet + ); + } + } + + println!(); +} + +// ── Robot (JSON) output ───────────────────────────────────────────────────── + +pub fn print_file_history_json(result: &FileHistoryResult, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": { + "path": result.path, + "rename_chain": if result.renames_followed { Some(&result.rename_chain) } else { None }, + "merge_requests": result.merge_requests, + "discussions": if result.discussions.is_empty() { None } else { Some(&result.discussions) }, + }, + "meta": { + "elapsed_ms": elapsed_ms, + "total_mrs": result.total_mrs, + "renames_followed": result.renames_followed, + "paths_searched": result.paths_searched, + } + }); + + println!("{}", serde_json::to_string(&output).unwrap_or_default()); +} diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 5d4997b..a71ddaa 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -3,6 +3,7 @@ pub mod count; pub mod doctor; pub mod drift; pub mod embed; +pub mod file_history; pub mod generate_docs; pub mod ingest; pub mod init; @@ -23,6 +24,7 @@ pub use count::{ pub use doctor::{DoctorChecks, print_doctor_results, run_doctor}; pub use drift::{DriftResponse, print_drift_human, print_drift_json, run_drift}; pub use embed::{print_embed, print_embed_json, run_embed}; +pub use file_history::{print_file_history, print_file_history_json, run_file_history}; pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs}; pub use ingest::{ DryRunPreview, IngestDisplay, print_dry_run_preview, print_dry_run_preview_json, diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 1c6babe..fab3a61 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -234,6 +234,10 @@ pub enum Commands { /// People intelligence: experts, workload, active discussions, overlap 
Who(WhoArgs), + /// Show MRs that touched a file, with linked discussions + #[command(name = "file-history")] + FileHistory(FileHistoryArgs), + /// Detect discussion divergence from original intent Drift { /// Entity type (currently only "issues" supported) @@ -966,6 +970,42 @@ pub struct WhoArgs { pub all_history: bool, } +#[derive(Parser)] +#[command(after_help = "\x1b[1mExamples:\x1b[0m + lore file-history src/main.rs # MRs that touched this file + lore file-history src/auth/ -p group/repo # Scoped to project + lore file-history src/foo.rs --discussions # Include DiffNote snippets + lore file-history src/bar.rs --no-follow-renames # Skip rename chain")] +pub struct FileHistoryArgs { + /// File path to trace history for + pub path: String, + + /// Scope to a specific project (fuzzy match) + #[arg(short = 'p', long, help_heading = "Filters")] + pub project: Option, + + /// Include discussion snippets from DiffNotes on this file + #[arg(long, help_heading = "Output")] + pub discussions: bool, + + /// Disable rename chain resolution + #[arg(long = "no-follow-renames", help_heading = "Filters")] + pub no_follow_renames: bool, + + /// Only show merged MRs + #[arg(long, help_heading = "Filters")] + pub merged: bool, + + /// Maximum results + #[arg( + short = 'n', + long = "limit", + default_value = "50", + help_heading = "Output" + )] + pub limit: usize, +} + #[derive(Parser)] pub struct CountArgs { /// Entity type to count (issues, mrs, discussions, notes, events) diff --git a/src/main.rs b/src/main.rs index 31845f7..e510c1c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,23 +13,24 @@ use lore::cli::commands::{ NoteListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser, open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_drift_human, print_drift_json, print_dry_run_preview, print_dry_run_preview_json, print_embed, - print_embed_json, print_event_count, print_event_count_json, print_generate_docs, - 
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues, - print_list_issues_json, print_list_mrs, print_list_mrs_json, print_list_notes, - print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_search_results, - print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, - print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json, - print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta, - print_who_human, print_who_json, query_notes, run_auth_test, run_count, run_count_events, - run_doctor, run_drift, run_embed, run_generate_docs, run_ingest, run_ingest_dry_run, run_init, - run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync, - run_sync_status, run_timeline, run_who, + print_embed_json, print_event_count, print_event_count_json, print_file_history, + print_file_history_json, print_generate_docs, print_generate_docs_json, print_ingest_summary, + print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs, + print_list_mrs_json, print_list_notes, print_list_notes_csv, print_list_notes_json, + print_list_notes_jsonl, print_search_results, print_search_results_json, print_show_issue, + print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json, + print_sync, print_sync_json, print_sync_status, print_sync_status_json, print_timeline, + print_timeline_json_with_meta, print_who_human, print_who_json, query_notes, run_auth_test, + run_count, run_count_events, run_doctor, run_drift, run_embed, run_file_history, + run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, + run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, + run_who, }; use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme}; use lore::cli::robot::{RobotMeta, strip_schemas}; use 
lore::cli::{ - Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs, - NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs, + Cli, Commands, CountArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, + MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs, }; use lore::core::db::{ LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations, @@ -195,6 +196,9 @@ async fn main() { handle_timeline(cli.config.as_deref(), args, robot_mode).await } Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode), + Some(Commands::FileHistory(args)) => { + handle_file_history(cli.config.as_deref(), args, robot_mode) + } Some(Commands::Drift { entity_type, iid, @@ -720,6 +724,7 @@ fn suggest_similar_command(invalid: &str) -> String { ("notes", "notes"), ("note", "notes"), ("drift", "drift"), + ("file-history", "file-history"), ]; let invalid_lower = invalid.to_lowercase(); @@ -1852,6 +1857,37 @@ async fn handle_stats( Ok(()) } +fn handle_file_history( + config_override: Option<&str>, + args: FileHistoryArgs, + robot_mode: bool, +) -> Result<(), Box> { + let start = std::time::Instant::now(); + let config = Config::load(config_override)?; + + let project = config + .effective_project(args.project.as_deref()) + .map(String::from); + + let result = run_file_history( + &config, + &args.path, + project.as_deref(), + args.no_follow_renames, + args.merged, + args.discussions, + args.limit, + )?; + + if robot_mode { + let elapsed_ms = start.elapsed().as_millis() as u64; + print_file_history_json(&result, elapsed_ms); + } else { + print_file_history(&result); + } + Ok(()) +} + async fn handle_timeline( config_override: Option<&str>, args: TimelineArgs, @@ -2520,6 +2556,16 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box", "-p/--project ", "--discussions", "--no-follow-renames", "--merged", "-n/--limit "], + "example": "lore --robot 
file-history src/main.rs -p group/repo", + "response_schema": { + "ok": "bool", + "data": {"path": "string", "rename_chain": "[string]?", "merge_requests": "[{iid:int, title:string, state:string, author_username:string, change_type:string, merged_at_iso:string?, updated_at_iso:string, merge_commit_sha:string?, web_url:string?}]", "discussions": "[{discussion_id:string, author_username:string, body_snippet:string, path:string, created_at_iso:string}]?"}, + "meta": {"elapsed_ms": "int", "total_mrs": "int", "renames_followed": "bool", "paths_searched": "int"} + } + }, "drift": { "description": "Detect discussion divergence from original issue intent", "flags": ["", "", "--threshold <0.0-1.0>", "-p/--project "],