diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 86eea33..3d6330e 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -42,7 +42,7 @@ {"id":"bd-1hoq","title":"Restructure expert SQL with CTE-based dual-path matching","description":"## Background\nThe current query_expert() at who.rs:641 uses a 4-signal UNION ALL that only matches position_new_path and new_path, with flat COUNT-based scoring computed entirely in SQL. The new model needs dual-path matching, 5 signal types, state-aware timestamps, and returns per-signal rows for Rust-side decay computation (bd-13q8).\n\n## Approach\n**Important**: This bead builds the new SQL as a separate function WITHOUT modifying query_expert() yet. bd-13q8 wires it into query_expert(). This keeps this bead independently testable.\n\nAdd a new function:\n```rust\n/// Build the CTE-based expert scoring SQL for a given path query mode.\n/// Returns SQL string. Params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\nfn build_expert_sql(path_op: &str) -> String {\n // ... format the SQL with {path_op} inlined, all config values as bound params\n}\n```\n\n### SQL structure (8 CTEs + final SELECT):\n1. **matched_notes_raw**: UNION ALL on position_new_path + position_old_path\n2. **matched_notes**: DISTINCT dedup by id\n3. **matched_file_changes_raw**: UNION ALL on new_path + old_path\n4. **matched_file_changes**: DISTINCT dedup by (merge_request_id, project_id)\n5. **mr_activity**: Centralized state-aware timestamps AND state_mult. Joins merge_requests via matched_file_changes. Computes:\n - activity_ts: CASE WHEN state='merged' THEN COALESCE(merged_at, created_at) WHEN state='closed' THEN COALESCE(closed_at, created_at) ELSE COALESCE(updated_at, created_at) END\n - state_mult: CASE WHEN state='closed' THEN ?5 ELSE 1.0 END\n6. **reviewer_participation**: substantive DiffNotes WHERE LENGTH(TRIM(body)) >= ?6\n7. 
**raw**: 5 signals (diffnote_reviewer, diffnote_author, file_author, file_reviewer_participated, file_reviewer_assigned). Signals 1-2 compute state_mult inline. Signals 3-4a-4b reference mr_activity.\n8. **aggregated**: MR-level GROUP BY + note_group with COUNT\n\n### Returns 6 columns: (username TEXT, signal TEXT, mr_id INTEGER, qty INTEGER, ts INTEGER, state_mult REAL)\n\nSee plans/time-decay-expert-scoring.md section 3 for the full SQL template.\n\n## TDD Loop\n\n### RED (write first):\n```rust\n#[test]\nfn test_expert_sql_returns_expected_signal_rows() {\n let conn = setup_test_db();\n insert_project(&conn, 1, \"team/backend\");\n insert_mr(&conn, 1, 1, 100, \"alice\", \"merged\");\n insert_file_change(&conn, 1, 1, \"src/app.rs\", \"modified\");\n insert_reviewer(&conn, 1, \"bob\");\n insert_reviewer(&conn, 1, \"carol\");\n insert_discussion(&conn, 1, 1, Some(1), None, true, false);\n insert_diffnote(&conn, 1, 1, 1, \"carol\", \"src/app.rs\", \"This needs error handling for the edge case\");\n\n let sql = build_expert_sql(\"= ?1\");\n let mut stmt = conn.prepare(&sql).unwrap();\n let rows: Vec<(String, String, i64, i64, i64, f64)> = stmt\n .query_map(\n rusqlite::params![\"src/app.rs\", 0_i64, Option::::None, now_ms() + 1000, 0.5_f64, 20_i64],\n |row| Ok((\n row.get(0).unwrap(), row.get(1).unwrap(), row.get(2).unwrap(),\n row.get(3).unwrap(), row.get(4).unwrap(), row.get(5).unwrap(),\n ))\n ).unwrap().filter_map(|r| r.ok()).collect();\n\n // alice: file_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"file_author\"));\n // carol: file_reviewer_participated (left substantive DiffNote)\n assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"file_reviewer_participated\"));\n // bob: file_reviewer_assigned (no DiffNotes)\n assert!(rows.iter().any(|(u, s, ..)| u == \"bob\" && s == \"file_reviewer_assigned\"));\n // carol: note_group\n assert!(rows.iter().any(|(u, s, ..)| u == \"carol\" && s == \"note_group\"));\n // alice: 
diffnote_author\n assert!(rows.iter().any(|(u, s, ..)| u == \"alice\" && s == \"diffnote_author\"));\n // All merged rows have state_mult = 1.0\n assert!(rows.iter().all(|(.., sm)| (sm - 1.0).abs() < f64::EPSILON));\n}\n```\n\n### GREEN: Implement build_expert_sql() with the 8 CTEs.\n### VERIFY: cargo test -p lore -- test_expert_sql_returns_expected_signal_rows\n\n## Acceptance Criteria\n- [ ] test_expert_sql_returns_expected_signal_rows passes (all 5 signal types correct)\n- [ ] SQL compiles against :memory: DB with indexes from bd-2ao4 (migration 026)\n- [ ] 6 columns returned: username, signal, mr_id, qty, ts, state_mult (REAL, not TEXT)\n- [ ] 6 SQL params: ?1=path, ?2=since_ms, ?3=project_id, ?4=as_of_ms, ?5=closed_mr_multiplier, ?6=reviewer_min_note_chars\n- [ ] mr_activity CTE centralizes timestamp + state_mult (not repeated)\n- [ ] reviewer_participation uses ?6 not inlined literal\n- [ ] Existing query_expert() and all existing tests UNTOUCHED\n- [ ] build_expert_sql() is a pure function (no Connection param)\n\n## Files\n- MODIFY: src/cli/commands/who.rs (new build_expert_sql function + test, placed near query_expert at line ~641)\n\n## Edge Cases\n- ?5 (closed_mr_multiplier) bound as f64 — rusqlite handles this\n- ?6 (reviewer_min_note_chars) bound as i64 — SQLite LENGTH returns integer\n- Signals 1-2 compute state_mult inline (join through discussions, not mr_activity)\n- COALESCE fallback to created_at for NULL merged_at/closed_at/updated_at\n- Dedup in matched_notes/matched_file_changes prevents double-counting","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-09T16:59:44.665314Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.410514Z","closed_at":"2026-02-12T20:43:04.410470Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates 
green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1hoq","depends_on_id":"bd-1soz","type":"blocks","created_at":"2026-02-09T17:01:11.108727Z","created_by":"tayloreernisse"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2ao4","type":"blocks","created_at":"2026-02-09T17:01:11.053353Z","created_by":"tayloreernisse"},{"issue_id":"bd-1hoq","depends_on_id":"bd-2w1p","type":"blocks","created_at":"2026-02-09T17:01:10.996731Z","created_by":"tayloreernisse"}]} {"id":"bd-1ht","title":"Epic: Gate 5 - Code Trace (lore trace)","description":"## Background\n\nGate 5 implements 'lore trace' — answers 'Why was this code introduced?' by tracing from a file path through the MR that modified it, to the issue that motivated the MR, to the discussions with decision rationale. Capstone of Phase B.\n\nGate 5 ships Tier 1 only (API-only, no local git). Tier 2 (git blame via git2-rs) deferred to Phase C.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 5 (Sections 5.1-5.7).\n\n## Prerequisites\n\n- Gates 1-2 COMPLETE: entity_references populated, resource events fetched\n- Gate 4 (bd-14q): provides mr_file_changes table + resolve_rename_chain algorithm\n- entity_references source_method: 'api' | 'note_parse' | 'description_parse'\n- discussions/notes tables for DiffNote content\n- merge_requests.merged_at exists (migration 006). Use COALESCE(merged_at, updated_at) for ordering.\n\n## Architecture\n\n- **No new tables.** Trace queries combine mr_file_changes, entity_references, discussions/notes\n- **Query flow:** file -> mr_file_changes -> MRs -> entity_references (closes/related) -> issues -> discussions with DiffNote context\n- **Tier 1:** File-level granularity only. 
Cannot trace a specific line to its introducing commit.\n- **Path parsing:** Supports 'src/foo.rs:45' syntax — line number parsed but deferred with Tier 2 warning.\n- **Rename aware:** Reuses file_history::resolve_rename_chain for multi-path matching.\n\n## Children (Execution Order)\n\n1. **bd-2n4** — Trace query logic: file -> MR -> issue -> discussion chain (src/core/trace.rs)\n2. **bd-9dd** — CLI command with human + robot output (src/cli/commands/trace.rs)\n\n## Gate Completion Criteria\n\n- [ ] `lore trace ` shows MRs with linked issues + discussion context\n- [ ] Output includes MR -> issue -> discussion chain\n- [ ] DiffNote snippets show content on the traced file\n- [ ] Cross-references from entity_references used for MR->issue linking\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] Robot mode JSON with tier: 'api_only'\n- [ ] Graceful handling when no MR data found (suggest sync with fetchMrFileChanges)\n","status":"open","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.141053Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:57:12.357740Z","compaction_level":0,"original_size":0,"labels":["epic","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-1ht","depends_on_id":"bd-14q","type":"blocks","created_at":"2026-02-02T21:34:38.033428Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ht","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-02T21:34:37.987232Z","created_by":"tayloreernisse"}]} {"id":"bd-1i2","title":"Integrate mark_dirty_tx into ingestion modules","description":"## Background\nThis bead integrates dirty source tracking into the existing ingestion pipelines. Every entity upserted during ingestion must be marked dirty so the document regenerator knows to update the corresponding search document. 
The critical constraint: mark_dirty_tx() must be called INSIDE the same transaction that upserts the entity — not after commit.\n\n**Key PRD clarification:** Mark ALL upserted entities dirty (not just changed ones). The regenerator's hash comparison handles \"unchanged\" detection cheaply — this avoids needing change detection in ingestion.\n\n## Approach\nModify 4 existing ingestion files to add mark_dirty_tx() calls inside existing transaction blocks per PRD Section 6.1.\n\n**1. src/ingestion/issues.rs:**\nInside the issue upsert loop, after each successful INSERT/UPDATE:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Issue, issue_row.id)?;\n```\n\n**2. src/ingestion/merge_requests.rs:**\nInside the MR upsert loop:\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::MergeRequest, mr_row.id)?;\n```\n\n**3. src/ingestion/discussions.rs:**\nInside discussion insert (issue discussions, full-refresh transaction):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**4. src/ingestion/mr_discussions.rs:**\nInside discussion upsert (write phase):\n```rust\ndirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, discussion_row.id)?;\n```\n\n**Discussion Sweep Cleanup (PRD Section 6.1 — CRITICAL):**\nWhen the MR discussion sweep deletes stale discussions (`last_seen_at < run_start_time`), **delete the corresponding document rows directly** — do NOT use the dirty queue for cleanup. The `ON DELETE CASCADE` on `document_labels`/`document_paths` and the `documents_embeddings_ad` trigger handle all downstream cleanup.\n\n**PRD-exact CTE pattern:**\n```sql\n-- In src/ingestion/mr_discussions.rs, during sweep phase.\n-- Uses a CTE to capture stale IDs atomically before cascading deletes.\n-- This is more defensive than two separate statements because the CTE\n-- guarantees the ID set is captured before any row is deleted.\nWITH stale AS (\n SELECT id FROM discussions\n WHERE merge_request_id = ? 
AND last_seen_at < ?\n)\n-- Step 1: delete orphaned documents (must happen while source_id still resolves)\nDELETE FROM documents\n WHERE source_type = 'discussion' AND source_id IN (SELECT id FROM stale);\n-- Step 2: delete the stale discussions themselves\nDELETE FROM discussions\n WHERE id IN (SELECT id FROM stale);\n```\n\n**NOTE:** If SQLite version doesn't support CTE-based multi-statement, execute as two sequential statements capturing IDs in Rust first:\n```rust\nlet stale_ids: Vec = conn.prepare(\n \"SELECT id FROM discussions WHERE merge_request_id = ? AND last_seen_at < ?\"\n)?.query_map(params![mr_id, run_start], |r| r.get(0))?\n .collect::, _>>()?;\n\nif !stale_ids.is_empty() {\n // Delete documents FIRST (while source_id still resolves)\n conn.execute(\n \"DELETE FROM documents WHERE source_type = 'discussion' AND source_id IN (...)\",\n ...\n )?;\n // Then delete the discussions\n conn.execute(\n \"DELETE FROM discussions WHERE id IN (...)\",\n ...\n )?;\n}\n```\n\n**IMPORTANT difference from dirty queue pattern:** The sweep deletes documents DIRECTLY (not via dirty_sources queue). This is because the source entity is being deleted — there's nothing for the regenerator to regenerate from. 
The cascade handles FTS, labels, paths, and embeddings cleanup.\n\n## Acceptance Criteria\n- [ ] Every upserted issue is marked dirty inside the same transaction\n- [ ] Every upserted MR is marked dirty inside the same transaction\n- [ ] Every upserted discussion (issue + MR) is marked dirty inside the same transaction\n- [ ] ALL upserted entities marked dirty (not just changed ones) — regenerator handles skip\n- [ ] mark_dirty_tx called with &Transaction (not &Connection)\n- [ ] mark_dirty_tx uses upsert with ON CONFLICT to reset backoff state (not INSERT OR IGNORE)\n- [ ] Discussion sweep deletes documents DIRECTLY (not via dirty queue)\n- [ ] Discussion sweep uses CTE (or Rust-side ID capture) to capture stale IDs before cascading deletes\n- [ ] Documents deleted BEFORE discussions (while source_id still resolves)\n- [ ] ON DELETE CASCADE handles document_labels, document_paths cleanup\n- [ ] documents_embeddings_ad trigger handles embedding cleanup\n- [ ] `cargo build` succeeds\n- [ ] Existing ingestion tests still pass\n\n## Files\n- `src/ingestion/issues.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/merge_requests.rs` — add mark_dirty_tx calls in upsert loop\n- `src/ingestion/discussions.rs` — add mark_dirty_tx calls in insert loop\n- `src/ingestion/mr_discussions.rs` — add mark_dirty_tx calls + direct document deletion in sweep\n\n## TDD Loop\nRED: Existing tests should still pass (regression); new tests:\n- `test_issue_upsert_marks_dirty` — after issue ingest, dirty_sources has entry\n- `test_mr_upsert_marks_dirty` — after MR ingest, dirty_sources has entry\n- `test_discussion_upsert_marks_dirty` — after discussion ingest, dirty_sources has entry\n- `test_discussion_sweep_deletes_documents` — stale discussion documents deleted directly\n- `test_sweep_cascade_cleans_labels_paths` — ON DELETE CASCADE works\nGREEN: Add mark_dirty_tx calls in all 4 files, implement sweep with CTE\nVERIFY: `cargo test ingestion && cargo build`\n\n## Edge 
Cases\n- Upsert that doesn't change data: still marks dirty (regenerator hash check handles skip)\n- Transaction rollback: dirty mark also rolled back (atomic, inside same txn)\n- Discussion sweep with zero stale IDs: CTE returns empty, no DELETE executed\n- Large batch of upserts: each mark_dirty_tx is O(1) INSERT with ON CONFLICT\n- Sweep deletes document before discussion: order matters for source_id resolution","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.540279Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:39:17.241433Z","closed_at":"2026-01-30T17:39:17.241390Z","close_reason":"Added mark_dirty_tx calls in issues.rs, merge_requests.rs, discussions.rs, mr_discussions.rs (2 paths)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1i2","depends_on_id":"bd-38q","type":"blocks","created_at":"2026-01-30T15:29:35.105551Z","created_by":"tayloreernisse"}]} -{"id":"bd-1i4i","title":"Implement run_sync_surgical orchestration function","description":"## Background\n\nThe surgical sync pipeline needs a top-level orchestration function that coordinates the full pipeline for syncing specific IIDs. Unlike `run_sync` (lines 63-360 of `src/cli/commands/sync.rs`) which syncs all projects and all entities, `run_sync_surgical` targets specific issues/MRs by IID within a single project. The pipeline stages are: resolve project, record sync run, preflight fetch, check cancellation, acquire lock, ingest with TOCTOU guards, inline dependent enrichment (discussions, events, diffs), scoped doc regeneration, scoped embedding, finalize recorder, and build `SyncResult`.\n\n## Approach\n\nCreate `pub async fn run_sync_surgical()` in a new file `src/cli/commands/sync_surgical.rs`. 
Signature:\n\n```rust\npub async fn run_sync_surgical(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result\n```\n\nThe function reads `options.issues` and `options.merge_requests` (added by bd-1lja) to determine target IIDs. Pipeline:\n\n1. **Resolve project**: Call `resolve_project(conn, project_str)` from `src/core/project.rs` to get `gitlab_project_id`.\n2. **Start recorder**: `SyncRunRecorder::start(&recorder_conn, \"surgical-sync\", run_id)`. Note: `succeed()` and `fail()` consume `self`, so control flow must ensure exactly one terminal call.\n3. **Preflight fetch**: For each IID, call `get_issue_by_iid` / `get_mr_by_iid` (bd-159p) to confirm the entity exists on GitLab and capture `updated_at` for TOCTOU.\n4. **Check cancellation**: `if signal.is_cancelled() { recorder.fail(...); return Ok(result); }`\n5. **Acquire lock**: `AppLock::new(conn, LockOptions { name: \"surgical-sync\".into(), stale_lock_minutes: config.sync.stale_lock_minutes, heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds })`. Lock must `acquire(force)` and `release()` on all exit paths.\n6. **Ingest with TOCTOU**: For each preflight entity, call surgical ingest (bd-3sez). Compare DB `updated_at` with preflight `updated_at`; skip if already current. Record outcome in `EntitySyncResult`.\n7. **Inline dependents**: For ingested entities, fetch discussions, resource events (if `config.sync.fetch_resource_events`), MR diffs (if `config.sync.fetch_mr_file_changes`). Use `config.sync.requests_per_second` for rate limiting.\n8. **Scoped docs**: Call `run_generate_docs_for_sources()` (bd-hs6j) with only the affected entity source IDs.\n9. **Scoped embed**: Call `run_embed_for_document_ids()` (bd-1elx) with only the regenerated document IDs.\n10. **Finalize**: `recorder.succeed(conn, &metrics, total_items, total_errors)`.\n11. 
**Build SyncResult**: Populate surgical fields (bd-wcja): `surgical_mode: Some(true)`, `surgical_iids`, `entity_results`, `preflight_only`.\n\nIf `options.preflight_only` is set, return after step 3 with the preflight data and skip steps 4-10.\n\nProgress output uses `stage_spinner_v2(icon, label, msg, robot_mode)` from `src/cli/progress.rs` line 18 during execution, and `format_stage_line(icon, label, summary, elapsed)` from `src/cli/progress.rs` line 67 for completion lines. Stage icons via `Icons::sync()` from `src/cli/render.rs` line 208. Error completion uses `color_icon(icon, has_errors)` from `src/cli/commands/sync.rs` line 55.\n\n## Acceptance Criteria\n\n1. `run_sync_surgical` compiles and runs the full pipeline for 1+ issue IIDs\n2. Preflight-only mode returns early with fetched entity data, no DB writes beyond recorder\n3. TOCTOU: entities whose DB `updated_at` matches preflight `updated_at` are skipped with `skipped_toctou` outcome\n4. Cancellation at any stage between preflight and ingest stops processing, calls `recorder.fail()`\n5. Lock is acquired before ingest and released on all exit paths (success, error, cancellation)\n6. `SyncResult` surgical fields are populated: `surgical_mode`, `surgical_iids`, `entity_results`\n7. Robot mode produces valid JSON with per-entity outcomes\n8. 
Human mode shows stage spinners and completion lines\n\n## Files\n\n- `src/cli/commands/sync_surgical.rs` — new file, main orchestration function\n- `src/cli/commands/mod.rs` — add `pub mod sync_surgical;`\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync_surgical.rs` or a companion `sync_surgical_tests.rs`:\n\n```rust\n#[cfg(test)]\nmod tests {\n use super::*;\n use crate::core::db::{create_connection, run_migrations};\n use std::path::Path;\n use wiremock::{MockServer, Mock, ResponseTemplate};\n use wiremock::matchers::{method, path_regex};\n\n fn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n }\n\n fn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n // Insert test project\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n }\n\n #[tokio::test]\n async fn surgical_sync_single_issue_end_to_end() {\n let server = MockServer::start().await;\n // Mock: GET /projects/:id/issues?iids[]=7 returns one issue\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(\n serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])\n ))\n .mount(&server).await;\n // Mock discussions endpoint\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test01\"), &signal).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"synced\");\n }\n\n #[tokio::test]\n async fn preflight_only_returns_early() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n preflight_only: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test02\"), &signal).await.unwrap();\n\n assert_eq!(result.preflight_only, Some(true));\n assert_eq!(result.issues_updated, 0); // No actual ingest happened\n }\n\n #[tokio::test]\n async fn cancellation_before_ingest_fails_recorder() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, 
\"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issues: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before we start\n let result = run_sync_surgical(&config, options, Some(\"test03\"), &signal).await.unwrap();\n\n // Result should indicate cancellation\n assert_eq!(result.issues_updated, 0);\n }\n}\n```\n\n## Edge Cases\n\n- **Entity not found on GitLab**: Preflight returns 404 for an IID. Record `EntitySyncResult { outcome: \"not_found\" }` and continue with remaining IIDs.\n- **All entities skipped by TOCTOU**: Every entity's `updated_at` matches DB. Result has `entity_results` with all `skipped_toctou`, zero actual sync work.\n- **Mixed success/failure**: Some IIDs succeed, some fail. All recorded in `entity_results`. Function returns `Ok` with partial results, not `Err`.\n- **SyncRunRecorder consume semantics**: `succeed()` and `fail()` take `self` by value. The orchestrator must ensure exactly one terminal call. Use an `Option` pattern: `let mut recorder = Some(recorder); ... 
recorder.take().unwrap().succeed(...)`.\n- **Lock contention**: If another sync holds the lock and `force` is false, fail with clear error before any ingest.\n- **Empty IID lists**: If both `options.issues` and `options.merge_requests` are empty, return immediately with default `SyncResult` (no surgical fields set).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-wcja (SyncResult fields), bd-1lja (SyncOptions extensions), bd-159p (get_by_iid client methods), bd-3sez (surgical ingest/preflight/TOCTOU), bd-kanh (per-entity helpers), bd-arka (SyncRunRecorder surgical methods), bd-1elx (scoped embed), bd-hs6j (scoped docs), bd-tiux (migration 027)\n- **Blocks (downstream)**: bd-3bec (wiring into run_sync), bd-3jqx (integration tests)\n- This is the keystone bead — it consumes all upstream primitives and is consumed by the final wiring and integration test beads.","status":"open","priority":1,"issue_type":"task","created_at":"2026-02-17T19:17:24.197299Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:03:01.815253Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-1i4i","depends_on_id":"bd-3bec","type":"blocks","created_at":"2026-02-17T19:19:25.301385Z","created_by":"tayloreernisse"}]} +{"id":"bd-1i4i","title":"Implement run_sync_surgical orchestration function","description":"## Background\n\nThe surgical sync pipeline needs a top-level orchestration function that coordinates the full pipeline for syncing specific IIDs. Unlike `run_sync` (lines 63-360 of `src/cli/commands/sync.rs`) which syncs all projects and all entities, `run_sync_surgical` targets specific issues/MRs by IID within a single project. 
The pipeline stages are: resolve project, record sync run, preflight fetch, check cancellation, acquire lock, ingest with TOCTOU guards, inline dependent enrichment (discussions, events, diffs), scoped doc regeneration, scoped embedding, finalize recorder, and build `SyncResult`.\n\n## Approach\n\nCreate `pub async fn run_sync_surgical()` in a new file `src/cli/commands/sync_surgical.rs`. Signature:\n\n```rust\npub async fn run_sync_surgical(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result\n```\n\nThe function reads `options.issue_iids` and `options.mr_iids` (added by bd-1lja) to determine target IIDs. Pipeline:\n\n1. **Resolve project**: Call `resolve_project(conn, project_str)` from `src/core/project.rs` to get `gitlab_project_id`.\n2. **Start recorder**: `SyncRunRecorder::start(&recorder_conn, \"surgical-sync\", run_id)`. Note: `succeed()` and `fail()` consume `self`, so control flow must ensure exactly one terminal call.\n3. **Preflight fetch**: For each IID, call `get_issue_by_iid` / `get_mr_by_iid` (bd-159p) to confirm the entity exists on GitLab and capture `updated_at` for TOCTOU.\n4. **Check cancellation**: `if signal.is_cancelled() { recorder.fail(...); return Ok(result); }`\n5. **Acquire lock**: `AppLock::new(conn, LockOptions { name: \"surgical-sync\".into(), stale_lock_minutes: config.sync.stale_lock_minutes, heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds })`. Lock must `acquire(force)` and `release()` on all exit paths.\n6. **Ingest with TOCTOU**: For each preflight entity, call surgical ingest (bd-3sez). Compare DB `updated_at` with preflight `updated_at`; skip if already current. Record outcome in `EntitySyncResult`.\n7. **Inline dependents**: For ingested entities, fetch discussions, resource events (if `config.sync.fetch_resource_events`), MR diffs (if `config.sync.fetch_mr_file_changes`). Use `config.sync.requests_per_second` for rate limiting.\n8. 
**Scoped docs**: Call `run_generate_docs_for_sources()` (bd-hs6j) with only the affected entity source IDs.\n9. **Scoped embed**: Call `run_embed_for_document_ids()` (bd-1elx) with only the regenerated document IDs.\n10. **Finalize**: `recorder.succeed(conn, &metrics, total_items, total_errors)`.\n11. **Build SyncResult**: Populate surgical fields (bd-wcja): `surgical_mode: Some(true)`, `surgical_iids`, `entity_results`, `preflight_only`.\n\nIf `options.preflight_only` is set, return after step 3 with the preflight data and skip steps 4-10.\n\nProgress output uses `stage_spinner_v2(icon, label, msg, robot_mode)` from `src/cli/progress.rs` line 18 during execution, and `format_stage_line(icon, label, summary, elapsed)` from `src/cli/progress.rs` line 67 for completion lines. Stage icons via `Icons::sync()` from `src/cli/render.rs` line 208. Error completion uses `color_icon(icon, has_errors)` from `src/cli/commands/sync.rs` line 55.\n\n## Acceptance Criteria\n\n1. `run_sync_surgical` compiles and runs the full pipeline for 1+ issue IIDs\n2. Preflight-only mode returns early with fetched entity data, no DB writes beyond recorder\n3. TOCTOU: entities whose DB `updated_at` matches preflight `updated_at` are skipped with `skipped_toctou` outcome\n4. Cancellation at any stage between preflight and ingest stops processing, calls `recorder.fail()`\n5. Lock is acquired before ingest and released on all exit paths (success, error, cancellation)\n6. `SyncResult` surgical fields are populated: `surgical_mode`, `surgical_iids`, `entity_results`\n7. Robot mode produces valid JSON with per-entity outcomes\n8. 
Human mode shows stage spinners and completion lines\n\n## Files\n\n- `src/cli/commands/sync_surgical.rs` — new file, main orchestration function\n- `src/cli/commands/mod.rs` — add `pub mod sync_surgical;`\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync_surgical.rs` or a companion `sync_surgical_tests.rs`:\n\n```rust\n#[cfg(test)]\nmod tests {\n use super::*;\n use crate::core::db::{create_connection, run_migrations};\n use std::path::Path;\n use wiremock::{MockServer, Mock, ResponseTemplate};\n use wiremock::matchers::{method, path_regex};\n\n fn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n }\n\n fn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n // Insert test project\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n }\n\n #[tokio::test]\n async fn surgical_sync_single_issue_end_to_end() {\n let server = MockServer::start().await;\n // Mock: GET /projects/:id/issues?iids[]=7 returns one issue\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(\n serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])\n ))\n .mount(&server).await;\n // Mock discussions endpoint\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issue_iids: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test01\"), &signal).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"synced\");\n }\n\n #[tokio::test]\n async fn preflight_only_returns_early() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issue_iids: vec![7],\n preflight_only: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync_surgical(&config, options, Some(\"test02\"), &signal).await.unwrap();\n\n assert_eq!(result.preflight_only, Some(true));\n assert_eq!(result.issues_updated, 0); // No actual ingest happened\n }\n\n #[tokio::test]\n async fn cancellation_before_ingest_fails_recorder() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, 
\"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n robot_mode: true,\n issue_iids: vec![7],\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before we start\n let result = run_sync_surgical(&config, options, Some(\"test03\"), &signal).await.unwrap();\n\n // Result should indicate cancellation\n assert_eq!(result.issues_updated, 0);\n }\n}\n```\n\n## Edge Cases\n\n- **Entity not found on GitLab**: Preflight returns 404 for an IID. Record `EntitySyncResult { outcome: \"not_found\" }` and continue with remaining IIDs.\n- **All entities skipped by TOCTOU**: Every entity's `updated_at` matches DB. Result has `entity_results` with all `skipped_toctou`, zero actual sync work.\n- **Mixed success/failure**: Some IIDs succeed, some fail. All recorded in `entity_results`. Function returns `Ok` with partial results, not `Err`.\n- **SyncRunRecorder consume semantics**: `succeed()` and `fail()` take `self` by value. The orchestrator must ensure exactly one terminal call. Use an `Option` pattern: `let mut recorder = Some(recorder); ... 
recorder.take().unwrap().succeed(...)`.\n- **Lock contention**: If another sync holds the lock and `force` is false, fail with clear error before any ingest.\n- **Empty IID lists**: If both `options.issue_iids` and `options.mr_iids` are empty, return immediately with default `SyncResult` (no surgical fields set).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-wcja (SyncResult fields), bd-1lja (SyncOptions extensions), bd-159p (get_by_iid client methods), bd-3sez (surgical ingest/preflight/TOCTOU), bd-kanh (per-entity helpers), bd-arka (SyncRunRecorder surgical methods), bd-1elx (scoped embed), bd-hs6j (scoped docs), bd-tiux (migration 027)\n- **Blocks (downstream)**: bd-3bec (wiring into run_sync), bd-3jqx (integration tests)\n- This is the keystone bead — it consumes all upstream primitives and is consumed by the final wiring and integration test beads.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-17T19:17:24.197299Z","created_by":"tayloreernisse","updated_at":"2026-02-18T20:36:39.596508Z","closed_at":"2026-02-18T20:36:39.596455Z","close_reason":"run_sync_surgical orchestrator: 719-line pipeline with preflight/TOCTOU/ingest/dependents/docs/embed stages, Option pattern, graceful embed failures","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-1i4i","depends_on_id":"bd-3bec","type":"blocks","created_at":"2026-02-17T19:19:25.301385Z","created_by":"tayloreernisse"}]} {"id":"bd-1j1","title":"Integration test: full Phase B sync pipeline","description":"## Background\n\nThis integration test proves the full Phase B sync pipeline works end-to-end. 
Since Gates 1 and 2 are already implemented and closed, this test validates that the complete pipeline — including Gate 4 mr_diffs draining — works together.\n\n## Codebase Context\n\n- **Gates 1-2 FULLY IMPLEMENTED (CLOSED):** resource events fetch, closes_issues API, system note parsing (note_parser.rs), entity_references extraction (references.rs)\n- **Gate 4 in progress:** migration 016 (mr_file_changes), fetch_mr_diffs, drain_mr_diffs — already wired in orchestrator (lines 708-726, 1514+)\n- **26 migrations exist** (001-026). LATEST_SCHEMA_VERSION = 26. In-memory DB must run all 26.\n- Orchestrator has drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), and drain_mr_diffs() (line 1514).\n- wiremock crate used in existing tests (check dev-dependencies in Cargo.toml)\n- src/core/dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- IngestProjectResult and IngestMrProjectResult track counts for all drain phases\n\n## Approach\n\nCreate tests/phase_b_integration.rs:\n\n### Test Setup\n\n1. In-memory SQLite DB with all 26 migrations (001-026)\n2. wiremock mock server with:\n - /api/v4/projects/:id/issues — 2 test issues\n - /api/v4/projects/:id/merge_requests — 1 test MR\n - /api/v4/projects/:id/issues/:iid/resource_state_events — state events\n - /api/v4/projects/:id/issues/:iid/resource_label_events — label events\n - /api/v4/projects/:id/merge_requests/:iid/resource_state_events — merge event with source_merge_request_iid\n - /api/v4/projects/:id/merge_requests/:iid/closes_issues — linked issues\n - /api/v4/projects/:id/merge_requests/:iid/diffs — file changes\n - /api/v4/projects/:id/issues/:iid/discussions — discussion with system note \"mentioned in !1\"\n3. Config with fetch_resource_events=true and fetch_mr_file_changes=true\n4. Use dependent_concurrency=1 to avoid timing issues\n\n### Test Flow\n\n```rust\n#[tokio::test]\nasync fn test_full_phase_b_pipeline() {\n // 1. 
Set up mock server + DB with all 26 migrations\n // 2. Run ingest issues + MRs (orchestrator functions)\n // 3. Verify pending_dependent_fetches enqueued: resource_events, mr_closes_issues, mr_diffs\n // 4. Drain all dependent fetch queues\n // 5. Assert: resource_state_events populated (count > 0)\n // 6. Assert: resource_label_events populated (count > 0)\n // 7. Assert: entity_references has closes ref with source_method='api'\n // 8. Assert: entity_references has mentioned ref with source_method='note_parse'\n // 9. Assert: mr_file_changes populated from diffs API\n // 10. Assert: pending_dependent_fetches fully drained (no stuck locks)\n}\n```\n\n### Assertions (SQL)\n\n```sql\nSELECT COUNT(*) FROM resource_state_events -- > 0\nSELECT COUNT(*) FROM resource_label_events -- > 0\nSELECT COUNT(*) FROM entity_references WHERE reference_type = 'closes' AND source_method = 'api' -- >= 1\nSELECT COUNT(*) FROM entity_references WHERE source_method = 'note_parse' -- >= 1\nSELECT COUNT(*) FROM mr_file_changes -- > 0\nSELECT COUNT(*) FROM pending_dependent_fetches WHERE locked_at IS NOT NULL -- = 0\n```\n\n## Acceptance Criteria\n\n- [ ] Test creates DB with all 26 migrations, mocks, and runs full pipeline\n- [ ] resource_state_events and resource_label_events populated\n- [ ] entity_references has closes ref (source_method='api') and mentioned ref (source_method='note_parse')\n- [ ] mr_file_changes populated from diffs mock\n- [ ] pending_dependent_fetches fully drained (no stuck locks, no retryable jobs)\n- [ ] Test runs in < 10 seconds\n- [ ] `cargo test --test phase_b_integration` passes\n\n## Files\n\n- CREATE: tests/phase_b_integration.rs\n\n## TDD Anchor\n\nRED: Write test with all assertions — should pass if all Gates are wired correctly.\n\nGREEN: If anything fails, it indicates a missing orchestrator connection — fix the wiring.\n\nVERIFY: cargo test --test phase_b_integration -- --nocapture\n\n## Edge Cases\n\n- Paginated mock responses: include Link header 
for multi-page responses\n- Empty pages: verify graceful handling\n- Use dependent_concurrency=1 to avoid timing issues in test environment\n- Stale lock reclaim: test that locks older than stale_lock_minutes are reclaimed\n- If Gate 4 drain_mr_diffs is not fully wired yet, the mr_file_changes assertion will fail — this is the intended RED signal\n\n## Dependency Context\n\n- **bd-8t4 (resource_state_events extraction)**: CLOSED. Provides drain_resource_events() which populates resource_state_events and resource_label_events tables.\n- **bd-3ia (closes_issues)**: CLOSED. Provides drain_mr_closes_issues() which populates entity_references with reference_type='closes', source_method='api'.\n- **bd-1ji (note parsing)**: CLOSED. Provides note_parser.rs which extracts \"mentioned in !N\" patterns and stores as entity_references with source_method='note_parse'.\n- **dependent_queue.rs**: Provides the claim/complete/fail lifecycle. All three drain functions use this.\n- **orchestrator.rs**: Contains all drain functions. 
drain_mr_diffs() at line 1514+ populates mr_file_changes.","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T22:42:26.355071Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:52:30.970742Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1j1","depends_on_id":"bd-1ji","type":"blocks","created_at":"2026-02-02T22:43:27.941002Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-1se","type":"parent-child","created_at":"2026-02-02T22:43:40.577709Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T22:43:28.048311Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j1","depends_on_id":"bd-8t4","type":"blocks","created_at":"2026-02-02T22:43:27.996061Z","created_by":"tayloreernisse"}]} {"id":"bd-1j5o","title":"Verification: quality gates, query plan check, real-world validation","description":"## Background\n\nPost-implementation verification checkpoint. Runs after all code beads complete to validate the full scoring model works correctly against real data, not just test fixtures.\n\n## Approach\n\nExecute 8 verification steps in order. 
Each step has a binary pass/fail outcome.\n\n### Step 1: Compiler check\n```bash\ncargo check --all-targets\n```\nPass: exit 0\n\n### Step 2: Clippy\n```bash\ncargo clippy --all-targets -- -D warnings\n```\nPass: exit 0\n\n### Step 3: Formatting\n```bash\ncargo fmt --check\n```\nPass: exit 0\n\n### Step 4: Test suite\n```bash\ncargo test -p lore\n```\nPass: all tests green, including 31 new decay/scoring tests\n\n### Step 5: UBS scan\n```bash\nubs src/cli/commands/who.rs src/core/config.rs src/core/db.rs\n```\nPass: exit 0\n\n### Step 6: Query plan verification (manual)\nRun against real database:\n```bash\ncargo run --release -- who --path MeasurementQualityDialog.tsx -vvv 2>&1 | grep -i \"query plan\"\n```\nOr use sqlite3 CLI with EXPLAIN QUERY PLAN on the expert SQL (both exact and prefix modes).\n\nPass criteria (6 checks):\n- matched_notes_raw branch 1 uses existing new_path index\n- matched_notes_raw branch 2 uses idx_notes_old_path_author\n- matched_file_changes_raw uses idx_mfc_new_path_project_mr and idx_mfc_old_path_project_mr\n- reviewer_participation uses idx_notes_diffnote_discussion_author\n- mr_activity CTE joins merge_requests via primary key from matched_file_changes\n- Path resolution probes (old_path leg) use idx_notes_old_path_project_created\nDocument observed plan as SQL comment near the CTE.\n\n### Step 7: Performance baseline (manual)\n```bash\ntime cargo run --release -- who --path MeasurementQualityDialog.tsx\ntime cargo run --release -- who --path src/\ntime cargo run --release -- who --path Dialog.tsx\n```\nPass criteria (soft SLOs):\n- Exact path: p95 < 200ms\n- Prefix: p95 < 300ms\n- Suffix: p95 < 500ms\nRecord timings as SQL comment for future regression reference.\n\n### Step 8: Real-world validation\n```bash\ncargo run --release -- who --path MeasurementQualityDialog.tsx\ncargo run --release -- who --path MeasurementQualityDialog.tsx --explain-score\ncargo run --release -- who --path MeasurementQualityDialog.tsx --as-of 
2025-06-01\ncargo run --release -- who --path MeasurementQualityDialog.tsx --all-history\n```\nPass criteria:\n- [ ] Recency discounting visible (recent authors rank above old reviewers)\n- [ ] --explain-score components sum to total (within f64 tolerance)\n- [ ] --as-of produces identical results on repeated runs\n- [ ] Assigned-only reviewers rank below participated reviewers on same MR\n- [ ] Known renamed file path resolves and credits old expertise\n- [ ] LGTM-only reviewers classified as assigned-only\n- [ ] Closed MRs at ~50% contribution visible via --explain-score\n\n## Acceptance Criteria\n- [ ] Steps 1-5 pass (exit 0)\n- [ ] Step 6: query plan documented with all 6 index usage points confirmed\n- [ ] Step 7: timing baselines recorded\n- [ ] Step 8: all 7 real-world checks pass\n\n## Files\n- All files modified by child beads (read-only verification)\n- Add SQL comments near CTE with observed EXPLAIN QUERY PLAN output\n\n## Edge Cases\n- SQLite planner may choose different plans across versions — document version\n- Timing varies by hardware — record machine specs alongside baselines\n- Real DB may have NULL merged_at on old MRs — state-aware fallback handles this","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-09T17:00:59.287720Z","created_by":"tayloreernisse","updated_at":"2026-02-12T20:43:04.415816Z","closed_at":"2026-02-12T20:43:04.415772Z","close_reason":"Implemented by time-decay swarm: 3 agents, 12 tasks, 621 tests passing, all quality gates green","compaction_level":0,"original_size":0,"labels":["scoring"],"dependencies":[{"issue_id":"bd-1j5o","depends_on_id":"bd-1b50","type":"blocks","created_at":"2026-02-09T17:01:11.693095Z","created_by":"tayloreernisse"},{"issue_id":"bd-1j5o","depends_on_id":"bd-1vti","type":"blocks","created_at":"2026-02-09T17:01:11.600519Z","created_by":"tayloreernisse"}]} {"id":"bd-1je","title":"Implement pending discussion queue","description":"## Background\nThe pending discussion queue tracks 
discussions that need to be fetched from GitLab. When an issue or MR is updated, its discussions may need re-fetching. This queue is separate from dirty_sources (which tracks entities needing document regeneration) — it tracks entities needing API calls to GitLab. The queue uses the same backoff pattern as dirty_sources for consistency.\n\n## Approach\nCreate `src/ingestion/discussion_queue.rs`:\n\n```rust\nuse crate::core::backoff::compute_next_attempt_at;\n\n/// Noteable type for discussion queue.\n#[derive(Debug, Clone, Copy)]\npub enum NoteableType {\n Issue,\n MergeRequest,\n}\n\nimpl NoteableType {\n pub fn as_str(&self) -> &'static str {\n match self {\n Self::Issue => \"Issue\",\n Self::MergeRequest => \"MergeRequest\",\n }\n }\n}\n\npub struct PendingFetch {\n pub project_id: i64,\n pub noteable_type: NoteableType,\n pub noteable_iid: i64,\n pub attempt_count: i32,\n}\n\n/// Queue a discussion fetch. ON CONFLICT DO UPDATE resets backoff (consistent with dirty_sources).\npub fn queue_discussion_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Get next batch of pending fetches (WHERE next_attempt_at IS NULL OR <= now).\npub fn get_pending_fetches(conn: &Connection, limit: usize) -> Result>;\n\n/// Mark fetch complete (remove from queue).\npub fn complete_fetch(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n) -> Result<()>;\n\n/// Record fetch error with backoff.\npub fn record_fetch_error(\n conn: &Connection,\n project_id: i64,\n noteable_type: NoteableType,\n noteable_iid: i64,\n error: &str,\n) -> Result<()>;\n```\n\n## Acceptance Criteria\n- [ ] queue_discussion_fetch uses ON CONFLICT DO UPDATE (consistent with dirty_sources pattern)\n- [ ] Re-queuing resets: attempt_count=0, next_attempt_at=NULL, last_error=NULL\n- [ ] get_pending_fetches respects next_attempt_at backoff\n- [ ] get_pending_fetches returns entries ordered by queued_at 
ASC\n- [ ] complete_fetch removes entry from queue\n- [ ] record_fetch_error increments attempt_count, computes next_attempt_at via shared backoff\n- [ ] NoteableType.as_str() returns \"Issue\" or \"MergeRequest\" (matches DB CHECK constraint)\n- [ ] `cargo test discussion_queue` passes\n\n## Files\n- `src/ingestion/discussion_queue.rs` — new file\n- `src/ingestion/mod.rs` — add `pub mod discussion_queue;`\n\n## TDD Loop\nRED: Tests in `#[cfg(test)] mod tests`:\n- `test_queue_and_get` — queue entry, get returns it\n- `test_requeue_resets_backoff` — queue, error, re-queue -> attempt_count=0\n- `test_backoff_respected` — entry with future next_attempt_at not returned\n- `test_complete_removes` — complete_fetch removes entry\n- `test_error_increments_attempts` — error -> attempt_count=1, next_attempt_at set\nGREEN: Implement all functions\nVERIFY: `cargo test discussion_queue`\n\n## Edge Cases\n- Queue same (project_id, noteable_type, noteable_iid) twice: ON CONFLICT resets state\n- NoteableType must match DB CHECK constraint exactly (\"Issue\", \"MergeRequest\" — capitalized)\n- Empty queue: get_pending_fetches returns empty Vec","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.505548Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:31:35.496454Z","closed_at":"2026-01-30T17:31:35.496405Z","close_reason":"Implemented discussion_queue with queue/get/complete/record_error + 6 tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1je","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-01-30T15:29:35.034753Z","created_by":"tayloreernisse"},{"issue_id":"bd-1je","depends_on_id":"bd-mem","type":"blocks","created_at":"2026-01-30T15:29:35.071573Z","created_by":"tayloreernisse"}]} @@ -60,7 +60,7 @@ {"id":"bd-1n5q","title":"lore brief: situational awareness for topic/module/person","description":"## Background\nComposable capstone command. 
An agent says \"I am about to work on auth\" and gets everything in one call: open issues, active MRs, experts, recent activity, unresolved threads, related context. Replaces 5 separate lore calls with 1.\n\n## Input Modes\n1. Topic: `lore brief 'authentication'` — FTS search to find relevant entities, aggregate\n2. Path: `lore brief --path src/auth/` — who expert internals for path expertise\n3. Person: `lore brief --person teernisse` — who workload internals\n4. Entity: `lore brief issues 3864` — single entity focus with cross-references\n\n## Section Assembly Architecture\n\n### Reuse existing run_* functions (ship faster, recommended for v1)\nEach section calls existing CLI command functions and converts their output.\n\nIMPORTANT: All existing run_* functions take `&Config`, NOT `&Connection`. The Config contains the db_path and each function opens its own connection internally.\n\n```rust\n// In src/cli/commands/brief.rs\n\nuse crate::cli::commands::list::{run_list_issues, run_list_mrs, ListFilters, MrListFilters};\nuse crate::cli::commands::who::{run_who, WhoArgs, WhoMode};\nuse crate::core::config::Config;\n\npub async fn run_brief(config: &Config, args: BriefArgs) -> Result {\n let mut sections_computed = Vec::new();\n\n // 1. open_issues: reuse list.rs\n // Signature: pub fn run_list_issues(config: &Config, filters: ListFilters) -> Result\n // Located at src/cli/commands/list.rs:268\n let open_issues = run_list_issues(config, ListFilters {\n state: Some(\"opened\".into()),\n limit: Some(5),\n project: args.project.clone(),\n // ... scope by topic/path/person based on mode\n ..Default::default()\n })?;\n sections_computed.push(\"open_issues\");\n\n // 2. 
active_mrs: reuse list.rs\n // Signature: pub fn run_list_mrs(config: &Config, filters: MrListFilters) -> Result\n // Located at src/cli/commands/list.rs:476\n let active_mrs = run_list_mrs(config, MrListFilters {\n state: Some(\"opened\".into()),\n limit: Some(5),\n project: args.project.clone(),\n ..Default::default()\n })?;\n sections_computed.push(\"active_mrs\");\n\n // 3. experts: reuse who.rs\n // Signature: pub fn run_who(config: &Config, args: &WhoArgs) -> Result\n // Located at src/cli/commands/who.rs:276\n let experts = run_who(config, &WhoArgs {\n mode: WhoMode::Expert,\n path: args.path.clone(),\n limit: Some(3),\n ..Default::default()\n })?;\n sections_computed.push(\"experts\");\n\n // 4. recent_activity: reuse timeline internals\n // The timeline pipeline is 5-stage (SEED->HYDRATE->EXPAND->COLLECT->RENDER)\n // Types in src/core/timeline.rs, seed in src/core/timeline_seed.rs\n // ...etc\n}\n```\n\nNOTE: ListFilters and MrListFilters may not implement Default. Check before using `..Default::default()`. If they don't, derive it or construct all fields explicitly.\n\n### Concrete Function References (src/cli/commands/)\n| Module | Function | Signature | Line |\n|--------|----------|-----------|------|\n| list.rs | run_list_issues | `(config: &Config, filters: ListFilters) -> Result` | 268 |\n| list.rs | run_list_mrs | `(config: &Config, filters: MrListFilters) -> Result` | 476 |\n| who.rs | run_who | `(config: &Config, args: &WhoArgs) -> Result` | 276 |\n| search.rs | run_search | `(config: &Config, query: &str, cli_filters: SearchCliFilters, fts_mode: FtsQueryMode, requested_mode: &str, explain: bool) -> Result` | 61 |\n\nNOTE: run_search is currently synchronous (pub fn, not pub async fn). If bd-1ksf ships first, it becomes async. 
Brief should handle both cases — call `.await` if async, direct call if sync.\n\n### Section Details\n| Section | Source | Limit | Fallback |\n|---------|--------|-------|----------|\n| open_issues | list.rs with state=opened | 5 | empty array |\n| active_mrs | list.rs with state=opened | 5 | empty array |\n| experts | who.rs Expert mode | 3 | empty array (no path data) |\n| recent_activity | timeline pipeline | 10 events | empty array |\n| unresolved_threads | SQL: discussions WHERE resolved=false | 5 | empty array |\n| related | search_vector() via bd-8con | 5 | omit section (no embeddings) |\n| warnings | computed from dates/state | all | empty array |\n\n### Warning Generation\n```rust\nfn compute_warnings(issues: &[IssueRow]) -> Vec {\n let now = chrono::Utc::now();\n issues.iter().filter_map(|i| {\n let updated = parse_timestamp(i.updated_at)?;\n let days_stale = (now - updated).num_days();\n if days_stale > 30 {\n Some(format!(\"Issue #{} has no activity for {} days\", i.iid, days_stale))\n } else { None }\n }).chain(\n issues.iter().filter(|i| i.assignees.is_empty())\n .map(|i| format!(\"Issue #{} is unassigned\", i.iid))\n ).collect()\n}\n```\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"mode\": \"topic\",\n \"query\": \"authentication\",\n \"summary\": \"3 open issues, 2 active MRs, top expert: teernisse\",\n \"open_issues\": [{ \"iid\": 123, \"title\": \"...\", \"state\": \"opened\", \"assignees\": [...], \"updated_at\": \"...\", \"labels\": [...] 
}],\n \"active_mrs\": [{ \"iid\": 456, \"title\": \"...\", \"state\": \"opened\", \"author\": \"...\", \"draft\": false, \"updated_at\": \"...\" }],\n \"experts\": [{ \"username\": \"teernisse\", \"score\": 42, \"last_activity\": \"...\" }],\n \"recent_activity\": [{ \"timestamp\": \"...\", \"event_type\": \"state_change\", \"entity_ref\": \"issues#123\", \"summary\": \"...\", \"actor\": \"...\" }],\n \"unresolved_threads\": [{ \"discussion_id\": \"abc\", \"entity_ref\": \"issues#123\", \"started_by\": \"...\", \"note_count\": 5, \"last_note_at\": \"...\" }],\n \"related\": [{ \"iid\": 789, \"title\": \"...\", \"similarity_score\": 0.85 }],\n \"warnings\": [\"Issue #3800 has no activity for 45 days\"]\n },\n \"meta\": { \"elapsed_ms\": 1200, \"sections_computed\": [\"open_issues\", \"active_mrs\", \"experts\", \"recent_activity\"] }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nBrief {\n /// Free-text topic, entity type, or omit for project-wide brief\n query: Option,\n /// Focus on a file path (who expert mode)\n #[arg(long)]\n path: Option,\n /// Focus on a person (who workload mode)\n #[arg(long)]\n person: Option,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n /// Maximum items per section\n #[arg(long, default_value = \"5\")]\n section_limit: usize,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/brief.rs:\n- test_brief_topic_returns_all_sections: insert test data, search 'auth', assert all section keys present in response\n- test_brief_path_uses_who_expert: brief --path src/auth/, assert experts section populated\n- test_brief_person_uses_who_workload: brief --person user, assert open_issues filtered to user's assignments\n- test_brief_warnings_stale_issue: insert issue with updated_at > 30 days ago, assert warning generated\n- test_brief_token_budget: robot mode output for topic query is under 12000 bytes (~3000 tokens)\n- test_brief_no_embeddings_graceful: related section omitted 
(not errored) when no embeddings exist\n- test_brief_empty_topic: zero matches returns valid JSON with empty arrays + \"No data found\" summary\n\nGREEN: Implement brief with section assembly, calling existing run_* functions\n\nVERIFY:\n```bash\ncargo test brief:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J brief 'throw time' | jq '.data | keys'\ncargo run --release -- -J brief 'throw time' | wc -c # target <12000\n```\n\n## Acceptance Criteria\n- [ ] lore brief TOPIC returns all sections for free-text topic\n- [ ] lore brief --path PATH returns path-focused briefing with experts\n- [ ] lore brief --person USERNAME returns person-focused briefing\n- [ ] lore brief issues N returns entity-focused briefing\n- [ ] Robot mode output under 12000 bytes (~3000 tokens)\n- [ ] Each section degrades gracefully if its data source is unavailable\n- [ ] summary field is auto-generated one-liner from section counts\n- [ ] warnings detect: stale issues (>30d), unassigned, no due date\n- [ ] Performance: <2s total (acceptable since composing multiple queries)\n- [ ] Command registered in main.rs and robot-docs\n\n## Edge Cases\n- Topic with zero matches: return empty sections + \"No data found for this topic\" summary\n- Path that nobody has touched: experts empty, related may still have results\n- Person not found in DB: exit code 17 with suggestion\n- All sections empty: still return valid JSON with empty arrays\n- Very broad topic (\"the\"): may return too many results — each section respects its limit cap\n- ListFilters/MrListFilters may not derive Default — construct all fields explicitly if needed\n\n## Dependencies\n- Hybrid search (bd-1ksf) for topic relevance ranking\n- lore who (already shipped) for expertise\n- lore related (bd-8con) for semantic connections (BLOCKER — related section is core to the feature)\n- Timeline pipeline (already shipped) for recent activity\n\n## Dependency Context\n- **bd-1ksf (hybrid search)**: Provides 
`search_hybrid()` which brief uses for topic mode to find relevant entities. Without it, topic mode falls back to FTS-only via `search_fts()`.\n- **bd-8con (related)**: Provides `run_related()` which brief calls to populate the `related` section with semantically similar entities. This is a blocking dependency — the related section is a core differentiator.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/brief.rs\n- src/cli/commands/mod.rs (add pub mod brief; re-export)\n- src/main.rs (register Brief subcommand in Commands enum, add handle_brief fn)\n- Reuse: list.rs, who.rs, timeline.rs, search.rs, show.rs internals","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:47:22.893231Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:31:33.752020Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence"],"dependencies":[{"issue_id":"bd-1n5q","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T15:47:22.898428Z","created_by":"tayloreernisse"},{"issue_id":"bd-1n5q","depends_on_id":"bd-1ksf","type":"blocks","created_at":"2026-02-12T15:47:52.084948Z","created_by":"tayloreernisse"},{"issue_id":"bd-1n5q","depends_on_id":"bd-8con","type":"blocks","created_at":"2026-02-12T15:47:52.152362Z","created_by":"tayloreernisse"}]} {"id":"bd-1nf","title":"Register 'lore timeline' command with all flags","description":"## Background\n\nThis bead wires the `lore timeline` command into the CLI — adding the subcommand to the Commands enum, defining all flags, registering in VALID_COMMANDS, and dispatching to the timeline handler. 
The actual query logic and rendering are in separate beads.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.1 (Command Design).\n\n## Codebase Context\n\n- Commands enum in `src/cli/mod.rs` (line ~86): uses #[derive(Subcommand)] with nested Args structs\n- VALID_COMMANDS in `src/main.rs` (line ~448): &[&str] array for fuzzy command matching\n- Handler dispatch in `src/main.rs` match on Commands:: variants\n- robot-docs manifest in `src/main.rs`: registers commands for `lore robot-docs` output\n- Existing pattern: `Sync(SyncArgs)`, `Search(SearchArgs)`, etc.\n- No timeline module exists yet — this bead creates the CLI entry point only\n\n## Approach\n\n### 1. TimelineArgs struct (`src/cli/mod.rs`):\n\n```rust\n/// Show a chronological timeline of events matching a query\n#[derive(Parser, Debug)]\npub struct TimelineArgs {\n /// Search query (keywords to find in issues, MRs, and discussions)\n pub query: String,\n\n /// Scope to a specific project (fuzzy match)\n #[arg(short = 'p', long)]\n pub project: Option,\n\n /// Only show events after this date (e.g. \"6m\", \"2w\", \"2024-01-01\")\n #[arg(long)]\n pub since: Option,\n\n /// Cross-reference expansion depth (0 = no expansion)\n #[arg(long, default_value = \"1\")]\n pub depth: usize,\n\n /// Also follow 'mentioned' edges during expansion (high fan-out)\n #[arg(long = \"expand-mentions\")]\n pub expand_mentions: bool,\n\n /// Maximum number of events to display\n #[arg(short = 'n', long = \"limit\", default_value = \"100\")]\n pub limit: usize,\n}\n```\n\n### 2. Commands enum variant:\n\n```rust\n/// Show a chronological timeline of events matching a query\n#[command(name = \"timeline\")]\nTimeline(TimelineArgs),\n```\n\n### 3. Handler in `src/main.rs`:\n\n```rust\nCommands::Timeline(args) => {\n // Placeholder: will be filled by bd-2f2 (human) and bd-dty (robot)\n // For now: resolve project, call timeline query, dispatch to renderer\n}\n```\n\n### 4. 
VALID_COMMANDS: add `\"timeline\"` to the array\n\n### 5. robot-docs: add timeline command description to manifest\n\n## Acceptance Criteria\n\n- [ ] `TimelineArgs` struct with all 6 flags: query, project, since, depth, expand-mentions, limit\n- [ ] Commands::Timeline variant registered in Commands enum\n- [ ] Handler stub in src/main.rs dispatches to timeline logic\n- [ ] `\"timeline\"` added to VALID_COMMANDS array\n- [ ] robot-docs manifest includes timeline command description\n- [ ] `lore timeline --help` shows correct help text\n- [ ] `lore timeline` without query shows error (query is required positional)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/mod.rs` (TimelineArgs struct + Commands::Timeline variant)\n- `src/main.rs` (handler dispatch + VALID_COMMANDS + robot-docs entry)\n\n## TDD Loop\n\nNo unit tests for CLI wiring. Verify with:\n\n```bash\ncargo check --all-targets\ncargo run -- timeline --help\n```\n\n## Edge Cases\n\n- --since parsing: reuse existing date parsing from ListFilters (src/cli/mod.rs handles \"7d\", \"2w\", \"YYYY-MM-DD\")\n- --depth 0: valid, means no cross-reference expansion\n- --expand-mentions: off by default because mentioned edges have high fan-out\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:28.422082Z","created_by":"tayloreernisse","updated_at":"2026-02-06T13:49:15.313047Z","closed_at":"2026-02-06T13:49:15.312993Z","close_reason":"Wired lore timeline command: TimelineArgs with 9 flags, Commands::Timeline variant, handle_timeline handler, VALID_COMMANDS entry, robot-docs manifest with temporal_intelligence 
workflow","compaction_level":0,"original_size":0,"labels":["cli","gate-3","phase-b"],"dependencies":[{"issue_id":"bd-1nf","depends_on_id":"bd-2f2","type":"blocks","created_at":"2026-02-02T21:33:37.746192Z","created_by":"tayloreernisse"},{"issue_id":"bd-1nf","depends_on_id":"bd-dty","type":"blocks","created_at":"2026-02-02T21:33:37.788079Z","created_by":"tayloreernisse"},{"issue_id":"bd-1nf","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:28.423399Z","created_by":"tayloreernisse"}]} {"id":"bd-1np","title":"[CP1] GitLab types for issues, discussions, notes","description":"## Background\n\nGitLab types define the Rust structs for deserializing GitLab API responses. These types are the foundation for all ingestion work - issues, discussions, and notes must be correctly typed for serde to parse them.\n\n## Approach\n\nAdd types to `src/gitlab/types.rs` with serde derives:\n\n### GitLabIssue\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabIssue {\n pub id: i64, // GitLab global ID\n pub iid: i64, // Project-scoped issue number\n pub project_id: i64,\n pub title: String,\n pub description: Option,\n pub state: String, // \"opened\" | \"closed\"\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub closed_at: Option,\n pub author: GitLabAuthor,\n pub labels: Vec, // Array of label names (CP1 canonical)\n pub web_url: String,\n}\n```\n\nNOTE: `labels_details` intentionally NOT modeled - varies across GitLab versions.\n\n### GitLabAuthor\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabAuthor {\n pub id: i64,\n pub username: String,\n pub name: String,\n}\n```\n\n### GitLabDiscussion\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabDiscussion {\n pub id: String, // String ID like \"6a9c1750b37d...\"\n pub individual_note: bool, // true = standalone comment\n pub notes: Vec,\n}\n```\n\n### GitLabNote\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct 
GitLabNote {\n pub id: i64,\n #[serde(rename = \"type\")]\n pub note_type: Option, // \"DiscussionNote\" | \"DiffNote\" | null\n pub body: String,\n pub author: GitLabAuthor,\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub system: bool, // true for system-generated notes\n #[serde(default)]\n pub resolvable: bool,\n #[serde(default)]\n pub resolved: bool,\n pub resolved_by: Option,\n pub resolved_at: Option,\n pub position: Option,\n}\n```\n\n### GitLabNotePosition\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabNotePosition {\n pub old_path: Option,\n pub new_path: Option,\n pub old_line: Option,\n pub new_line: Option,\n}\n```\n\n## Acceptance Criteria\n\n- [ ] GitLabIssue deserializes from API response JSON\n- [ ] GitLabAuthor embedded correctly in issue and note\n- [ ] GitLabDiscussion with notes array deserializes\n- [ ] GitLabNote handles null note_type (use Option)\n- [ ] GitLabNote uses #[serde(rename = \"type\")] for reserved keyword\n- [ ] resolvable/resolved default to false via #[serde(default)]\n- [ ] All timestamp fields are String (ISO 8601 parsed elsewhere)\n\n## Files\n\n- src/gitlab/types.rs (edit - add types)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/gitlab_types_tests.rs\n#[test] fn deserializes_gitlab_issue_from_json()\n#[test] fn deserializes_gitlab_discussion_from_json()\n#[test] fn handles_null_note_type()\n#[test] fn handles_missing_resolvable_field()\n#[test] fn deserializes_labels_as_string_array()\n```\n\nGREEN: Add type definitions with serde attributes\n\nVERIFY: `cargo test gitlab_types`\n\n## Edge Cases\n\n- note_type can be null, \"DiscussionNote\", or \"DiffNote\"\n- labels array can be empty\n- description can be null\n- resolved_by/resolved_at can be null\n- position is only present for 
DiffNotes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.150472Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:17:08.842965Z","closed_at":"2026-01-25T22:17:08.842895Z","close_reason":"Implemented GitLabAuthor, GitLabIssue, GitLabDiscussion, GitLabNote, GitLabNotePosition types with 10 passing tests","compaction_level":0,"original_size":0} -{"id":"bd-1nsl","title":"Epic: Surgical Per-IID Sync","status":"open","priority":1,"issue_type":"epic","created_at":"2026-02-17T19:11:34.020453Z","created_by":"tayloreernisse","updated_at":"2026-02-17T19:11:34.023031Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-1nsl","title":"Epic: Surgical Per-IID Sync","description":"## Background\n\nSurgical Per-IID Sync adds `lore sync --issue --mr -p ` for on-demand sync of specific entities without running the full project-wide pipeline. This is critical for agent workflows: when an agent needs fresh data for a specific issue or MR, waiting for a full sync (minutes) is unacceptable. 
Surgical sync completes in seconds by fetching only the targeted entities, their discussions, resource events, and dependent data, then scoping doc regeneration and embedding to only the affected documents.\n\n## Architecture\n\nThe pipeline stages mirror full sync but scoped to individual entities:\n\n```\nPREFLIGHT -> TOCTOU CHECK -> INGEST -> DEPENDENTS -> DOCS -> EMBED -> FINALIZE\n```\n\n- **Preflight**: Fetch entity from GitLab API by IID, confirm existence\n- **TOCTOU check**: Compare payload `updated_at` with DB — skip if already current\n- **Ingest**: Upsert entity via existing `process_single_issue`/`process_single_mr`\n- **Dependents**: Inline fetch of discussions, resource events, MR diffs, closes_issues\n- **Docs**: Scoped `regenerate_dirty_documents_for_sources()` — only affected source keys\n- **Embed**: Scoped `embed_documents_by_ids()` — only regenerated document IDs\n- **Finalize**: SyncRunRecorder with surgical mode columns\n\n## Children (Execution Order)\n\n### Foundation (no blockers, can parallelize)\n1. **bd-tiux** — Migration 027: surgical mode columns on sync_runs\n2. **bd-1sc6** — Error variant + pub(crate) visibility changes\n3. **bd-159p** — GitLab client get_by_iid methods\n4. **bd-1lja** — CLI flags + SyncOptions extensions\n\n### Core (blocked by foundation)\n5. **bd-wcja** — SyncResult surgical fields (blocked by bd-3sez)\n6. **bd-arka** — SyncRunRecorder surgical lifecycle (blocked by bd-tiux)\n7. **bd-3sez** — surgical.rs core module + tests (blocked by bd-159p, bd-1sc6)\n8. **bd-hs6j** — Scoped doc regeneration (no blockers)\n9. **bd-1elx** — Scoped embedding (no blockers)\n\n### Orchestration (blocked by core)\n10. **bd-kanh** — Per-entity dependent helpers (blocked by bd-3sez)\n11. **bd-1i4i** — Orchestrator function (blocked by all core beads)\n\n### Wiring + Validation\n12. **bd-3bec** — Wire dispatch in run_sync + robot-docs (blocked by bd-1i4i)\n13. 
**bd-3jqx** — Integration tests (blocked by bd-1i4i + core beads)\n\n## Completion Criteria\n\n- [ ] `lore sync --issue 7 -p group/project` fetches, ingests, and reports for issue 7 only\n- [ ] `lore sync --mr 101 --mr 102 -p proj` handles multiple MRs\n- [ ] `lore sync --preflight-only --issue 7 -p proj` validates without DB writes\n- [ ] Robot mode JSON includes `surgical_mode`, `surgical_iids`, `entity_results`\n- [ ] TOCTOU: already-current entities are skipped (not re-ingested)\n- [ ] Scoped docs + embed: only affected documents are regenerated and embedded\n- [ ] Cancellation at any stage stops gracefully with partial results\n- [ ] `lore robot-docs` documents all surgical flags and response schemas\n- [ ] All existing full-sync tests pass unchanged\n- [ ] Integration test suite (bd-3jqx) passes","status":"open","priority":1,"issue_type":"epic","created_at":"2026-02-17T19:11:34.020453Z","created_by":"tayloreernisse","updated_at":"2026-02-18T19:26:25.746875Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-1o1","title":"OBSERV: Add -v/--verbose and --log-format CLI flags","description":"## Background\nUsers and agents need CLI-controlled verbosity without knowing RUST_LOG syntax. The -v flag convention (cargo, curl, ssh) is universally understood. --log-format json enables lore sync 2>&1 | jq workflows without reading log files.\n\n## Approach\nAdd two new global flags to the Cli struct in src/cli/mod.rs (insert after the quiet field at line ~37):\n\n```rust\n/// Increase log verbosity (-v, -vv, -vvv)\n#[arg(short = 'v', long = \"verbose\", action = clap::ArgAction::Count, global = true)]\npub verbose: u8,\n\n/// Log format for stderr output: text (default) or json\n#[arg(long = \"log-format\", global = true, value_parser = [\"text\", \"json\"], default_value = \"text\")]\npub log_format: String,\n```\n\nThe existing Cli struct (src/cli/mod.rs:13-42) has these global flags: config, robot, json, color, quiet. 
The new flags follow the same pattern.\n\nNote: clap::ArgAction::Count allows -v, -vv, -vvv as a single flag with increasing count (0, 1, 2, 3).\n\n## Acceptance Criteria\n- [ ] lore -v sync parses without error (verbose=1)\n- [ ] lore -vv sync parses (verbose=2)\n- [ ] lore -vvv sync parses (verbose=3)\n- [ ] lore --log-format json sync parses (log_format=\"json\")\n- [ ] lore --log-format text sync parses (default)\n- [ ] lore --log-format xml sync errors (invalid value)\n- [ ] Existing commands unaffected (verbose defaults to 0, log_format to \"text\")\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/cli/mod.rs (modify Cli struct, lines 13-42)\n\n## TDD Loop\nRED: Write test that parses Cli with -v flag and asserts verbose=1\nGREEN: Add the two fields to Cli struct\nVERIFY: cargo test -p lore && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- -v and -q together: both parse fine; conflict resolution happens in subscriber setup (bd-2rr), not here\n- -v flag must be global=true so it works before and after subcommands: lore -v sync AND lore sync -v\n- --log-format is a string, not enum, to keep Cli struct simple","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.421339Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:10:22.585947Z","closed_at":"2026-02-04T17:10:22.585905Z","close_reason":"Added -v/--verbose (count) and --log-format (text|json) global CLI flags","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o1","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-04T15:53:55.422103Z","created_by":"tayloreernisse"}]} {"id":"bd-1o4h","title":"OBSERV: Define StageTiming struct in src/core/metrics.rs","description":"## Background\nStageTiming is the materialized view of span timing data. It's the data structure that flows through robot JSON output, sync_runs.metrics_json, and the human-readable timing summary. 
Defined in a new file because it's genuinely new functionality that doesn't fit existing modules.\n\n## Approach\nCreate src/core/metrics.rs:\n\n```rust\nuse serde::Serialize;\n\nfn is_zero(v: &usize) -> bool { *v == 0 }\n\n#[derive(Debug, Clone, Serialize)]\npub struct StageTiming {\n pub name: String,\n #[serde(skip_serializing_if = \"Option::is_none\")]\n pub project: Option,\n pub elapsed_ms: u64,\n pub items_processed: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub items_skipped: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub errors: usize,\n #[serde(skip_serializing_if = \"Vec::is_empty\")]\n pub sub_stages: Vec,\n}\n```\n\nRegister module in src/core/mod.rs (line ~11, add):\n```rust\npub mod metrics;\n```\n\nThe is_zero helper is a private function used by serde's skip_serializing_if. It must take &usize (reference) and return bool.\n\n## Acceptance Criteria\n- [ ] StageTiming serializes to JSON matching PRD Section 4.6.2 example\n- [ ] items_skipped omitted when 0\n- [ ] errors omitted when 0\n- [ ] sub_stages omitted when empty vec\n- [ ] project omitted when None\n- [ ] name, elapsed_ms, items_processed always present\n- [ ] Struct is Debug + Clone + Serialize\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (new file)\n- src/core/mod.rs (register module, add line after existing pub mod declarations)\n\n## TDD Loop\nRED:\n - test_stage_timing_serialization: create StageTiming with sub_stages, serialize, assert JSON structure\n - test_stage_timing_zero_fields_omitted: errors=0, items_skipped=0, assert no \"errors\" or \"items_skipped\" keys\n - test_stage_timing_empty_sub_stages: sub_stages=vec![], assert no \"sub_stages\" key\nGREEN: Create metrics.rs with StageTiming struct and is_zero helper\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- is_zero must be a function, not a closure (serde skip_serializing_if requires a function path)\n- Vec::is_empty is 
a method on Vec, and serde accepts \"Vec::is_empty\" as a path for skip_serializing_if\n- Recursive StageTiming (sub_stages contains StageTiming): serde handles this naturally, no special handling needed","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.907234Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:21:40.915842Z","closed_at":"2026-02-04T17:21:40.915794Z","close_reason":"Created src/core/metrics.rs with StageTiming struct, serde skip_serializing_if for zero/empty fields, 5 tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o4h","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-04T15:54:31.910015Z","created_by":"tayloreernisse"}]} {"id":"bd-1oi7","title":"NOTE-2A: Schema migration for note documents (migration 024)","description":"## Background\nThe documents and dirty_sources tables have CHECK constraints limiting source_type to ('issue', 'merge_request', 'discussion'). Need to add 'note' as valid source_type. SQLite doesn't support ALTER CONSTRAINT, so use the table-rebuild pattern. Uses migration slot 024 (022 = query indexes, 023 = issue_detail_fields already exists).\n\n## Approach\nCreate migrations/024_note_documents.sql:\n\n1. Rebuild dirty_sources: CREATE dirty_sources_new with CHECK adding 'note', INSERT SELECT, DROP old, RENAME.\n2. 
Rebuild documents (complex — must preserve FTS consistency):\n - Save junction table data (_doc_labels_backup, _doc_paths_backup)\n - Drop FTS triggers (documents_ai, documents_ad, documents_au — defined in migration 008_fts5.sql)\n - Drop junction tables (document_labels, document_paths — defined in migration 007_documents.sql)\n - Create documents_new with updated CHECK adding 'note'\n - INSERT INTO documents_new SELECT * FROM documents (preserves rowids for FTS)\n - Drop documents, rename new\n - Recreate all indexes (idx_documents_project_updated, idx_documents_author, idx_documents_source, idx_documents_content_hash — see migration 007_documents.sql for definitions)\n - Recreate junction tables + restore data from backups\n - Recreate FTS triggers (see migration 008_fts5.sql for trigger SQL)\n - INSERT INTO documents_fts(documents_fts) VALUES('rebuild')\n3. Defense-in-depth triggers:\n - notes_ad_cleanup: AFTER DELETE ON notes WHEN old.is_system = 0 → delete doc + dirty_sources for source_type='note', source_id=old.id\n - notes_au_system_cleanup: AFTER UPDATE OF is_system ON notes WHEN NEW.is_system = 1 AND OLD.is_system = 0 → delete doc + dirty_sources\n4. Drop temp backup tables\n\nRegister as (\"024\", include_str!(\"../../migrations/024_note_documents.sql\")) in MIGRATIONS array in src/core/db.rs. 
Position AFTER the \"023\" entry.\n\n## Files\n- CREATE: migrations/024_note_documents.sql\n- MODIFY: src/core/db.rs (add (\"024\", include_str!(...)) to MIGRATIONS array, after line 75)\n\n## TDD Anchor\nRED: test_migration_024_allows_note_source_type — INSERT with source_type='note' should succeed in both documents and dirty_sources.\nGREEN: Implement the table rebuild migration.\nVERIFY: cargo test migration_024 -- --nocapture\nTests: test_migration_024_preserves_existing_data, test_migration_024_fts_triggers_intact, test_migration_024_row_counts_preserved, test_migration_024_integrity_checks_pass, test_migration_024_fts_rebuild_consistent, test_migration_024_note_delete_trigger_cleans_document, test_migration_024_note_system_flip_trigger_cleans_document, test_migration_024_system_note_delete_trigger_does_not_fire\n\n## Acceptance Criteria\n- [ ] INSERT source_type='note' succeeds in documents and dirty_sources\n- [ ] All existing data preserved through table rebuild (row counts match before/after)\n- [ ] FTS triggers fire correctly after rebuild (insert a doc, verify FTS entry exists)\n- [ ] documents_fts row count == documents row count after rebuild\n- [ ] PRAGMA foreign_key_check returns no violations\n- [ ] notes_ad_cleanup trigger fires on note deletion (deletes document + dirty_sources)\n- [ ] notes_au_system_cleanup trigger fires when is_system flips 0→1\n- [ ] System note deletion does NOT trigger notes_ad_cleanup (is_system = 1 guard)\n- [ ] All 9 tests pass\n\n## Edge Cases\n- Rowid preservation: INSERT INTO documents_new SELECT * preserves id column = rowid for FTS consistency\n- CRITICAL: Must save/restore junction table data (ON DELETE CASCADE on document_labels/document_paths would delete them when documents table is dropped)\n- The FTS rebuild at end is a safety net for any rowid drift\n- Empty database: migration is a no-op (all SELECTs return 0 rows, tables rebuilt with new 
CHECK)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:01:35.164340Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:24.078558Z","closed_at":"2026-02-12T18:13:24.078512Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"],"dependencies":[{"issue_id":"bd-1oi7","depends_on_id":"bd-18bf","type":"blocks","created_at":"2026-02-12T17:04:47.854894Z","created_by":"tayloreernisse"},{"issue_id":"bd-1oi7","depends_on_id":"bd-22ai","type":"blocks","created_at":"2026-02-12T17:04:49.940178Z","created_by":"tayloreernisse"},{"issue_id":"bd-1oi7","depends_on_id":"bd-ef0u","type":"blocks","created_at":"2026-02-12T17:04:49.301709Z","created_by":"tayloreernisse"}]} @@ -201,7 +201,7 @@ {"id":"bd-3a4k","title":"CLI: list issues status column, filter, and robot fields","description":"## Background\nList issues needs a Status column in the table, status fields in robot JSON, and a --status filter for querying by work item status name. The filter supports multiple values (OR semantics) and case-insensitive matching.\n\n## Approach\nExtend list.rs row types, SQL, table rendering. Add --status Vec to clap args. Build dynamic WHERE clause with COLLATE NOCASE. Wire into both ListFilters constructions in main.rs. Register in autocorrect.\n\n## Files\n- src/cli/commands/list.rs (row types, SQL, table, filter, color helper)\n- src/cli/mod.rs (--status flag on IssuesArgs)\n- src/main.rs (wire statuses into both ListFilters)\n- src/cli/autocorrect.rs (add --status to COMMAND_FLAGS)\n\n## Implementation\n\nIssueListRow + IssueListRowJson: add 5 status fields (all Option)\nFrom<&IssueListRow> for IssueListRowJson: clone all 5 fields\n\nquery_issues SELECT: add i.status_name, i.status_category, i.status_color, i.status_icon_name, i.status_synced_at after existing columns\n Existing SELECT has 12 columns (indices 0-11). 
New columns: indices 12-16.\n Row mapping: status_name: row.get(12)?, ..., status_synced_at: row.get(16)?\n\nListFilters: add pub statuses: &'a [String]\n\nWHERE clause builder (after has_due_date block):\n if statuses.len() == 1: \"i.status_name = ? COLLATE NOCASE\" + push param\n if statuses.len() > 1: \"i.status_name IN (?, ?, ...) COLLATE NOCASE\" + push all params\n\nTable: add \"Status\" column header (bold) between State and Assignee\n Row: match &issue.status_name -> Some: colored_cell_hex(status, color), None: Cell::new(\"\")\n\nNew helper:\n fn colored_cell_hex(content, hex: Option<&str>) -> Cell\n If no hex or colors disabled: Cell::new(content)\n Parse 6-char hex, use Cell::new(content).fg(Color::Rgb { r, g, b })\n\nIn src/cli/mod.rs IssuesArgs:\n #[arg(long, help_heading = \"Filters\")]\n pub status: Vec,\n\nIn src/main.rs handle_issues (~line 695):\n ListFilters { ..., statuses: &args.status }\nIn legacy List handler (~line 2421):\n ListFilters { ..., statuses: &[] }\n\nIn src/cli/autocorrect.rs COMMAND_FLAGS \"issues\" entry:\n Add \"--status\" between existing flags\n\n## Acceptance Criteria\n- [ ] Status column appears in table between State and Assignee\n- [ ] NULL status -> empty cell\n- [ ] Status colored by hex in human mode\n- [ ] --status \"In progress\" filters correctly\n- [ ] --status \"in progress\" matches \"In progress\" (COLLATE NOCASE)\n- [ ] --status \"To do\" --status \"In progress\" -> OR semantics (both returned)\n- [ ] Robot: status_name, status_category in each issue JSON\n- [ ] --fields supports status_name, status_category, status_color, status_icon_name, status_synced_at\n- [ ] --fields minimal does NOT include status fields\n- [ ] Autocorrect registry test passes (--status registered)\n- [ ] cargo check --all-targets passes\n\n## TDD Loop\nRED: test_list_filter_by_status, test_list_filter_by_status_case_insensitive, test_list_filter_by_multiple_statuses\nGREEN: Implement all changes across 4 files\nVERIFY: cargo test 
list_filter && cargo test registry_covers\n\n## Edge Cases\n- COLLATE NOCASE is ASCII-only but sufficient (all system statuses are ASCII)\n- Single-value uses = for simplicity; multi-value uses IN with dynamic placeholders\n- --status combined with other filters (--state, --label) -> AND logic\n- autocorrect registry_covers_command_flags test will FAIL if --status not registered\n- Legacy List command path also constructs ListFilters — needs statuses: &[]\n- Column index offset: new columns start at 12 (0-indexed)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-11T06:42:26.438Z","created_by":"tayloreernisse","updated_at":"2026-02-11T07:21:33.421297Z","closed_at":"2026-02-11T07:21:33.421247Z","close_reason":"Implemented by agent swarm — all quality gates pass (595 tests, 0 failures)","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3a4k","depends_on_id":"bd-2y79","type":"parent-child","created_at":"2026-02-11T06:42:26.440928Z","created_by":"tayloreernisse"},{"issue_id":"bd-3a4k","depends_on_id":"bd-3dum","type":"blocks","created_at":"2026-02-11T06:42:45.236067Z","created_by":"tayloreernisse"}]} {"id":"bd-3ae","title":"Epic: CP2 Gate A - MRs Only","description":"## Background\nGate A validates core MR ingestion works before adding complexity. Proves the cursor-based sync, pagination, and basic CLI work. 
This is the foundation - if Gate A fails, nothing else matters.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] `gi ingest --type=merge_requests` completes without error\n- [ ] `SELECT COUNT(*) FROM merge_requests` > 0\n- [ ] `gi list mrs --limit=5` shows 5 MRs with iid, title, state, author\n- [ ] `gi count mrs` shows total count matching DB query\n- [ ] MR with `state=locked` can be stored (if exists in test data)\n- [ ] Draft MR shows `draft=1` in DB and `[DRAFT]` in list output\n- [ ] `work_in_progress=true` MR shows `draft=1` (fallback works)\n- [ ] `head_sha` populated for MRs with commits\n- [ ] `references_short` and `references_full` populated\n- [ ] Re-run ingest shows \"0 new MRs\" or minimal refetch (cursor working)\n- [ ] Cursor saved at page boundary, not item boundary\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate A: MRs Only ===\"\n\n# 1. Clear any existing MR data for clean test\necho \"Step 1: Reset MR cursor for clean test...\"\nsqlite3 \"$DB_PATH\" \"DELETE FROM sync_cursors WHERE resource_type = 'merge_requests';\"\n\n# 2. Run MR ingestion\necho \"Step 2: Ingest MRs...\"\ngi ingest --type=merge_requests\n\n# 3. Verify MRs exist\necho \"Step 3: Verify MR count...\"\nMR_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests;\")\necho \" MR count: $MR_COUNT\"\n[ \"$MR_COUNT\" -gt 0 ] || { echo \"FAIL: No MRs ingested\"; exit 1; }\n\n# 4. Verify list command\necho \"Step 4: Test list command...\"\ngi list mrs --limit=5\n\n# 5. Verify count command\necho \"Step 5: Test count command...\"\ngi count mrs\n\n# 6. Verify draft handling\necho \"Step 6: Check draft MRs...\"\nDRAFT_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE draft = 1;\")\necho \" Draft MR count: $DRAFT_COUNT\"\n\n# 7. 
Verify head_sha population\necho \"Step 7: Check head_sha...\"\nSHA_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE head_sha IS NOT NULL;\")\necho \" MRs with head_sha: $SHA_COUNT\"\n\n# 8. Verify references\necho \"Step 8: Check references...\"\nREF_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE references_short IS NOT NULL;\")\necho \" MRs with references: $REF_COUNT\"\n\n# 9. Verify cursor saved\necho \"Step 9: Check cursor...\"\nCURSOR=$(sqlite3 \"$DB_PATH\" \"SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\")\necho \" Cursor: $CURSOR\"\n[ -n \"$CURSOR\" ] || { echo \"FAIL: Cursor not saved\"; exit 1; }\n\n# 10. Re-run and verify minimal refetch\necho \"Step 10: Re-run ingest (should be minimal)...\"\ngi ingest --type=merge_requests\n# Output should show minimal or zero new MRs\n\necho \"\"\necho \"=== Gate A: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Run these in order:\ngi ingest --type=merge_requests\ngi list mrs --limit=10\ngi count mrs\n\n# Verify in DB:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n COUNT(*) as total,\n SUM(CASE WHEN draft = 1 THEN 1 ELSE 0 END) as drafts,\n SUM(CASE WHEN head_sha IS NOT NULL THEN 1 ELSE 0 END) as with_sha,\n SUM(CASE WHEN references_short IS NOT NULL THEN 1 ELSE 0 END) as with_refs\n FROM merge_requests;\n\"\n\n# Re-run (should be no-op):\ngi ingest --type=merge_requests\n```\n\n## Dependencies\nThis gate requires these beads to be complete:\n- bd-3ir (Database migration)\n- bd-5ta (GitLab MR types)\n- bd-34o (MR transformer)\n- bd-iba (GitLab client pagination)\n- bd-ser (MR ingestion module)\n\n## Edge Cases\n- `locked` state is transitional (merge in progress); may not exist in test data\n- Some older GitLab instances may not return `head_sha` for all MRs\n- `work_in_progress` is deprecated but should still work as fallback\n- Very large projects (10k+ MRs) may take significant time on 
first sync","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:00.966522Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.057298Z","closed_at":"2026-01-27T00:48:21.057225Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3ae","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:55.576626Z","created_by":"tayloreernisse"},{"issue_id":"bd-3ae","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.446814Z","created_by":"tayloreernisse"}]} {"id":"bd-3as","title":"Implement timeline event collection and chronological interleaving","description":"## Background\n\nThe event collection phase is steps 4-5 of the timeline pipeline (spec Section 3.2). It takes seed + expanded entity sets and collects all their events from resource event tables, then interleaves chronologically.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 4-5, Section 3.3 (Event Model).\n\n## Codebase Context\n\n- resource_state_events: columns include state, actor_username (not actor_gitlab_id for display), created_at, issue_id, merge_request_id, source_merge_request_iid, source_commit\n- resource_label_events: columns include action ('add'|'remove'), label_name (NULLABLE since migration 012), actor_username, created_at\n- resource_milestone_events: columns include action ('add'|'remove'), milestone_title (NULLABLE since migration 012), actor_username, created_at\n- issues table: created_at, author_username, title, web_url\n- merge_requests table: created_at, author_username, title, web_url, merged_at, updated_at\n- All timestamps are ms epoch UTC (stored as INTEGER)\n\n## Approach\n\nCreate `src/core/timeline_collect.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::timeline::{TimelineEvent, TimelineEventType, EntityRef, ExpandedEntityRef};\n\npub fn collect_events(\n conn: &Connection,\n seed_entities: &[EntityRef],\n 
expanded_entities: &[ExpandedEntityRef],\n evidence_notes: &[TimelineEvent], // from seed phase\n since_ms: Option, // --since filter\n limit: usize, // -n flag (default 100)\n) -> Result> { ... }\n```\n\n### Event Collection Per Entity\n\nFor each entity (seed + expanded), collect:\n\n1. **Creation event** (`Created`):\n ```sql\n -- Issues:\n SELECT created_at, author_username, title, web_url FROM issues WHERE id = ?1\n -- MRs:\n SELECT created_at, author_username, title, web_url FROM merge_requests WHERE id = ?1\n ```\n\n2. **State changes** (`StateChanged { state }`):\n ```sql\n SELECT state, actor_username, created_at FROM resource_state_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2) -- since filter\n ORDER BY created_at ASC\n ```\n NOTE: For MRs, a state='merged' event also produces a separate Merged variant.\n\n3. **Label changes** (`LabelAdded`/`LabelRemoved`):\n ```sql\n SELECT action, label_name, actor_username, created_at FROM resource_label_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL label_name (deleted label): use \"[deleted label]\" as fallback.\n\n4. **Milestone changes** (`MilestoneSet`/`MilestoneRemoved`):\n ```sql\n SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL milestone_title: use \"[deleted milestone]\" as fallback.\n\n5. **Merge event** (Merged, MR only):\n Derive from merge_requests.merged_at (preferred) OR resource_state_events WHERE state='merged'. 
Skip StateChanged when state='merged' — emit only the Merged variant.\n\n### Chronological Interleave\n\n```rust\nevents.sort(); // Uses Ord impl from bd-20e\nif let Some(since) = since_ms {\n events.retain(|e| e.timestamp >= since);\n}\nevents.truncate(limit);\n```\n\nRegister in `src/core/mod.rs`: `pub mod timeline_collect;`\n\n## Acceptance Criteria\n\n- [ ] Collects Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence events\n- [ ] Merged events deduplicated from StateChanged{merged} — emit only Merged variant\n- [ ] NULL label_name/milestone_title handled with fallback text\n- [ ] --since filter applied to all event types\n- [ ] Events sorted chronologically with stable tiebreak\n- [ ] Limit applied AFTER sorting\n- [ ] Evidence notes from seed phase included\n- [ ] is_seed correctly set based on entity source\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_collect.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_collect;`)\n\n## TDD Loop\n\nRED:\n- `test_collect_creation_event` - entity produces Created event\n- `test_collect_state_events` - state changes produce StateChanged events\n- `test_collect_merged_dedup` - state='merged' produces Merged not StateChanged\n- `test_collect_null_label_fallback` - NULL label_name uses fallback text\n- `test_collect_since_filter` - old events excluded\n- `test_collect_chronological_sort` - mixed entity events interleave correctly\n- `test_collect_respects_limit`\n\nTests need in-memory DB with migrations 001-014 applied.\n\nGREEN: Implement SQL queries and event assembly.\n\nVERIFY: `cargo test --lib -- timeline_collect`\n\n## Edge Cases\n\n- MR with merged_at=NULL and no state='merged' event: no Merged event emitted\n- Entity with 0 events in resource tables: only Created event returned\n- NULL actor_username: actor field is None\n- Timestamps at exact 
--since boundary: use >= (inclusive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.703942Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:53:01.160429Z","closed_at":"2026-02-05T21:53:01.160380Z","close_reason":"Completed: Created src/core/timeline_collect.rs with event collection for Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence. Merged dedup (state=merged skipped in favor of Merged variant). NULL label/milestone fallbacks. Since filter, chronological sort, limit. 10 tests pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-3as","depends_on_id":"bd-1ep","type":"blocks","created_at":"2026-02-02T21:33:37.618171Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.705605Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ypa","type":"blocks","created_at":"2026-02-02T21:33:37.575585Z","created_by":"tayloreernisse"}]} -{"id":"bd-3bec","title":"Wire surgical dispatch in run_sync and update robot-docs","description":"## Background\n\nThe existing `run_sync` function (lines 63-360 of `src/cli/commands/sync.rs`) handles the normal full-sync pipeline. Once `run_sync_surgical` (bd-1i4i) is implemented, this bead wires the dispatch: when `SyncOptions` contains issue or MR IIDs, route to the surgical path instead of the normal path. This also requires updating `handle_sync_cmd` (line 2120 of `src/main.rs`) to pass through the new CLI fields (bd-1lja), and updating the robot-docs schema to document the new surgical response fields.\n\n## Approach\n\nThree changes:\n\n**1. 
Dispatch in `run_sync` (src/cli/commands/sync.rs)**\n\nAdd an early check at the top of `run_sync` (after line 68):\n\n```rust\npub async fn run_sync(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result {\n // Surgical dispatch: if any IIDs specified, route to surgical pipeline\n if !options.issues.is_empty() || !options.merge_requests.is_empty() {\n return run_sync_surgical(config, options, run_id, signal).await;\n }\n\n // ... existing normal sync pipeline unchanged ...\n}\n```\n\n**2. Update `handle_sync_cmd` (src/main.rs line 2120)**\n\nPass new fields from `SyncArgs` into `SyncOptions`:\n\n```rust\nlet options = SyncOptions {\n full: args.full && !args.no_full,\n force: args.force && !args.no_force,\n no_embed: args.no_embed,\n no_docs: args.no_docs,\n no_events: args.no_events,\n robot_mode,\n dry_run,\n // New surgical fields (from bd-1lja)\n issues: args.issue.clone(),\n merge_requests: args.mr.clone(),\n project: args.project.clone(),\n preflight_only: args.preflight_only,\n};\n```\n\nAlso: when surgical mode is detected (issues/MRs non-empty), skip the normal SyncRunRecorder setup in `handle_sync_cmd` since `run_sync_surgical` manages its own recorder.\n\n**3. Update robot-docs (src/main.rs handle_robot_docs)**\n\nAdd documentation for the surgical sync response format. The robot-docs output should include:\n- New CLI flags: `--issue`, `--mr`, `-p`/`--project`, `--preflight-only`\n- Surgical response fields: `surgical_mode`, `surgical_iids`, `entity_results`, `preflight_only`\n- `EntitySyncResult` schema: `entity_type`, `iid`, `outcome`, `error`, `toctou_reason`\n- Exit codes for surgical-specific errors\n\n## Acceptance Criteria\n\n1. `lore sync --issue 7 -p group/project` dispatches to `run_sync_surgical`, not normal sync\n2. `lore sync` (no IIDs) follows the existing normal pipeline unchanged\n3. 
`handle_sync_cmd` passes `issues`, `merge_requests`, `project`, `preflight_only` from args to options\n4. `lore robot-docs` output includes surgical sync documentation\n5. All existing sync tests pass without modification\n6. Robot mode JSON output for surgical sync matches documented schema\n\n## Files\n\n- `src/cli/commands/sync.rs` — add dispatch check at top of `run_sync`, add `use super::sync_surgical::run_sync_surgical`\n- `src/main.rs` — update `handle_sync_cmd` to pass new fields, update robot-docs text\n- `src/cli/commands/mod.rs` — ensure `sync_surgical` module is public (may already be done by bd-1i4i)\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync.rs` or a companion test file:\n\n```rust\n#[cfg(test)]\nmod dispatch_tests {\n use super::*;\n\n #[test]\n fn sync_options_with_issues_is_surgical() {\n let options = SyncOptions {\n issues: vec![7],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_without_iids_is_normal() {\n let options = SyncOptions::default();\n assert!(options.issues.is_empty() && options.merge_requests.is_empty());\n }\n\n #[test]\n fn sync_options_with_mrs_is_surgical() {\n let options = SyncOptions {\n merge_requests: vec![10, 20],\n ..SyncOptions::default()\n };\n assert!(!options.issues.is_empty() || !options.merge_requests.is_empty());\n }\n\n #[tokio::test]\n async fn dispatch_routes_to_surgical_when_issues_present() {\n // Integration-level test: verify run_sync with IIDs calls surgical path.\n // This test uses wiremock to mock the surgical path's GitLab calls.\n // The key assertion: when options.issues is non-empty, the function\n // does NOT attempt the normal ingest flow (no project cursor queries).\n let server = wiremock::MockServer::start().await;\n wiremock::Mock::given(wiremock::matchers::method(\"GET\"))\n .and(wiremock::matchers::path_regex(r\"/api/v4/projects/1/issues\"))\n 
.respond_with(wiremock::ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let mut config = Config::default();\n config.gitlab.url = server.uri();\n config.gitlab.token = \"test-token\".to_string();\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync(&config, options, Some(\"dispatch-test\"), &signal).await;\n\n // Should succeed via surgical path (or at least not panic from normal path)\n assert!(result.is_ok());\n let r = result.unwrap();\n assert_eq!(r.surgical_mode, Some(true));\n }\n\n #[test]\n fn robot_docs_includes_surgical_sync() {\n // Verify the robot-docs string contains surgical sync documentation\n // This tests the static text, not runtime behavior\n let docs = include_str!(\"../../../src/main.rs\");\n // The robot-docs handler should mention surgical sync\n // (Actual assertion depends on how robot-docs are generated)\n }\n}\n```\n\n## Edge Cases\n\n- **Dry-run + surgical**: `handle_sync_cmd` currently short-circuits dry-run before SyncRunRecorder setup (line 2149). Surgical dry-run should also short-circuit, but preflight-only is the surgical equivalent. Clarify: `--dry-run --issue 7` should be treated as `--preflight-only --issue 7`.\n- **Normal sync recorder vs surgical recorder**: `handle_sync_cmd` creates a `SyncRunRecorder` for normal sync (line 2159). When dispatching to surgical, skip this since `run_sync_surgical` creates its own. 
Use the `options.issues.is_empty() && options.merge_requests.is_empty()` check to decide.\n- **Robot-docs backward compatibility**: New fields are additive. Existing robot-docs consumers that ignore unknown fields are unaffected.\n- **No project specified with IIDs**: If `--issue 7` is passed without `-p project`, the dispatch should fail with a clear usage error (validation in bd-1lja).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function to call), bd-1lja (SyncOptions extensions with `issues`, `merge_requests`, `project`, `preflight_only` fields), bd-wcja (SyncResult surgical fields for assertion)\n- **No downstream dependents** — this is the final wiring bead for the main code path.\n- Must NOT modify the normal sync pipeline behavior. The dispatch is a pure conditional branch at function entry.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:10.648172Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:03:44.531713Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-3bec","title":"Wire surgical dispatch in run_sync and update robot-docs","description":"## Background\n\nThe existing `run_sync` function (lines 63-360 of `src/cli/commands/sync.rs`) handles the normal full-sync pipeline. Once `run_sync_surgical` (bd-1i4i) is implemented, this bead wires the dispatch: when `SyncOptions` contains issue or MR IIDs, route to the surgical path instead of the normal path. This also requires updating `handle_sync_cmd` (line 2120 of `src/main.rs`) to pass through the new CLI fields (bd-1lja), and updating the robot-docs schema to document the new surgical response fields.\n\n## Approach\n\nThree changes:\n\n**1. 
Dispatch in `run_sync` (src/cli/commands/sync.rs)**\n\nAdd an early check at the top of `run_sync` (after line 68):\n\n```rust\npub async fn run_sync(\n config: &Config,\n options: SyncOptions,\n run_id: Option<&str>,\n signal: &ShutdownSignal,\n) -> Result {\n // Surgical dispatch: if any IIDs specified, route to surgical pipeline\n if options.is_surgical() {\n return run_sync_surgical(config, options, run_id, signal).await;\n }\n\n // ... existing normal sync pipeline unchanged ...\n}\n```\n\n**2. Update `handle_sync_cmd` (src/main.rs line 2120)**\n\nPass new fields from `SyncArgs` into `SyncOptions`:\n\n```rust\nlet options = SyncOptions {\n full: args.full && !args.no_full,\n force: args.force && !args.no_force,\n no_embed: args.no_embed,\n no_docs: args.no_docs,\n no_events: args.no_events,\n robot_mode,\n dry_run,\n // New surgical fields (from bd-1lja)\n issue_iids: args.issue.clone(),\n mr_iids: args.mr.clone(),\n project: args.project.clone(),\n preflight_only: args.preflight_only,\n};\n```\n\nAlso: when surgical mode is detected (issues/MRs non-empty), skip the normal SyncRunRecorder setup in `handle_sync_cmd` since `run_sync_surgical` manages its own recorder.\n\n**3. Update robot-docs (src/main.rs handle_robot_docs)**\n\nAdd documentation for the surgical sync response format. The robot-docs output should include:\n- New CLI flags: `--issue`, `--mr`, `-p`/`--project`, `--preflight-only`\n- Surgical response fields: `surgical_mode`, `surgical_iids`, `entity_results`, `preflight_only`\n- `EntitySyncResult` schema: `entity_type`, `iid`, `outcome`, `error`, `toctou_reason`\n- Exit codes for surgical-specific errors\n\n## Acceptance Criteria\n\n1. `lore sync --issue 7 -p group/project` dispatches to `run_sync_surgical`, not normal sync\n2. `lore sync` (no IIDs) follows the existing normal pipeline unchanged\n3. `handle_sync_cmd` passes `issues`, `merge_requests`, `project`, `preflight_only` from args to options\n4. 
`lore robot-docs` output includes surgical sync documentation\n5. All existing sync tests pass without modification\n6. Robot mode JSON output for surgical sync matches documented schema\n\n## Files\n\n- `src/cli/commands/sync.rs` — add dispatch check at top of `run_sync`, add `use super::sync_surgical::run_sync_surgical`\n- `src/main.rs` — update `handle_sync_cmd` to pass new fields, update robot-docs text\n- `src/cli/commands/mod.rs` — ensure `sync_surgical` module is public (may already be done by bd-1i4i)\n\n## TDD Anchor\n\nTests in `src/cli/commands/sync.rs` or a companion test file:\n\n```rust\n#[cfg(test)]\nmod dispatch_tests {\n use super::*;\n\n #[test]\n fn sync_options_with_issues_is_surgical() {\n let options = SyncOptions {\n issue_iids: vec![7],\n ..SyncOptions::default()\n };\n assert!(options.is_surgical());\n }\n\n #[test]\n fn sync_options_without_iids_is_normal() {\n let options = SyncOptions::default();\n assert!(!options.is_surgical());\n }\n\n #[test]\n fn sync_options_with_mrs_is_surgical() {\n let options = SyncOptions {\n mr_iids: vec![10, 20],\n ..SyncOptions::default()\n };\n assert!(options.is_surgical());\n }\n\n #[tokio::test]\n async fn dispatch_routes_to_surgical_when_issues_present() {\n // Integration-level test: verify run_sync with IIDs calls surgical path.\n // This test uses wiremock to mock the surgical path's GitLab calls.\n // The key assertion: when options.issue_iids is non-empty, the function\n // does NOT attempt the normal ingest flow (no project cursor queries).\n let server = wiremock::MockServer::start().await;\n wiremock::Mock::given(wiremock::matchers::method(\"GET\"))\n .and(wiremock::matchers::path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(wiremock::ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 100, \"iid\": 7, \"project_id\": 1, \"title\": \"Test\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n 
\"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let mut config = Config::default();\n config.gitlab.url = server.uri();\n config.gitlab.token = \"test-token\".to_string();\n let options = SyncOptions {\n issue_iids: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n let result = run_sync(&config, options, Some(\"dispatch-test\"), &signal).await;\n\n // Should succeed via surgical path (or at least not panic from normal path)\n assert!(result.is_ok());\n let r = result.unwrap();\n assert_eq!(r.surgical_mode, Some(true));\n }\n\n #[test]\n fn robot_docs_includes_surgical_sync() {\n // Verify the robot-docs string contains surgical sync documentation\n // This tests the static text, not runtime behavior\n let docs = include_str!(\"../../../src/main.rs\");\n // The robot-docs handler should mention surgical sync\n // (Actual assertion depends on how robot-docs are generated)\n }\n}\n```\n\n## Edge Cases\n\n- **Dry-run + surgical**: `handle_sync_cmd` currently short-circuits dry-run before SyncRunRecorder setup (line 2149). Surgical dry-run should also short-circuit, but preflight-only is the surgical equivalent. Clarify: `--dry-run --issue 7` should be treated as `--preflight-only --issue 7`.\n- **Normal sync recorder vs surgical recorder**: `handle_sync_cmd` creates a `SyncRunRecorder` for normal sync (line 2159). When dispatching to surgical, skip this since `run_sync_surgical` creates its own. Use `!options.is_surgical()` to decide.\n- **Robot-docs backward compatibility**: New fields are additive. 
Existing robot-docs consumers that ignore unknown fields are unaffected.\n- **No project specified with IIDs**: If `--issue 7` is passed without `-p project`, the dispatch should fail with a clear usage error (validation in bd-1lja).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function to call), bd-1lja (SyncOptions extensions with `issues`, `merge_requests`, `project`, `preflight_only` fields), bd-wcja (SyncResult surgical fields for assertion)\n- **No downstream dependents** — this is the final wiring bead for the main code path.\n- Must NOT modify the normal sync pipeline behavior. The dispatch is a pure conditional branch at function entry.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:10.648172Z","created_by":"tayloreernisse","updated_at":"2026-02-18T20:36:35.149830Z","closed_at":"2026-02-18T20:36:35.149779Z","close_reason":"Surgical dispatch wired: run_sync routes to run_sync_surgical when is_surgical(), handle_sync_cmd skips recorder for surgical mode, dry-run+surgical→preflight-only, removed wrong embed validation, robot-docs updated with surgical schema","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-3bo","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\nCommands:\n- gi count issues → 'Issues: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\nFiles: src/cli/commands/count.ts\nDone when: Counts match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:16.190875Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156293Z","deleted_at":"2026-01-25T15:21:35.156290Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-3bpk","title":"NOTE-0A: Upsert/sweep 
for issue discussion notes","description":"## Background\nIssue discussion note ingestion uses a delete/reinsert pattern (DELETE FROM notes WHERE discussion_id = ? at line 132-135 of src/ingestion/discussions.rs then re-insert). This makes notes.id unstable across syncs. MR discussion notes already use upsert (ON CONFLICT(gitlab_id) DO UPDATE at line 470-536 of src/ingestion/mr_discussions.rs) producing stable IDs. Phase 2 depends on stable notes.id as source_id for note documents.\n\n## Approach\nRefactor src/ingestion/discussions.rs to match the MR pattern in src/ingestion/mr_discussions.rs:\n\n1. Create shared NoteUpsertOutcome struct (in src/ingestion/discussions.rs, also used by mr_discussions.rs):\n pub struct NoteUpsertOutcome { pub local_note_id: i64, pub changed_semantics: bool }\n\n2. Replace insert_note() (line 201-233) with upsert_note_for_issue(). Current signature is:\n fn insert_note(conn: &Connection, discussion_id: i64, note: &NormalizedNote, payload_id: Option) -> Result<()>\n New signature:\n fn upsert_note_for_issue(conn: &Connection, discussion_id: i64, note: &NormalizedNote, last_seen_at: i64, payload_id: Option) -> Result\n\n Use ON CONFLICT(gitlab_id) DO UPDATE SET body, note_type, updated_at, last_seen_at, resolvable, resolved, resolved_by, resolved_at, position_old_path, position_new_path, position_old_line, position_new_line, position_type, position_line_range_start, position_line_range_end, position_base_sha, position_start_sha, position_head_sha\n\n IMPORTANT: The current issue insert_note() only populates: gitlab_id, discussion_id, project_id, note_type, is_system, author_username, body, created_at, updated_at, last_seen_at, position (integer array order), resolvable, resolved, resolved_by, resolved_at, raw_payload_id. It does NOT populate the decomposed position columns (position_new_path, etc.). The MR upsert_note() at line 470 DOES populate all decomposed position columns. Your upsert must include ALL columns from the MR pattern. 
The NormalizedNote struct (from src/gitlab/transformers.rs) has all position fields.\n\n3. Change detection via pre-read: SELECT existing note before upsert, compare semantic fields (body, note_type, resolved, resolved_by, positions). Exclude updated_at/last_seen_at from semantic comparison. Use IS NOT for NULL-safe comparison.\n\n4. Add sweep_stale_issue_notes(conn, discussion_id, last_seen_at) — DELETE FROM notes WHERE discussion_id = ? AND last_seen_at < ?\n\n5. Replace the delete-reinsert loop (lines 132-139) with:\n for note in notes { let outcome = upsert_note_for_issue(&tx, local_discussion_id, ¬e, last_seen_at, None)?; }\n sweep_stale_issue_notes(&tx, local_discussion_id, last_seen_at)?;\n\n6. Update upsert_note() in mr_discussions.rs (line 470) to return NoteUpsertOutcome with same semantic change detection. Current signature returns Result<()>.\n\nReference files:\n- src/ingestion/mr_discussions.rs: upsert_note() line 470, sweep_stale_notes() line 551\n- src/ingestion/discussions.rs: insert_note() line 201, delete pattern line 132-135\n- src/gitlab/transformers.rs: NormalizedNote struct definition\n\n## Files\n- MODIFY: src/ingestion/discussions.rs (refactor insert_note -> upsert + sweep, lines 132-233)\n- MODIFY: src/ingestion/mr_discussions.rs (return NoteUpsertOutcome from upsert_note at line 470)\n\n## TDD Anchor\nRED: test_issue_note_upsert_stable_id — insert 2 notes, record IDs, re-sync same gitlab_ids, assert IDs unchanged.\nGREEN: Implement upsert_note_for_issue with ON CONFLICT.\nVERIFY: cargo test upsert_stable_id -- --nocapture\nTests: test_issue_note_upsert_detects_body_change, test_issue_note_upsert_unchanged_returns_false, test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change, test_issue_note_sweep_removes_stale, test_issue_note_upsert_returns_local_id\n\n## Acceptance Criteria\n- [ ] upsert_note_for_issue() uses ON CONFLICT(gitlab_id) DO UPDATE\n- [ ] Local note IDs stable across re-syncs of identical data\n- [ ] 
changed_semantics = true only for body/note_type/resolved/position changes\n- [ ] changed_semantics = false for updated_at-only changes\n- [ ] sweep removes notes with stale last_seen_at\n- [ ] MR upsert_note() returns NoteUpsertOutcome\n- [ ] Issue upsert populates ALL position columns (matching MR pattern)\n- [ ] All 6 tests pass, clippy clean\n\n## Edge Cases\n- NULL body: IS NOT comparison handles NULLs correctly\n- UNIQUE(gitlab_id) already exists on notes table (migration 002)\n- last_seen_at prevents stale-sweep of notes currently being ingested\n- Issue notes currently don't populate position_new_path etc. — the new upsert must extract these from NormalizedNote (check that the transformer populates them for issue DiffNotes)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:59:14.783336Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:24.151831Z","closed_at":"2026-02-12T18:13:24.151781Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"],"dependencies":[{"issue_id":"bd-3bpk","depends_on_id":"bd-18bf","type":"blocks","created_at":"2026-02-12T17:04:47.776788Z","created_by":"tayloreernisse"},{"issue_id":"bd-3bpk","depends_on_id":"bd-2b28","type":"blocks","created_at":"2026-02-12T17:04:47.932914Z","created_by":"tayloreernisse"},{"issue_id":"bd-3bpk","depends_on_id":"bd-2ezb","type":"blocks","created_at":"2026-02-12T17:04:49.450541Z","created_by":"tayloreernisse"},{"issue_id":"bd-3bpk","depends_on_id":"bd-jbfw","type":"blocks","created_at":"2026-02-12T17:04:48.008740Z","created_by":"tayloreernisse"}]} {"id":"bd-3cjp","title":"NOTE-2I: Batch parent metadata cache for note regeneration","description":"## Background\nextract_note_document() (from NOTE-2C) fetches parent entity metadata per note via SQL queries. During initial backfill of ~8K notes, this creates N+1 amplification — 50 notes on same MR = 50 identical parent lookups. 
This is a performance optimization for batch regeneration only.\n\n## Approach\n1. Add ParentMetadataCache struct in src/documents/extractor.rs:\n pub struct ParentMetadataCache {\n cache: HashMap<(String, i64), ParentMetadata>,\n }\n Key: (noteable_type: String, parent_local_id: i64)\n ParentMetadata struct: { iid: i64, title: String, web_url: String, labels: Vec, project_path: String }\n\n Methods:\n - pub fn new() -> Self\n - pub fn get_or_fetch(&mut self, conn: &Connection, noteable_type: &str, parent_id: i64) -> Result>\n get_or_fetch uses HashMap entry API: on miss, fetches from DB (same queries as extract_note_document), caches, returns ref.\n\n2. Add pub fn extract_note_document_cached(conn: &Connection, note_id: i64, cache: &mut ParentMetadataCache) -> Result>:\n Same logic as extract_note_document but calls cache.get_or_fetch() instead of inline parent queries. The uncached version remains for single-note use.\n\n3. Update batch regeneration loop in src/documents/regenerator.rs. The main regeneration loop is in regenerate_dirty_documents() (top of file, ~line 20). It processes dirty entries one at a time via regenerate_one() (line 86). For batch cache to work:\n - Create ParentMetadataCache before the loop\n - In the SourceType::Note arm of regenerate_one, pass the cache through\n - This requires either making regenerate_one() take an optional cache parameter, or restructuring to handle Note specially in the loop body.\n\n Cleanest approach: Add cache: &mut Option parameter to regenerate_one(). Initialize as Some(ParentMetadataCache::new()) before the loop. Only SourceType::Note uses it. 
Other types ignore it.\n\n Cache is created fresh per regenerate_dirty_documents() call — no cross-invocation persistence.\n\n## Files\n- MODIFY: src/documents/extractor.rs (add ParentMetadataCache struct + extract_note_document_cached)\n- MODIFY: src/documents/regenerator.rs (add cache parameter to regenerate_one, use in batch loop)\n- MODIFY: src/documents/mod.rs (export ParentMetadataCache if needed externally)\n\n## TDD Anchor\nRED: test_note_regeneration_batch_uses_cache — insert project, issue, 10 notes on same issue, mark all dirty, regenerate all, assert all 10 documents created correctly.\nGREEN: Implement ParentMetadataCache and extract_note_document_cached.\nVERIFY: cargo test note_regeneration_batch -- --nocapture\nTests: test_note_regeneration_cache_consistent_with_direct_extraction (cached output == uncached output), test_note_regeneration_cache_invalidates_across_parents (notes from different parents get correct metadata)\n\n## Acceptance Criteria\n- [ ] ParentMetadataCache reduces DB queries during batch regeneration (10 notes on 1 parent = 1 parent fetch, not 10)\n- [ ] Cached extraction produces identical DocumentData output to uncached\n- [ ] Cache keyed per (noteable_type, parent_id) — no cross-parent leakage\n- [ ] Cache scoped to single regenerate_dirty_documents call — no persistence or invalidation complexity\n- [ ] All 3 tests pass\n\n## Dependency Context\n- Depends on NOTE-2C (bd-18yh): extract_note_document function must exist to create the cached variant\n\n## Edge Cases\n- Parent deleted between cache creation and lookup: get_or_fetch returns None, extract_note_document_cached returns None (same as uncached)\n- Very large batch (10K+ notes): cache grows but is bounded by number of unique parents (typically <100 issues/MRs)\n- Cache miss for orphaned discussion: cached None result prevents repeated failed 
lookups","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-12T17:03:00.515490Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:15.870738Z","closed_at":"2026-02-12T18:13:15.870693Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"]} @@ -221,7 +221,7 @@ {"id":"bd-3ir","title":"Add database migration 006_merge_requests.sql","description":"## Background\nFoundation for all CP2 MR features. This migration defines the schema that all other MR components depend on. Must complete BEFORE any other CP2 work can proceed.\n\n## Approach\nCreate migration file that adds:\n1. `merge_requests` table with all CP2 fields\n2. `mr_labels`, `mr_assignees`, `mr_reviewers` junction tables\n3. Indexes on discussions for MR queries\n4. DiffNote position columns on notes table\n\n## Files\n- `migrations/006_merge_requests.sql` - New migration file\n- `src/core/db.rs` - Update MIGRATIONS const to include version 6\n\n## Acceptance Criteria\n- [ ] Migration file exists at `migrations/006_merge_requests.sql`\n- [ ] `merge_requests` table has columns: id, gitlab_id, project_id, iid, title, description, state, draft, author_username, source_branch, target_branch, head_sha, references_short, references_full, detailed_merge_status, merge_user_username, created_at, updated_at, merged_at, closed_at, last_seen_at, discussions_synced_for_updated_at, discussions_sync_last_attempt_at, discussions_sync_attempts, discussions_sync_last_error, web_url, raw_payload_id\n- [ ] `mr_labels` junction table exists with (merge_request_id, label_id) PK\n- [ ] `mr_assignees` junction table exists with (merge_request_id, username) PK\n- [ ] `mr_reviewers` junction table exists with (merge_request_id, username) PK\n- [ ] `idx_discussions_mr_id` and `idx_discussions_mr_resolved` indexes exist\n- [ ] `notes` table has new columns: position_type, position_line_range_start, position_line_range_end, position_base_sha, 
position_start_sha, position_head_sha\n- [ ] `gi doctor` runs without migration errors\n- [ ] `cargo test` passes\n\n## TDD Loop\nRED: Cannot open DB with version 6 schema\nGREEN: Add migration file with full SQL\nVERIFY: `cargo run -- doctor` shows healthy DB\n\n## SQL Reference (from PRD)\n```sql\n-- Merge requests table\nCREATE TABLE merge_requests (\n id INTEGER PRIMARY KEY,\n gitlab_id INTEGER UNIQUE NOT NULL,\n project_id INTEGER NOT NULL REFERENCES projects(id),\n iid INTEGER NOT NULL,\n title TEXT,\n description TEXT,\n state TEXT, -- opened | merged | closed | locked\n draft INTEGER NOT NULL DEFAULT 0, -- SQLite boolean\n author_username TEXT,\n source_branch TEXT,\n target_branch TEXT,\n head_sha TEXT,\n references_short TEXT,\n references_full TEXT,\n detailed_merge_status TEXT,\n merge_user_username TEXT,\n created_at INTEGER, -- ms epoch UTC\n updated_at INTEGER,\n merged_at INTEGER,\n closed_at INTEGER,\n last_seen_at INTEGER NOT NULL,\n discussions_synced_for_updated_at INTEGER,\n discussions_sync_last_attempt_at INTEGER,\n discussions_sync_attempts INTEGER DEFAULT 0,\n discussions_sync_last_error TEXT,\n web_url TEXT,\n raw_payload_id INTEGER REFERENCES raw_payloads(id)\n);\nCREATE INDEX idx_mrs_project_updated ON merge_requests(project_id, updated_at);\nCREATE UNIQUE INDEX uq_mrs_project_iid ON merge_requests(project_id, iid);\n-- ... 
(see PRD for full index list)\n\n-- Junction tables\nCREATE TABLE mr_labels (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n label_id INTEGER REFERENCES labels(id) ON DELETE CASCADE,\n PRIMARY KEY(merge_request_id, label_id)\n);\n\nCREATE TABLE mr_assignees (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n username TEXT NOT NULL,\n PRIMARY KEY(merge_request_id, username)\n);\n\nCREATE TABLE mr_reviewers (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n username TEXT NOT NULL,\n PRIMARY KEY(merge_request_id, username)\n);\n\n-- DiffNote position columns (ALTER TABLE)\nALTER TABLE notes ADD COLUMN position_type TEXT;\nALTER TABLE notes ADD COLUMN position_line_range_start INTEGER;\nALTER TABLE notes ADD COLUMN position_line_range_end INTEGER;\nALTER TABLE notes ADD COLUMN position_base_sha TEXT;\nALTER TABLE notes ADD COLUMN position_start_sha TEXT;\nALTER TABLE notes ADD COLUMN position_head_sha TEXT;\n\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (6, strftime('%s', 'now') * 1000, 'Merge requests, MR labels, assignees, reviewers');\n```\n\n## Edge Cases\n- SQLite does not support ADD CONSTRAINT - FK defined as nullable in CP1\n- `locked` state is transitional (merge-in-progress) - store as first-class\n- discussions_synced_for_updated_at prevents redundant discussion refetch","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.101470Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:06:43.899079Z","closed_at":"2026-01-27T00:06:43.898875Z","close_reason":"Migration 006_merge_requests.sql created and verified. 
Schema v6 applied successfully with all tables, indexes, and position columns.","compaction_level":0,"original_size":0} {"id":"bd-3ir1","title":"Implement terminal safety module (sanitize + URL policy + redact)","description":"## Background\nGitLab content (issue descriptions, comments, MR descriptions) can contain arbitrary text including ANSI escape sequences, bidirectional text overrides, OSC hyperlinks, and C1 control codes. Displaying unsanitized content in a terminal can hijack cursor position, inject fake UI elements, or cause rendering corruption. This module provides a sanitization layer that strips dangerous sequences while preserving a safe ANSI subset for readability.\n\n## Approach\nCreate `crates/lore-tui/src/safety.rs` with:\n- `sanitize_for_terminal(input: &str) -> String` — the main entry point\n- Strip C1 control codes (0x80-0x9F)\n- Strip OSC sequences (ESC ] ... ST)\n- Strip cursor movement (CSI A/B/C/D/E/F/G/H/J/K)\n- Strip bidi overrides (U+202A-U+202E, U+2066-U+2069)\n- **PRESERVE safe ANSI subset**: SGR sequences for bold (1), italic (3), underline (4), reset (0), and standard foreground/background colors (30-37, 40-47, 90-97, 100-107). 
These improve readability of formatted GitLab content.\n- `UrlPolicy` enum: `Strip`, `Footnote`, `Passthrough` — controls how OSC 8 hyperlinks are handled\n- `RedactPattern` for optional PII/secret redaction (email, token patterns)\n- All functions are pure (no I/O), fully testable\n\nReference existing terminal safety patterns in ftui-core if available.\n\n## Acceptance Criteria\n- [ ] sanitize_for_terminal strips C1, OSC, cursor movement, bidi overrides\n- [ ] sanitize_for_terminal preserves bold, italic, underline, reset, and standard color SGR sequences\n- [ ] UrlPolicy::Strip removes OSC 8 hyperlinks entirely\n- [ ] UrlPolicy::Footnote converts OSC 8 hyperlinks to numbered footnotes [1] with URL list at end\n- [ ] RedactPattern matches common secret patterns (tokens, emails) and replaces with [REDACTED]\n- [ ] No unsafe code\n- [ ] Unit tests cover each dangerous sequence type AND verify safe sequences are preserved\n- [ ] Fuzz test with 1000 random byte sequences: no panic\n\n## Files\n- CREATE: crates/lore-tui/src/safety.rs\n- MODIFY: crates/lore-tui/src/lib.rs (add pub mod safety)\n\n## TDD Anchor\nRED: Write `test_strips_cursor_movement` that asserts CSI sequences for cursor up/down/left/right are removed from input while bold SGR is preserved.\nGREEN: Implement the sanitizer state machine that categorizes and filters escape sequences.\nVERIFY: cargo test -p lore-tui safety -- --nocapture\n\nAdditional tests:\n- test_strips_c1_control_codes\n- test_strips_bidi_overrides\n- test_strips_osc_sequences\n- test_preserves_bold_italic_underline_reset\n- test_preserves_standard_colors\n- test_url_policy_strip\n- test_url_policy_footnote\n- test_redact_patterns\n- test_fuzz_no_panic\n\n## Edge Cases\n- Malformed/truncated escape sequences (ESC without closing) — must not consume following text\n- Nested SGR sequences (e.g., bold+color combined in single CSI) — preserve entire sequence if all parameters are safe\n- UTF-8 multibyte chars adjacent to escape sequences 
— must not corrupt char boundaries\n- Empty input returns empty string\n- Input with only safe content passes through unchanged\n\n## Dependency Context\nDepends on bd-3ddw (scaffold) for the crate structure to exist. No other dependencies — this is a pure utility module.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T16:54:30.165761Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:21.987998Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-3ir1","depends_on_id":"bd-1cj0","type":"blocks","created_at":"2026-02-12T18:11:21.987966Z","created_by":"tayloreernisse"},{"issue_id":"bd-3ir1","depends_on_id":"bd-3ddw","type":"blocks","created_at":"2026-02-12T17:09:28.594948Z","created_by":"tayloreernisse"}]} {"id":"bd-3j6","title":"Add transform_mr_discussion and transform_notes_with_diff_position","description":"## Background\nExtends discussion transformer for MR context. MR discussions can contain DiffNotes with file position metadata. This is critical for code review context in CP3 document generation.\n\n## Approach\nAdd two new functions to existing `src/gitlab/transformers/discussion.rs`:\n1. `transform_mr_discussion()` - Transform discussion with MR reference\n2. 
`transform_notes_with_diff_position()` - Extract DiffNote position metadata\n\nCP1 already has the polymorphic `NormalizedDiscussion` with `NoteableRef` enum - reuse that pattern.\n\n## Files\n- `src/gitlab/transformers/discussion.rs` - Add new functions\n- `tests/diffnote_tests.rs` - DiffNote position extraction tests\n- `tests/mr_discussion_tests.rs` - MR discussion transform tests\n\n## Acceptance Criteria\n- [ ] `transform_mr_discussion()` returns `NormalizedDiscussion` with `merge_request_id: Some(local_mr_id)`\n- [ ] `transform_notes_with_diff_position()` returns `Result, String>`\n- [ ] DiffNote position fields extracted: `position_old_path`, `position_new_path`, `position_old_line`, `position_new_line`\n- [ ] Extended position fields extracted: `position_type`, `position_line_range_start`, `position_line_range_end`\n- [ ] SHA triplet extracted: `position_base_sha`, `position_start_sha`, `position_head_sha`\n- [ ] Strict timestamp parsing - returns `Err` on invalid timestamps (no `unwrap_or(0)`)\n- [ ] `cargo test diffnote` passes\n- [ ] `cargo test mr_discussion` passes\n\n## TDD Loop\nRED: `cargo test diffnote_position` -> test fails\nGREEN: Add position extraction logic\nVERIFY: `cargo test diffnote`\n\n## Function Signatures\n```rust\n/// Transform GitLab discussion for MR context.\n/// Reuses existing transform_discussion logic, just with MR reference.\npub fn transform_mr_discussion(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n local_mr_id: i64,\n) -> NormalizedDiscussion {\n // Use existing transform_discussion with NoteableRef::MergeRequest(local_mr_id)\n transform_discussion(\n gitlab_discussion,\n local_project_id,\n NoteableRef::MergeRequest(local_mr_id),\n )\n}\n\n/// Transform notes with DiffNote position extraction.\n/// Returns Result to enforce strict timestamp parsing.\npub fn transform_notes_with_diff_position(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n) -> Result, String>\n```\n\n## DiffNote 
Position Extraction\n```rust\n// Extract position metadata if present\nlet (old_path, new_path, old_line, new_line, position_type, lr_start, lr_end, base_sha, start_sha, head_sha) = note\n .position\n .as_ref()\n .map(|pos| (\n pos.old_path.clone(),\n pos.new_path.clone(),\n pos.old_line,\n pos.new_line,\n pos.position_type.clone(), // \"text\" | \"image\" | \"file\"\n pos.line_range.as_ref().map(|r| r.start_line),\n pos.line_range.as_ref().map(|r| r.end_line),\n pos.base_sha.clone(),\n pos.start_sha.clone(),\n pos.head_sha.clone(),\n ))\n .unwrap_or((None, None, None, None, None, None, None, None, None, None));\n```\n\n## Strict Timestamp Parsing\n```rust\n// CRITICAL: Return error on invalid timestamps, never zero\nlet created_at = iso_to_ms(¬e.created_at)\n .ok_or_else(|| format\\!(\n \"Invalid note.created_at for note {}: {}\",\n note.id, note.created_at\n ))?;\n```\n\n## NormalizedNote Fields for DiffNotes\n```rust\nNormalizedNote {\n // ... existing fields ...\n // DiffNote position metadata\n position_old_path: old_path,\n position_new_path: new_path,\n position_old_line: old_line,\n position_new_line: new_line,\n // Extended position\n position_type,\n position_line_range_start: lr_start,\n position_line_range_end: lr_end,\n // SHA triplet\n position_base_sha: base_sha,\n position_start_sha: start_sha,\n position_head_sha: head_sha,\n}\n```\n\n## Edge Cases\n- Notes without position should have all position fields as None\n- Invalid timestamp should fail the entire discussion (no partial results)\n- File renames: `old_path \\!= new_path` indicates a renamed file\n- Multi-line comments: `line_range` present means comment spans lines 45-48","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:41.208380Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:20:13.473091Z","closed_at":"2026-01-27T00:20:13.473031Z","close_reason":"Implemented transform_mr_discussion() and transform_notes_with_diff_position() with full DiffNote 
position extraction:\n- Extended NormalizedNote with 10 DiffNote position fields (path, line, type, line_range, SHA triplet)\n- Added strict timestamp parsing that returns Err on invalid timestamps\n- Created 13 diffnote_position_tests covering all extraction paths and error cases\n- Created 6 mr_discussion_tests verifying MR reference handling\n- All 161 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3j6","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.207801Z","created_by":"tayloreernisse"},{"issue_id":"bd-3j6","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.244201Z","created_by":"tayloreernisse"}]} -{"id":"bd-3jqx","title":"Implement async integration tests: cancellation, timeout, embed isolation, payload integrity","description":"## Background\n\nThe surgical sync pipeline involves async operations, cancellation signals, timeouts, scoped embedding, and multi-entity coordination. Unit tests in individual beads cover their own logic, but integration tests are needed to verify the full pipeline under realistic conditions: cancellation at different stages, timeout behavior with continuation, embedding scope isolation (only affected documents get embedded), and payload integrity (project_id mismatches rejected). These tests use wiremock for HTTP mocking and tokio for async runtime.\n\n## Approach\n\nCreate `tests/surgical_integration.rs` as an integration test file (Rust convention: `tests/` directory for integration tests). Six test functions covering the critical behavioral properties of the surgical pipeline:\n\n1. **Cancellation before preflight**: Signal cancelled before any HTTP call. Verify: recorder marked failed, no GitLab requests made, result has zero updates.\n2. **Cancellation during dependent stage**: Signal cancelled after preflight succeeds but during discussion fetch. 
Verify: partial results recorded, recorder marked failed, entities processed before cancellation have outcomes.\n3. **Per-entity timeout with continuation**: One entity's GitLab endpoint is slow (wiremock delay). Verify: that entity gets `failed` outcome with timeout error, remaining entities continue and succeed.\n4. **Embed scope isolation**: Sync two issues. Verify: only documents generated from those two issues are embedded, not the entire corpus. Assert by checking document IDs passed to embed function.\n5. **Payload project_id mismatch rejection**: Preflight returns an issue with `project_id` different from the resolved project. Verify: that entity gets `failed` outcome with clear error, other entities unaffected.\n6. **Successful full pipeline**: Sync one issue end-to-end through all stages. Verify: SyncResult has correct counts, entity_results has `synced` outcome, documents regenerated, embeddings created.\n\nAll tests use in-memory SQLite (`create_connection(Path::new(\":memory:\"))` + `run_migrations`) and wiremock `MockServer`.\n\n## Acceptance Criteria\n\n1. All 6 tests compile and pass\n2. Tests are isolated (each creates its own DB and mock server)\n3. Cancellation tests verify recorder state (failed status in sync_runs table)\n4. Timeout test uses wiremock delay, not `tokio::time::sleep` on the test side\n5. Embed isolation test verifies document-level scoping, not just function call\n6. 
Tests run in CI without flakiness (no real network, no real Ollama)\n\n## Files\n\n- `tests/surgical_integration.rs` — all 6 integration tests\n\n## TDD Anchor\n\n```rust\n// tests/surgical_integration.rs\n\nuse lore::cli::commands::sync::{SyncOptions, SyncResult};\nuse lore::core::db::{create_connection, run_migrations};\nuse lore::core::shutdown::ShutdownSignal;\nuse lore::Config;\nuse std::path::Path;\nuse std::time::Duration;\nuse wiremock::{Mock, MockServer, ResponseTemplate};\nuse wiremock::matchers::{method, path_regex};\n\nfn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n}\n\nfn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n}\n\nfn mock_issue_json(iid: u64) -> serde_json::Value {\n serde_json::json!({\n \"id\": 100 + iid, \"iid\": iid, \"project_id\": 1, \"title\": format!(\"Issue {}\", iid),\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": format!(\"https://gitlab.example.com/group/project/-/issues/{}\", iid)\n })\n}\n\n#[tokio::test]\nasync fn cancellation_before_preflight() {\n let server = MockServer::start().await;\n // No mocks mounted — if any request is made, wiremock will return 404\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before anything starts\n\n let result = 
lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"cancel-pre\"), &signal,\n ).await.unwrap();\n\n assert_eq!(result.issues_updated, 0);\n assert_eq!(result.mrs_updated, 0);\n // Verify no HTTP requests were made\n assert_eq!(server.received_requests().await.unwrap().len(), 0);\n}\n\n#[tokio::test]\nasync fn cancellation_during_dependent_stage() {\n let server = MockServer::start().await;\n // Mock issue fetch (preflight succeeds)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(7)])))\n .mount(&server).await;\n // Mock discussion fetch with delay (gives time to cancel)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([]))\n .set_body_delay(Duration::from_secs(2)))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n // Cancel after a short delay (after preflight, during dependents)\n let signal_clone = signal.clone();\n tokio::spawn(async move {\n tokio::time::sleep(Duration::from_millis(200)).await;\n signal_clone.cancel();\n });\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"cancel-dep\"), &signal,\n ).await.unwrap();\n\n // Preflight should have run, but ingest may be partial\n assert!(result.surgical_mode == Some(true));\n}\n\n#[tokio::test]\nasync fn per_entity_timeout_with_continuation() {\n let server = MockServer::start().await;\n // Issue 7: slow response (simulates timeout)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\\?.*iids\\[\\]=7\"))\n .respond_with(ResponseTemplate::new(200)\n 
.set_body_json(serde_json::json!([mock_issue_json(7)]))\n .set_body_delay(Duration::from_secs(30)))\n .mount(&server).await;\n // Issue 42: fast response\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\\?.*iids\\[\\]=42\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(42)])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7, 42],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n // With a per-entity timeout, issue 7 should fail, issue 42 should succeed\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"timeout-test\"), &signal,\n ).await.unwrap();\n\n let entities = result.entity_results.as_ref().unwrap();\n // One should be failed (timeout), one should be synced\n let failed = entities.iter().filter(|e| e.outcome == \"failed\").count();\n let synced = entities.iter().filter(|e| e.outcome == \"synced\").count();\n assert!(failed >= 1 || synced >= 1, \"Expected mixed outcomes\");\n}\n\n#[tokio::test]\nasync fn embed_scope_isolation() {\n let server = MockServer::start().await;\n // Mock two issues\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([\n mock_issue_json(7), mock_issue_json(42)\n ])))\n .mount(&server).await;\n // Mock empty discussions for both\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/\\d+/discussions\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7, 42],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n no_embed: false,\n ..SyncOptions::default()\n };\n let 
signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"embed-iso\"), &signal,\n ).await.unwrap();\n\n // Embedding should only have processed documents from issues 7 and 42\n // Not the full corpus. Verify via document counts.\n assert!(result.documents_embedded <= 2,\n \"Expected at most 2 documents embedded (one per issue), got {}\",\n result.documents_embedded);\n}\n\n#[tokio::test]\nasync fn payload_project_id_mismatch_rejection() {\n let server = MockServer::start().await;\n // Return issue with project_id=999 (doesn't match resolved project_id=1)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 200, \"iid\": 7, \"project_id\": 999, \"title\": \"Wrong Project\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/other/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"mismatch\"), &signal,\n ).await.unwrap();\n\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"failed\");\n assert!(entities[0].error.as_ref().unwrap().contains(\"project_id\"));\n}\n\n#[tokio::test]\nasync fn successful_full_pipeline() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n 
.set_body_json(serde_json::json!([mock_issue_json(7)])))\n .mount(&server).await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n // Mock any resource event endpoints\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/resource_\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issues: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n no_embed: true, // Skip embed to avoid Ollama dependency\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"full-pipe\"), &signal,\n ).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n assert_eq!(result.preflight_only, Some(false));\n\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].entity_type, \"issue\");\n assert_eq!(entities[0].iid, 7);\n assert_eq!(entities[0].outcome, \"synced\");\n assert!(entities[0].error.is_none());\n\n assert!(result.issues_updated >= 1);\n assert!(result.documents_regenerated >= 1);\n}\n```\n\n## Edge Cases\n\n- **Wiremock delay vs tokio timeout**: Use `set_body_delay` on wiremock, not `tokio::time::sleep` in tests. The per-entity timeout in the orchestrator (bd-1i4i) should use `tokio::time::timeout` around the HTTP call.\n- **Embed isolation without Ollama**: Tests that verify embed scoping should either mock Ollama or use `no_embed: true` and verify the document ID list passed to the embed function. 
The `successful_full_pipeline` test uses `no_embed: true` to avoid requiring a running Ollama server in CI.\n- **Test isolation**: Each test creates its own `MockServer`, in-memory DB, and `ShutdownSignal`. No shared state between tests.\n- **Flakiness prevention**: Cancellation timing tests (test 2) use deterministic delays (cancel after 200ms, response delayed 2s). If flaky, increase the gap between cancel time and response delay.\n- **CI compatibility**: No real GitLab, no real Ollama, no real filesystem locks (in-memory DB means AppLock may need adaptation for tests — consider a test-only lock bypass or use a temp file DB for lock tests).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function under test), bd-wcja (SyncResult surgical fields to assert), bd-1lja (SyncOptions extensions), bd-3sez (surgical ingest for TOCTOU test), bd-arka (SyncRunRecorder for recorder state assertions), bd-1elx (scoped embed for isolation test), bd-kanh (per-entity helpers)\n- **No downstream dependents** — this is a terminal test-only bead.\n- These tests validate the behavioral contracts that all upstream beads promise. They are the acceptance gate for the surgical sync feature.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:46.182356Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:04:49.331351Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-3jqx","title":"Implement async integration tests: cancellation, timeout, embed isolation, payload integrity","description":"## Background\n\nThe surgical sync pipeline involves async operations, cancellation signals, timeouts, scoped embedding, and multi-entity coordination. 
Unit tests in individual beads cover their own logic, but integration tests are needed to verify the full pipeline under realistic conditions: cancellation at different stages, timeout behavior with continuation, embedding scope isolation (only affected documents get embedded), and payload integrity (project_id mismatches rejected). These tests use wiremock for HTTP mocking and tokio for async runtime.\n\n## Approach\n\nCreate `tests/surgical_integration.rs` as an integration test file (Rust convention: `tests/` directory for integration tests). Six test functions covering the critical behavioral properties of the surgical pipeline:\n\n1. **Cancellation before preflight**: Signal cancelled before any HTTP call. Verify: recorder marked failed, no GitLab requests made, result has zero updates.\n2. **Cancellation during dependent stage**: Signal cancelled after preflight succeeds but during discussion fetch. Verify: partial results recorded, recorder marked failed, entities processed before cancellation have outcomes.\n3. **Per-entity timeout with continuation**: One entity's GitLab endpoint is slow (wiremock delay). Verify: that entity gets `failed` outcome with timeout error, remaining entities continue and succeed.\n4. **Embed scope isolation**: Sync two issues. Verify: only documents generated from those two issues are embedded, not the entire corpus. Assert by checking document IDs passed to embed function.\n5. **Payload project_id mismatch rejection**: Preflight returns an issue with `project_id` different from the resolved project. Verify: that entity gets `failed` outcome with clear error, other entities unaffected.\n6. **Successful full pipeline**: Sync one issue end-to-end through all stages. 
Verify: SyncResult has correct counts, entity_results has `synced` outcome, documents regenerated, embeddings created.\n\nAll tests use in-memory SQLite (`create_connection(Path::new(\":memory:\"))` + `run_migrations`) and wiremock `MockServer`.\n\n## Acceptance Criteria\n\n1. All 6 tests compile and pass\n2. Tests are isolated (each creates its own DB and mock server)\n3. Cancellation tests verify recorder state (failed status in sync_runs table)\n4. Timeout test uses wiremock delay, not `tokio::time::sleep` on the test side\n5. Embed isolation test verifies document-level scoping, not just function call\n6. Tests run in CI without flakiness (no real network, no real Ollama)\n\n## Files\n\n- `tests/surgical_integration.rs` — all 6 integration tests\n\n## TDD Anchor\n\n```rust\n// tests/surgical_integration.rs\n\nuse lore::cli::commands::sync::{SyncOptions, SyncResult};\nuse lore::core::db::{create_connection, run_migrations};\nuse lore::core::shutdown::ShutdownSignal;\nuse lore::Config;\nuse std::path::Path;\nuse std::time::Duration;\nuse wiremock::{Mock, MockServer, ResponseTemplate};\nuse wiremock::matchers::{method, path_regex};\n\nfn test_config(mock_url: &str) -> Config {\n let mut config = Config::default();\n config.gitlab.url = mock_url.to_string();\n config.gitlab.token = \"test-token\".to_string();\n config\n}\n\nfn setup_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n conn.execute(\n \"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)\n VALUES (1, 'group/project', 'https://gitlab.example.com/group/project')\",\n [],\n ).unwrap();\n conn\n}\n\nfn mock_issue_json(iid: u64) -> serde_json::Value {\n serde_json::json!({\n \"id\": 100 + iid, \"iid\": iid, \"project_id\": 1, \"title\": format!(\"Issue {}\", iid),\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, 
\"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": format!(\"https://gitlab.example.com/group/project/-/issues/{}\", iid)\n })\n}\n\n#[tokio::test]\nasync fn cancellation_before_preflight() {\n let server = MockServer::start().await;\n // No mocks mounted — if any request is made, wiremock will return 404\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n signal.cancel(); // Cancel before anything starts\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"cancel-pre\"), &signal,\n ).await.unwrap();\n\n assert_eq!(result.issues_updated, 0);\n assert_eq!(result.mrs_updated, 0);\n // Verify no HTTP requests were made\n assert_eq!(server.received_requests().await.unwrap().len(), 0);\n}\n\n#[tokio::test]\nasync fn cancellation_during_dependent_stage() {\n let server = MockServer::start().await;\n // Mock issue fetch (preflight succeeds)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(7)])))\n .mount(&server).await;\n // Mock discussion fetch with delay (gives time to cancel)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([]))\n .set_body_delay(Duration::from_secs(2)))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n // Cancel after a short delay (after preflight, during dependents)\n let signal_clone = signal.clone();\n tokio::spawn(async move {\n 
tokio::time::sleep(Duration::from_millis(200)).await;\n signal_clone.cancel();\n });\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"cancel-dep\"), &signal,\n ).await.unwrap();\n\n // Preflight should have run, but ingest may be partial\n assert!(result.surgical_mode == Some(true));\n}\n\n#[tokio::test]\nasync fn per_entity_timeout_with_continuation() {\n let server = MockServer::start().await;\n // Issue 7: slow response (simulates timeout)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\\?.*iids\\[\\]=7\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(7)]))\n .set_body_delay(Duration::from_secs(30)))\n .mount(&server).await;\n // Issue 42: fast response\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\\?.*iids\\[\\]=42\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(42)])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7, 42],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n // With a per-entity timeout, issue 7 should fail, issue 42 should succeed\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"timeout-test\"), &signal,\n ).await.unwrap();\n\n let entities = result.entity_results.as_ref().unwrap();\n // One should be failed (timeout), one should be synced\n let failed = entities.iter().filter(|e| e.outcome == \"failed\").count();\n let synced = entities.iter().filter(|e| e.outcome == \"synced\").count();\n assert!(failed >= 1 || synced >= 1, \"Expected mixed outcomes\");\n}\n\n#[tokio::test]\nasync fn embed_scope_isolation() {\n let server = MockServer::start().await;\n // Mock two issues\n Mock::given(method(\"GET\"))\n 
.and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([\n mock_issue_json(7), mock_issue_json(42)\n ])))\n .mount(&server).await;\n // Mock empty discussions for both\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/\\d+/discussions\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7, 42],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n no_embed: false,\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"embed-iso\"), &signal,\n ).await.unwrap();\n\n // Embedding should only have processed documents from issues 7 and 42\n // Not the full corpus. Verify via document counts.\n assert!(result.documents_embedded <= 2,\n \"Expected at most 2 documents embedded (one per issue), got {}\",\n result.documents_embedded);\n}\n\n#[tokio::test]\nasync fn payload_project_id_mismatch_rejection() {\n let server = MockServer::start().await;\n // Return issue with project_id=999 (doesn't match resolved project_id=1)\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([{\n \"id\": 200, \"iid\": 7, \"project_id\": 999, \"title\": \"Wrong Project\",\n \"state\": \"opened\", \"created_at\": \"2026-01-01T00:00:00Z\",\n \"updated_at\": \"2026-02-17T00:00:00Z\",\n \"author\": {\"id\": 1, \"username\": \"dev\", \"name\": \"Dev\"},\n \"web_url\": \"https://gitlab.example.com/other/project/-/issues/7\"\n }])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n 
..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"mismatch\"), &signal,\n ).await.unwrap();\n\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].outcome, \"failed\");\n assert!(entities[0].error.as_ref().unwrap().contains(\"project_id\"));\n}\n\n#[tokio::test]\nasync fn successful_full_pipeline() {\n let server = MockServer::start().await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues\"))\n .respond_with(ResponseTemplate::new(200)\n .set_body_json(serde_json::json!([mock_issue_json(7)])))\n .mount(&server).await;\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/discussions\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n // Mock any resource event endpoints\n Mock::given(method(\"GET\"))\n .and(path_regex(r\"/api/v4/projects/1/issues/7/resource_\"))\n .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))\n .mount(&server).await;\n\n let config = test_config(&server.uri());\n let options = SyncOptions {\n issue_iids: vec![7],\n project: Some(\"group/project\".to_string()),\n robot_mode: true,\n no_embed: true, // Skip embed to avoid Ollama dependency\n ..SyncOptions::default()\n };\n let signal = ShutdownSignal::new();\n\n let result = lore::cli::commands::sync_surgical::run_sync_surgical(\n &config, options, Some(\"full-pipe\"), &signal,\n ).await.unwrap();\n\n assert_eq!(result.surgical_mode, Some(true));\n assert_eq!(result.surgical_iids.as_ref().unwrap().issues, vec![7]);\n assert_eq!(result.preflight_only, Some(false));\n\n let entities = result.entity_results.as_ref().unwrap();\n assert_eq!(entities.len(), 1);\n assert_eq!(entities[0].entity_type, \"issue\");\n assert_eq!(entities[0].iid, 7);\n assert_eq!(entities[0].outcome, 
\"synced\");\n assert!(entities[0].error.is_none());\n\n assert!(result.issues_updated >= 1);\n assert!(result.documents_regenerated >= 1);\n}\n```\n\n## Edge Cases\n\n- **Wiremock delay vs tokio timeout**: Use `set_body_delay` on wiremock, not `tokio::time::sleep` in tests. The per-entity timeout in the orchestrator (bd-1i4i) should use `tokio::time::timeout` around the HTTP call.\n- **Embed isolation without Ollama**: Tests that verify embed scoping should either mock Ollama or use `no_embed: true` and verify the document ID list passed to the embed function. The `successful_full_pipeline` test uses `no_embed: true` to avoid requiring a running Ollama server in CI.\n- **Test isolation**: Each test creates its own `MockServer`, in-memory DB, and `ShutdownSignal`. No shared state between tests.\n- **Flakiness prevention**: Cancellation timing tests (test 2) use deterministic delays (cancel after 200ms, response delayed 2s). If flaky, increase the gap between cancel time and response delay.\n- **CI compatibility**: No real GitLab, no real Ollama, no real filesystem locks (in-memory DB means AppLock may need adaptation for tests — consider a test-only lock bypass or use a temp file DB for lock tests).\n\n## Dependency Context\n\n- **Depends on (upstream)**: bd-1i4i (the `run_sync_surgical` function under test), bd-wcja (SyncResult surgical fields to assert), bd-1lja (SyncOptions extensions), bd-3sez (surgical ingest for TOCTOU test), bd-arka (SyncRunRecorder for recorder state assertions), bd-1elx (scoped embed for isolation test), bd-kanh (per-entity helpers)\n- **No downstream dependents** — this is a terminal test-only bead.\n- These tests validate the behavioral contracts that all upstream beads promise. 
They are the acceptance gate for the surgical sync feature.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:18:46.182356Z","created_by":"tayloreernisse","updated_at":"2026-02-18T19:24:23.804017Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-3js","title":"Implement MR CLI commands (list, show, count)","description":"## Background\nCLI commands for viewing and filtering merge requests. Includes list, show, and count commands with MR-specific filters.\n\n## Approach\nUpdate existing CLI command files:\n1. `list.rs` - Add MR listing with filters\n2. `show.rs` - Add MR detail view with discussions\n3. `count.rs` - Add MR counting with state breakdown\n\n## Files\n- `src/cli/commands/list.rs` - Add MR subcommand\n- `src/cli/commands/show.rs` - Add MR detail view\n- `src/cli/commands/count.rs` - Add MR counting\n\n## Acceptance Criteria\n- [ ] `gi list mrs` shows MR table with iid, title, state, author, branches\n- [ ] `gi list mrs --state=merged` filters by state\n- [ ] `gi list mrs --state=locked` filters locally (not server-side)\n- [ ] `gi list mrs --draft` shows only draft MRs\n- [ ] `gi list mrs --no-draft` excludes draft MRs\n- [ ] `gi list mrs --reviewer=username` filters by reviewer\n- [ ] `gi list mrs --target-branch=main` filters by target branch\n- [ ] `gi list mrs --source-branch=feature/x` filters by source branch\n- [ ] Draft MRs show `[DRAFT]` prefix in title\n- [ ] `gi show mr ` displays full detail including discussions\n- [ ] DiffNote shows file context: `[src/file.ts:45]`\n- [ ] Multi-line DiffNote shows: `[src/file.ts:45-48]`\n- [ ] `gi show mr` shows `detailed_merge_status`\n- [ ] `gi count mrs` shows total with state breakdown\n- [ ] `gi sync-status` shows MR cursor positions\n- [ ] `cargo test cli_commands` passes\n\n## TDD Loop\nRED: `cargo test list_mrs` -> command not found\nGREEN: Add MR subcommand\nVERIFY: `gi list mrs --help`\n\n## gi list mrs Output\n```\nMerge Requests (showing 
20 of 1,234)\n\n !847 Refactor auth to use JWT tokens merged @johndoe main <- feature/jwt 3 days ago\n !846 Fix memory leak in websocket handler opened @janedoe main <- fix/websocket 5 days ago\n !845 [DRAFT] Add dark mode CSS variables opened @bobsmith main <- ui/dark-mode 1 week ago\n```\n\n## SQL for MR Listing\n```sql\nSELECT \n m.iid, m.title, m.state, m.draft, m.author_username,\n m.target_branch, m.source_branch, m.updated_at\nFROM merge_requests m\nWHERE m.project_id = ?\n AND (? IS NULL OR m.state = ?) -- state filter\n AND (? IS NULL OR m.draft = ?) -- draft filter\n AND (? IS NULL OR m.author_username = ?) -- author filter\n AND (? IS NULL OR m.target_branch = ?) -- target-branch filter\n AND (? IS NULL OR m.source_branch = ?) -- source-branch filter\n AND (? IS NULL OR EXISTS ( -- reviewer filter\n SELECT 1 FROM mr_reviewers r \n WHERE r.merge_request_id = m.id AND r.username = ?\n ))\nORDER BY m.updated_at DESC\nLIMIT ?\n```\n\n## gi show mr Output\n```\nMerge Request !847: Refactor auth to use JWT tokens\n================================================================================\n\nProject: group/project-one\nState: merged\nDraft: No\nAuthor: @johndoe\nAssignees: @janedoe, @bobsmith\nReviewers: @alice, @charlie\nSource: feature/jwt\nTarget: main\nMerge Status: mergeable\nMerged By: @alice\nMerged At: 2024-03-20 14:30:00\nLabels: enhancement, auth, reviewed\n\nDescription:\n Moving away from session cookies to JWT-based authentication...\n\nDiscussions (8):\n\n @janedoe (2024-03-16) [src/auth/jwt.ts:45]:\n Should we use a separate signing key for refresh tokens?\n\n @johndoe (2024-03-16):\n Good point. I'll add a separate key with rotation support.\n\n @alice (2024-03-18) [RESOLVED]:\n Looks good! 
Just one nit about the token expiry constant.\n```\n\n## DiffNote File Context Display\n```rust\n// Build file context string\nlet file_context = match (note.position_new_path, note.position_new_line, note.position_line_range_end) {\n (Some(path), Some(line), Some(end_line)) if line != end_line => {\n format!(\"[{}:{}-{}]\", path, line, end_line)\n }\n (Some(path), Some(line), _) => {\n format!(\"[{}:{}]\", path, line)\n }\n _ => String::new(),\n};\n```\n\n## gi count mrs Output\n```\nMerge Requests: 1,234\n opened: 89\n merged: 1,045\n closed: 100\n```\n\n## Filter Arguments (clap)\n```rust\n#[derive(Parser)]\nstruct ListMrsArgs {\n #[arg(long)]\n state: Option, // opened|merged|closed|locked|all\n #[arg(long)]\n draft: bool,\n #[arg(long)]\n no_draft: bool,\n #[arg(long)]\n author: Option,\n #[arg(long)]\n assignee: Option,\n #[arg(long)]\n reviewer: Option,\n #[arg(long)]\n target_branch: Option,\n #[arg(long)]\n source_branch: Option,\n #[arg(long)]\n label: Vec,\n #[arg(long)]\n project: Option,\n #[arg(long, default_value = \"20\")]\n limit: u32,\n}\n```\n\n## Edge Cases\n- `--state=locked` must filter locally (GitLab API doesn't support it)\n- Ambiguous MR iid across projects: prompt for `--project`\n- Empty discussions: show \"No discussions\" message\n- Multi-line DiffNotes: show line range in context","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:43.354939Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:37:31.792569Z","closed_at":"2026-01-27T00:37:31.792504Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3js","depends_on_id":"bd-20h","type":"blocks","created_at":"2026-01-26T22:08:55.209249Z","created_by":"tayloreernisse"},{"issue_id":"bd-3js","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.117728Z","created_by":"tayloreernisse"}]} {"id":"bd-3kj","title":"[CP0] gi version, backup, reset, sync-status commands","description":"## 
Background\n\nThese are the remaining utility commands for CP0. version is trivial. backup creates safety copies before destructive operations. reset provides clean-slate capability. sync-status is a stub for CP0 that will be implemented in CP1.\n\nReference: docs/prd/checkpoint-0.md sections \"gi version\", \"gi backup\", \"gi reset\", \"gi sync-status\"\n\n## Approach\n\n**src/cli/commands/version.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { version } from '../../../package.json' with { type: 'json' };\n\nexport const versionCommand = new Command('version')\n .description('Show version information')\n .action(() => {\n console.log(\\`gi version \\${version}\\`);\n });\n```\n\n**src/cli/commands/backup.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { copyFileSync, mkdirSync } from 'node:fs';\nimport { loadConfig } from '../../core/config';\nimport { getDbPath, getBackupDir } from '../../core/paths';\n\nexport const backupCommand = new Command('backup')\n .description('Create timestamped database backup')\n .action(async (options, command) => {\n const globalOpts = command.optsWithGlobals();\n const config = loadConfig(globalOpts.config);\n \n const dbPath = getDbPath(config.storage?.dbPath);\n const backupDir = getBackupDir(config.storage?.backupDir);\n \n mkdirSync(backupDir, { recursive: true });\n \n // Format: data-2026-01-24T10-30-00.db (colons replaced for Windows compat)\n const timestamp = new Date().toISOString().replace(/:/g, '-').replace(/\\\\..*/, '');\n const backupPath = \\`\\${backupDir}/data-\\${timestamp}.db\\`;\n \n copyFileSync(dbPath, backupPath);\n console.log(\\`Created backup: \\${backupPath}\\`);\n });\n```\n\n**src/cli/commands/reset.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { unlinkSync, existsSync } from 'node:fs';\nimport { createInterface } from 'node:readline';\nimport { loadConfig } from '../../core/config';\nimport { getDbPath } from '../../core/paths';\n\nexport 
const resetCommand = new Command('reset')\n .description('Delete database and reset all state')\n .option('--confirm', 'Skip confirmation prompt')\n .action(async (options, command) => {\n const globalOpts = command.optsWithGlobals();\n const config = loadConfig(globalOpts.config);\n const dbPath = getDbPath(config.storage?.dbPath);\n \n if (!existsSync(dbPath)) {\n console.log('No database to reset.');\n return;\n }\n \n if (!options.confirm) {\n console.log(\\`This will delete:\\n - Database: \\${dbPath}\\n - All sync cursors\\n - All cached data\\n\\`);\n // Prompt for 'yes' confirmation\n // If not 'yes', exit 2\n }\n \n unlinkSync(dbPath);\n // Also delete WAL and SHM files if they exist\n if (existsSync(\\`\\${dbPath}-wal\\`)) unlinkSync(\\`\\${dbPath}-wal\\`);\n if (existsSync(\\`\\${dbPath}-shm\\`)) unlinkSync(\\`\\${dbPath}-shm\\`);\n \n console.log(\"Database reset. Run 'gi sync' to repopulate.\");\n });\n```\n\n**src/cli/commands/sync-status.ts:**\n```typescript\n// CP0 stub - full implementation in CP1\nexport const syncStatusCommand = new Command('sync-status')\n .description('Show sync state')\n .action(() => {\n console.log(\"No sync runs yet. 
Run 'gi sync' to start.\");\n });\n```\n\n## Acceptance Criteria\n\n- [ ] `gi version` outputs \"gi version X.Y.Z\"\n- [ ] `gi backup` creates timestamped copy of database\n- [ ] Backup filename is Windows-compatible (no colons)\n- [ ] Backup directory created if missing\n- [ ] `gi reset` prompts for 'yes' confirmation\n- [ ] `gi reset --confirm` skips prompt\n- [ ] Reset deletes .db, .db-wal, and .db-shm files\n- [ ] Reset exits 2 if user doesn't type 'yes'\n- [ ] `gi sync-status` outputs stub message\n\n## Files\n\nCREATE:\n- src/cli/commands/version.ts\n- src/cli/commands/backup.ts\n- src/cli/commands/reset.ts\n- src/cli/commands/sync-status.ts\n\n## TDD Loop\n\nN/A - simple commands, verify manually:\n\n```bash\ngi version\ngi backup\nls ~/.local/share/gi/backups/\ngi reset # type 'no'\ngi reset --confirm\nls ~/.local/share/gi/data.db # should not exist\ngi sync-status\n```\n\n## Edge Cases\n\n- Backup when database doesn't exist - show clear error\n- Reset when database doesn't exist - show \"No database to reset\"\n- WAL/SHM files may not exist - check before unlinking\n- Timestamp with milliseconds could cause very long filename\n- readline prompt in non-interactive terminal - handle SIGINT","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:51.774210Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:31:46.227285Z","closed_at":"2026-01-25T03:31:46.227220Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3kj","depends_on_id":"bd-13b","type":"blocks","created_at":"2026-01-24T16:13:10.810953Z","created_by":"tayloreernisse"},{"issue_id":"bd-3kj","depends_on_id":"bd-3ng","type":"blocks","created_at":"2026-01-24T16:13:10.827689Z","created_by":"tayloreernisse"}]} {"id":"bd-3l56","title":"Add lore sync --tui convenience flag","description":"## Background\n\nThe PRD defines two CLI entry paths to the TUI: `lore tui` (full TUI) and `lore sync --tui` (convenience shortcut that 
launches the TUI directly on the Sync screen in inline mode). The `lore tui` command is covered by bd-26lp. This bead adds the `--tui` flag to the existing `SyncArgs` struct, which delegates to the `lore-tui` binary with `--sync` flag.\n\n## Approach\n\nTwo changes to the existing lore CLI crate (NOT the lore-tui crate):\n\n1. **Add `--tui` flag to `SyncArgs`** in `src/cli/mod.rs`:\n ```rust\n /// Show sync progress in interactive TUI (inline mode)\n #[arg(long)]\n pub tui: bool,\n ```\n\n2. **Handle the flag in sync command dispatch** in `src/main.rs` (or wherever Commands::Sync is matched):\n - If `args.tui` is true, call `resolve_tui_binary()` (from bd-26lp) and spawn it with `--sync` flag\n - Forward the config path if specified\n - Exit with the lore-tui process exit code\n - If lore-tui is not found, print a helpful error message\n\nThe `resolve_tui_binary()` function is implemented by bd-26lp (CLI integration). This bead simply adds the flag and the early-return delegation path in the sync command handler.\n\n## Acceptance Criteria\n- [ ] `lore sync --tui` is accepted by the CLI parser (no unknown flag error)\n- [ ] When `--tui` is set, the sync command delegates to `lore-tui --sync` binary\n- [ ] Config path is forwarded if `--config` was specified\n- [ ] If lore-tui binary is not found, prints error with install instructions and exits non-zero\n- [ ] `lore sync --tui --full` does NOT pass `--full` to lore-tui (TUI has its own sync controls)\n- [ ] `--tui` flag appears in `lore sync --help` output\n\n## Files\n- MODIFY: src/cli/mod.rs (add `tui: bool` field to `SyncArgs` struct at line ~776)\n- MODIFY: src/main.rs or src/cli/commands/sync.rs (add early-return delegation when `args.tui`)\n\n## TDD Anchor\nRED: Write `test_sync_tui_flag_accepted` that verifies `SyncArgs` can be parsed with `--tui` flag.\nGREEN: Add the `tui: bool` field to SyncArgs.\nVERIFY: cargo test sync_tui_flag\n\nAdditional tests:\n- test_sync_tui_flag_default_false (not set by 
default)\n\n## Edge Cases\n- `--tui` combined with `--dry-run` — the TUI handles dry-run internally, so `--dry-run` should be ignored when `--tui` is set (or warn)\n- `--tui` when lore-tui binary does not exist — clear error, not a panic\n- `--tui` in robot mode (`--robot`) — nonsensical combination, should error with \"cannot use --tui with --robot\"\n\n## Dependency Context\n- Depends on bd-26lp (CLI integration) which implements `resolve_tui_binary()` and `validate_tui_compat()` functions that this bead calls.\n- The SyncArgs struct is at src/cli/mod.rs:739. The existing fields are: full, no_full, force, no_force, no_embed, no_docs, no_events, no_file_changes, dry_run, no_dry_run.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T19:29:40.785182Z","created_by":"tayloreernisse","updated_at":"2026-02-12T19:29:49.341576Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-3l56","depends_on_id":"bd-26lp","type":"blocks","created_at":"2026-02-12T19:29:49.341556Z","created_by":"tayloreernisse"}]} @@ -243,7 +243,7 @@ {"id":"bd-3qn6","title":"Rewrite who --path to use mr_file_changes for authorship signal","description":"## Problem\n\nwho --path currently only queries DiffNote records (notes.position_new_path), so it only finds people who left inline review comments on that exact file. This is highly misleading -- it reports 'no experts' for files that have been actively authored and reviewed, just without inline comments on that specific path.\n\n## Solution\n\nRewrite query_expert() to incorporate mr_file_changes as a primary signal source:\n\n1. MR authorship signal: JOIN mr_file_changes to find MR authors who touched the file (strongest signal)\n2. MR reviewer signal: JOIN mr_file_changes + merge_request_reviewers to find reviewers of MRs that touched the file (even without DiffNotes on that file)\n3. 
DiffNote signal: Keep existing DiffNote query as a supplementary signal (inline comments show deep familiarity)\n\n### Scoring weights (to tune):\n- MR author who touched the file: 15 points per MR\n- MR reviewer of MR touching the file: 10 points per MR\n- DiffNote reviewer on that file: 20 points per MR + 1 per note (existing)\n- DiffNote MR author: 12 points per MR (existing)\n\n### Path matching:\n- Reuse build_path_query() but extend DB probes to also check mr_file_changes.new_path\n- For prefix matching, LIKE on mr_file_changes.new_path\n\n### Also fix:\n- build_path_query() probes should check mr_file_changes in addition to notes, so path resolution works even when no DiffNotes exist\n\n## Acceptance Criteria\n- who --path returns results for files touched in MRs even without DiffNotes\n- Existing DiffNote-based scoring still contributes\n- build_path_query probes mr_file_changes for path existence\n- Tests cover: MR-only authorship, DiffNote-only, combined scoring\n- Robot mode JSON output unchanged (same schema)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T18:16:41.991344Z","created_by":"tayloreernisse","updated_at":"2026-02-08T18:34:25.704024Z","closed_at":"2026-02-08T18:34:25.703965Z","close_reason":"Rewrote query_expert() and query_overlap() in who.rs to incorporate mr_file_changes + mr_reviewers as signal sources alongside existing DiffNote data. Uses 4-branch UNION ALL with COUNT(DISTINCT CASE) for proper deduplication across signal types. 8 new tests, all 397 pass.","compaction_level":0,"original_size":0,"labels":["cli","phase-b","who"],"dependencies":[{"issue_id":"bd-3qn6","depends_on_id":"bd-2yo","type":"blocks","created_at":"2026-02-08T18:16:41.994443Z","created_by":"tayloreernisse"}]} {"id":"bd-3qs","title":"Implement lore generate-docs CLI command","description":"## Background\nThe generate-docs CLI command is the user-facing wrapper around the document regeneration pipeline. 
It has two modes: incremental (default, processes dirty_sources queue only) and full (seeds dirty_sources with ALL entities, then drains). Both modes use the same regenerator codepath to avoid logic divergence. Full mode uses keyset pagination (WHERE id > last_id) for seeding to avoid O(n^2) OFFSET degradation on large tables.\n\n## Approach\nCreate `src/cli/commands/generate_docs.rs` per PRD Section 2.4.\n\n**Core function:**\n```rust\npub fn run_generate_docs(\n config: &Config,\n full: bool,\n project_filter: Option<&str>,\n) -> Result\n```\n\n**Full mode seeding (keyset pagination):**\n```rust\nconst FULL_MODE_CHUNK_SIZE: usize = 2000;\n\n// For each source type (issues, MRs, discussions):\nlet mut last_id: i64 = 0;\nloop {\n let tx = conn.transaction()?;\n let inserted = tx.execute(\n \"INSERT INTO dirty_sources (source_type, source_id, queued_at, ...)\n SELECT 'issue', id, ?, 0, NULL, NULL, NULL\n FROM issues WHERE id > ? ORDER BY id LIMIT ?\n ON CONFLICT(source_type, source_id) DO NOTHING\",\n params![now_ms(), last_id, FULL_MODE_CHUNK_SIZE],\n )?;\n if inserted == 0 { tx.commit()?; break; }\n // Advance keyset cursor...\n tx.commit()?;\n}\n```\n\n**After draining (full mode only):**\n```sql\nINSERT INTO documents_fts(documents_fts) VALUES('optimize')\n```\n\n**CLI args:**\n```rust\n#[derive(Args)]\npub struct GenerateDocsArgs {\n #[arg(long)]\n full: bool,\n #[arg(long)]\n project: Option,\n}\n```\n\n**Output:** Human-readable table + JSON robot mode.\n\n## Acceptance Criteria\n- [ ] Default mode (no --full): processes only existing dirty_sources entries\n- [ ] --full mode: seeds dirty_sources with ALL issues, MRs, and discussions\n- [ ] Full mode uses keyset pagination (WHERE id > last_id, not OFFSET)\n- [ ] Full mode chunk size is 2000\n- [ ] Full mode does FTS optimize after completion\n- [ ] Both modes use regenerate_dirty_documents() (same codepath)\n- [ ] Progress bar shown in human mode (via indicatif)\n- [ ] JSON output in robot mode with 
GenerateDocsResult\n- [ ] GenerateDocsResult has issues/mrs/discussions/total/truncated/skipped counts\n- [ ] `cargo build` succeeds\n\n## Files\n- `src/cli/commands/generate_docs.rs` — new file\n- `src/cli/commands/mod.rs` — add `pub mod generate_docs;`\n- `src/cli/mod.rs` — add GenerateDocsArgs, wire up generate-docs subcommand\n- `src/main.rs` — add generate-docs command handler\n\n## TDD Loop\nRED: Integration test with seeded DB\nGREEN: Implement run_generate_docs with seeding + drain\nVERIFY: `cargo build && cargo test generate_docs`\n\n## Edge Cases\n- Empty database (no issues/MRs/discussions): full mode seeds nothing, returns all-zero counts\n- --project filter in full mode: only seed dirty_sources for entities in that project\n- Interrupted full mode: dirty_sources entries persist (ON CONFLICT DO NOTHING), resume by re-running\n- FTS optimize on empty FTS table: no-op (safe)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:25:55.226666Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:49:23.397157Z","closed_at":"2026-01-30T17:49:23.397098Z","close_reason":"Implemented generate-docs command with incremental + full mode, keyset pagination seeding, FTS optimize, project filter, human + JSON output. Builds clean.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3qs","depends_on_id":"bd-1u1","type":"blocks","created_at":"2026-01-30T15:29:16.089769Z","created_by":"tayloreernisse"},{"issue_id":"bd-3qs","depends_on_id":"bd-221","type":"blocks","created_at":"2026-01-30T15:29:16.125158Z","created_by":"tayloreernisse"}]} {"id":"bd-3rl","title":"Epic: Gate C - Sync MVP","description":"## Background\nGate C adds the sync orchestrator and queue infrastructure that makes the search pipeline incremental and self-maintaining. It introduces dirty source tracking (change detection during ingestion), the discussion fetch queue, and the unified lore sync command that orchestrates the full pipeline. 
Gate C also adds integrity checks and repair paths.\n\n## Gate C Deliverables\n1. Orchestrated lore sync command with incremental doc regen + re-embedding\n2. Integrity checks + repair paths for FTS/embeddings consistency\n\n## Bead Dependencies (execution order, after Gate A)\n1. **bd-mem** — Shared backoff utility (no deps, shared with Gate B)\n2. **bd-38q** — Dirty source tracking (blocked by bd-36p, bd-hrs, bd-mem)\n3. **bd-1je** — Discussion queue (blocked by bd-hrs, bd-mem)\n4. **bd-1i2** — Integrate dirty tracking into ingestion (blocked by bd-38q)\n5. **bd-1x6** — Sync CLI (blocked by bd-38q, bd-1je, bd-1i2, bd-3qs, bd-2sx)\n\n## Acceptance Criteria\n- [ ] `lore sync` runs full pipeline: ingest -> generate-docs -> embed\n- [ ] `lore sync --full` does full re-sync + regeneration\n- [ ] `lore sync --no-embed` skips embedding stage\n- [ ] Dirty tracking: upserted entities automatically marked for regeneration\n- [ ] Queue draining: dirty_sources fully drained in bounded batch loop\n- [ ] Backoff: failed items use exponential backoff with jitter\n- [ ] `lore stats --check` detects inconsistencies\n- [ ] `lore stats --repair` fixes FTS/embedding inconsistencies","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-30T15:25:13.494698Z","created_by":"tayloreernisse","updated_at":"2026-01-30T18:05:52.121666Z","closed_at":"2026-01-30T18:05:52.121619Z","close_reason":"All Gate C sub-beads complete: backoff utility, dirty tracking, discussion queue, ingestion integration, sync CLI, stats CLI","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3rl","depends_on_id":"bd-1x6","type":"blocks","created_at":"2026-01-30T15:29:35.853817Z","created_by":"tayloreernisse"},{"issue_id":"bd-3rl","depends_on_id":"bd-pr1","type":"blocks","created_at":"2026-01-30T15:29:35.892441Z","created_by":"tayloreernisse"}]} -{"id":"bd-3sez","title":"Create surgical.rs core module with preflight fetch, ingest functions, and TOCTOU guards","description":"## 
Background\n\nThe surgical sync pipeline needs a core module (`src/ingestion/surgical.rs`) that fetches a single issue or MR by IID from GitLab and ingests it into the local SQLite database. This replaces the bulk pagination path (`ingest_issues`/`ingest_merge_requests`) for targeted, on-demand sync of specific entities.\n\nKey constraints:\n- `process_single_issue` (issues.rs:143) and `process_single_mr` (merge_requests.rs:144) are private functions. This bead wraps them with pub(crate) entry points that add TOCTOU guard logic and dirty marking.\n- `updated_at` is a `String` (ISO 8601) in `GitLabIssue`/`GitLabMergeRequest` but stored as `INTEGER` (ms-epoch) in the DB. The TOCTOU guard must parse the ISO string to ms-epoch for comparison.\n- `ProcessMrResult` (merge_requests.rs:138) is a private struct. The MR ingest wrapper returns its own result type or re-exports the needed fields.\n- `SyncRunRecorder` has `succeed()` and `fail()` that consume `self`. Not needed here since surgical.rs is called from the orchestrator which owns the recorder.\n\n## Approach\n\nCreate `src/ingestion/surgical.rs` with:\n\n1. **`preflight_fetch`** (async): Takes `&GitLabClient`, `gitlab_project_id`, and a list of `(entity_type, iid)` targets. Calls `client.get_issue_by_iid()` and `client.get_mr_by_iid()` (from bd-159p). Returns `PreflightResult { issues: Vec, merge_requests: Vec, failures: Vec }`.\n\n2. **`ingest_issue_by_iid`** (sync): Takes `&Connection`, `&Config`, `project_id`, `&GitLabIssue`. Applies TOCTOU guard (compare payload `updated_at` parsed to ms-epoch vs DB `updated_at`), then calls `process_single_issue` (requires making it `pub(crate)` in bd-1sc6), marks dirty via `dirty_tracker::mark_dirty(conn, SourceType::Issue, local_issue_id)`, and returns `IngestIssueResult { upserted: bool, labels_created: usize, skipped_stale: bool, dirty_source_keys: Vec<(SourceType, i64)> }`.\n\n3. **`ingest_mr_by_iid`** (sync): Same pattern for MRs. 
Calls `process_single_mr` (requires `pub(crate)` in bd-1sc6), returns `IngestMrResult { upserted: bool, labels_created: usize, assignees_linked: usize, reviewers_linked: usize, skipped_stale: bool, dirty_source_keys: Vec<(SourceType, i64)> }`.\n\n4. **TOCTOU guard**: `fn is_stale(payload_updated_at: &str, db_updated_at_ms: Option) -> Result`. Parses ISO 8601 string to ms-epoch using `chrono::DateTime::parse_from_rfc3339`. Returns `true` if `payload_ms <= db_ms` (payload is same age or older than what we already have).\n\nWire the module in `src/ingestion/mod.rs`.\n\n## Acceptance Criteria\n\n- [ ] `preflight_fetch` calls GitLabClient by-IID methods and collects successes + failures\n- [ ] `ingest_issue_by_iid` wraps `process_single_issue` with TOCTOU guard and dirty marking\n- [ ] `ingest_mr_by_iid` wraps `process_single_mr` with TOCTOU guard and dirty marking\n- [ ] TOCTOU guard correctly parses ISO 8601 String to ms-epoch for comparison with DB i64\n- [ ] Stale payloads (payload updated_at <= DB updated_at) are skipped, not ingested\n- [ ] `dirty_source_keys` returned include the `(SourceType, source_id)` tuples for downstream scoped doc regen\n- [ ] Module registered in `src/ingestion/mod.rs`\n- [ ] All tests from bd-x8oq pass\n\n## Files\n\n- `src/ingestion/surgical.rs` (NEW)\n- `src/ingestion/mod.rs` (add `pub(crate) mod surgical;`)\n- `src/ingestion/issues.rs` (change `process_single_issue` to `pub(crate)` — done in bd-1sc6)\n- `src/ingestion/merge_requests.rs` (change `process_single_mr` and `ProcessMrResult` to `pub(crate)` — done in bd-1sc6)\n\n## TDD Anchor\n\nTests live in bd-x8oq (`src/ingestion/surgical_tests.rs`), referenced via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;` in surgical.rs. 
Key tests that validate this bead:\n\n- `test_ingest_issue_by_iid_upserts_and_marks_dirty` — verifies full issue ingest path + dirty marking\n- `test_ingest_mr_by_iid_upserts_and_marks_dirty` — verifies full MR ingest path + dirty marking\n- `test_toctou_skips_stale_issue` — inserts issue with updated_at=T1, calls ingest with payload updated_at=T1, asserts skipped_stale=true\n- `test_toctou_skips_stale_mr` — same for MRs\n- `test_toctou_allows_newer_issue` — payload T2 > DB T1, asserts upserted=true\n- `test_is_stale_parses_iso8601` — unit test for the ISO 8601 to ms-epoch parsing\n- `test_is_stale_handles_none_db_value` — first ingest (no existing row), should return false (not stale)\n- `test_preflight_fetch_returns_issues_and_mrs` — wiremock test for successful preflight\n- `test_preflight_fetch_collects_failures` — wiremock 404 returns failure, not error\n\n## Edge Cases\n\n- ISO 8601 with timezone offset (GitLab returns `+00:00` not `Z`) must parse correctly\n- First-ever ingest of an IID: no existing DB row, TOCTOU guard must treat as \"not stale\" (db_updated_at is None)\n- GitLab returns 404 for a deleted issue/MR during preflight: failure, not hard error\n- Concurrent surgical syncs for same IID: `process_single_issue` uses `unchecked_transaction()` with UPSERT, so last-writer-wins is safe\n- `process_single_mr` returns `ProcessMrResult` which is private: either make it `pub(crate)` in bd-1sc6 or replicate needed fields\n\n## Dependency Context\n\n- **Blocked by bd-159p**: `get_issue_by_iid` and `get_mr_by_iid` on GitLabClient (preflight needs these)\n- **Blocked by bd-1sc6**: Visibility changes to `process_single_issue`, `process_single_mr`, `ProcessMrResult` (must be `pub(crate)`)\n- **Blocks bd-1i4i**: Orchestration function calls `preflight_fetch` + `ingest_issue_by_iid` / `ingest_mr_by_iid`\n- **Blocks bd-kanh**: Dependent helpers are called after ingest to fetch discussions, resource events, etc.\n- **Blocks bd-wcja**: SyncResult surgical fields 
depend on return types from this module\n- **Co-depends with bd-x8oq**: Tests for this code live in that bead's test file","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:14:19.449695Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:02:01.692160Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-3sez","depends_on_id":"bd-1i4i","type":"blocks","created_at":"2026-02-17T19:19:24.701282Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-3jqx","type":"blocks","created_at":"2026-02-17T19:19:25.751613Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-kanh","type":"blocks","created_at":"2026-02-17T19:19:23.887247Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-wcja","type":"blocks","created_at":"2026-02-17T19:19:24.029305Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-x8oq","type":"blocks","created_at":"2026-02-17T19:19:23.657468Z","created_by":"tayloreernisse"}]} +{"id":"bd-3sez","title":"Create surgical.rs core module with preflight fetch, ingest functions, and TOCTOU guards","description":"## Background\n\nThe surgical sync pipeline needs a core module (`src/ingestion/surgical.rs`) that fetches a single issue or MR by IID from GitLab and ingests it into the local SQLite database. This replaces the bulk pagination path (`ingest_issues`/`ingest_merge_requests`) for targeted, on-demand sync of specific entities.\n\nKey constraints:\n- `process_single_issue` (issues.rs:143) and `process_single_mr` (merge_requests.rs:144) are private functions. This bead wraps them with pub(crate) entry points that add TOCTOU guard logic and dirty marking.\n- `updated_at` is a `String` (ISO 8601) in `GitLabIssue`/`GitLabMergeRequest` but stored as `INTEGER` (ms-epoch) in the DB. 
The TOCTOU guard must parse the ISO string to ms-epoch for comparison.\n- `ProcessMrResult` (merge_requests.rs:138) is a private struct. The MR ingest wrapper returns its own result type or re-exports the needed fields.\n- `SyncRunRecorder` has `succeed()` and `fail()` that consume `self`. Not needed here since surgical.rs is called from the orchestrator which owns the recorder.\n\n## Approach\n\nCreate `src/ingestion/surgical.rs` with:\n\n1. **`preflight_fetch`** (async): Takes `&GitLabClient`, `gitlab_project_id`, and a list of `(entity_type, iid)` targets. Calls `client.get_issue_by_iid()` and `client.get_mr_by_iid()` (from bd-159p). Returns `PreflightResult { issues: Vec, merge_requests: Vec, failures: Vec }`.\n\n2. **`ingest_issue_by_iid`** (sync): Takes `&Connection`, `&Config`, `project_id`, `&GitLabIssue`. Applies TOCTOU guard (compare payload `updated_at` parsed to ms-epoch vs DB `updated_at`), then calls `process_single_issue` (requires making it `pub(crate)` in bd-1sc6), marks dirty via `dirty_tracker::mark_dirty(conn, SourceType::Issue, local_issue_id)`, and returns `IngestIssueResult { upserted: bool, labels_created: usize, skipped_stale: bool, dirty_source_keys: Vec<(SourceType, i64)> }`.\n\n3. **`ingest_mr_by_iid`** (sync): Same pattern for MRs. Calls `process_single_mr` (requires `pub(crate)` in bd-1sc6), returns `IngestMrResult { upserted: bool, labels_created: usize, assignees_linked: usize, reviewers_linked: usize, skipped_stale: bool, dirty_source_keys: Vec<(SourceType, i64)> }`.\n\n4. **TOCTOU guard**: `fn is_stale(payload_updated_at: &str, db_updated_at_ms: Option) -> Result`. Parses ISO 8601 string to ms-epoch using `chrono::DateTime::parse_from_rfc3339`. 
Returns `true` if `payload_ms <= db_ms` (payload is same age or older than what we already have).\n\nWire the module in `src/ingestion/mod.rs`.\n\n## Acceptance Criteria\n\n- [ ] `preflight_fetch` calls GitLabClient by-IID methods and collects successes + failures\n- [ ] `ingest_issue_by_iid` wraps `process_single_issue` with TOCTOU guard and dirty marking\n- [ ] `ingest_mr_by_iid` wraps `process_single_mr` with TOCTOU guard and dirty marking\n- [ ] TOCTOU guard correctly parses ISO 8601 String to ms-epoch for comparison with DB i64\n- [ ] Stale payloads (payload updated_at <= DB updated_at) are skipped, not ingested\n- [ ] `dirty_source_keys` returned include the `(SourceType, source_id)` tuples for downstream scoped doc regen\n- [ ] Module registered in `src/ingestion/mod.rs`\n- [ ] All 14 sync tests pass with in-memory SQLite\n- [ ] Async preflight test passes with wiremock\n- [ ] Test helpers produce valid GitLabIssue/GitLabMergeRequest fixtures that pass `transform_issue`/`transform_merge_request`\n- [ ] No flaky tests: deterministic timestamps, no real network calls\n\n## Files\n\n- `src/ingestion/surgical.rs` (NEW) — production code + `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;`\n- `src/ingestion/surgical_tests.rs` (NEW) — test suite\n- `src/ingestion/mod.rs` (add `pub(crate) mod surgical;`)\n- `src/ingestion/issues.rs` (change `process_single_issue` to `pub(crate)` — done in bd-1sc6)\n- `src/ingestion/merge_requests.rs` (change `process_single_mr` and `ProcessMrResult` to `pub(crate)` — done in bd-1sc6)\n\n## TDD Anchor\n\nTests live in `src/ingestion/surgical_tests.rs`, referenced via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;` in surgical.rs.\n\n### Test Helpers\n- `setup_db() -> Connection` — in-memory DB with migrations + test project row\n- `make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue` — minimal valid JSON fixture\n- `make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest` — minimal valid JSON 
fixture\n- `get_db_updated_at(conn, table, iid) -> Option` — helper to query DB updated_at for assertions\n- `get_dirty_keys(conn) -> Vec<(String, i64)>` — query dirty_sources for assertions\n\n### Sync Tests (13)\n1. `test_ingest_issue_by_iid_upserts_and_marks_dirty` — fresh issue ingest, verify DB row + dirty_sources entry\n2. `test_ingest_mr_by_iid_upserts_and_marks_dirty` — fresh MR ingest, verify DB row + dirty_sources entry\n3. `test_toctou_skips_stale_issue` — insert issue at T1, call ingest with payload at T1, assert skipped_stale=true and no dirty mark\n4. `test_toctou_skips_stale_mr` — same for MRs\n5. `test_toctou_allows_newer_issue` — DB has T1, payload has T2 (T2 > T1), assert upserted=true\n6. `test_toctou_allows_newer_mr` — same for MRs\n7. `test_is_stale_parses_iso8601` — unit test: `\"2026-02-17T12:00:00.000+00:00\"` parses to correct ms-epoch\n8. `test_is_stale_handles_none_db_value` — first ingest, no DB row, assert not stale\n9. `test_is_stale_with_z_suffix` — `\"2026-02-17T12:00:00Z\"` also parses correctly\n10. `test_ingest_issue_returns_dirty_source_keys` — verify `dirty_source_keys` contains `(SourceType::Issue, local_id)`\n11. `test_ingest_mr_returns_dirty_source_keys` — verify MR dirty source keys\n12. `test_ingest_issue_updates_existing` — ingest same IID twice with newer updated_at, verify update\n13. `test_ingest_mr_updates_existing` — same for MRs\n\n### Async Preflight Test (1, wiremock)\n14. 
`test_preflight_fetch_returns_issues_and_mrs` — wiremock GET `/projects/:id/issues?iids[]=42` returns 200 with fixture, verify PreflightResult.issues has 1 entry\n\n### Key test code:\n\n```rust\n#[test]\nfn test_ingest_issue_by_iid_upserts_and_marks_dirty() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n let config = Config::default();\n let result = ingest_issue_by_iid(&conn, &config, /*project_id=*/1, &issue).unwrap();\n assert!(result.upserted);\n assert!(!result.skipped_stale);\n let dirty = get_dirty_keys(&conn);\n assert!(dirty.contains(&(\"issue\".to_string(), /*local_id from DB*/)));\n}\n\n#[test]\nfn test_toctou_skips_stale_issue() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n // Ingest same timestamp again\n let result = ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n assert!(result.skipped_stale);\n}\n\n#[tokio::test]\nasync fn test_preflight_fetch_returns_issues_and_mrs() {\n let mock = MockServer::start().await;\n // ... wiremock setup ...\n}\n```\n\n### Testing constraints\n- In-memory DB pattern: `create_connection(Path::new(\":memory:\"))` + `run_migrations(&conn)`\n- Test project insert: `INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)` (no `name`/`last_seen_at` columns)\n- `GitLabIssue` required fields: `id`, `iid`, `project_id`, `title`, `state`, `created_at`, `updated_at`, `author`, `web_url`\n- `GitLabMergeRequest` adds: `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- `updated_at` is `String` (ISO 8601) in GitLab types, e.g. `\"2026-02-17T12:00:00.000+00:00\"`\n- `SourceType` enum variants: `Issue`, `MergeRequest`, `Discussion`, `Note`\n- `dirty_sources` table: `(source_type TEXT, source_id INTEGER)` primary key\n\nRED: Write all test functions. 
They will not compile until production code exists.\nGREEN: Implement preflight_fetch, ingest_issue_by_iid, ingest_mr_by_iid, is_stale.\nVERIFY: `cargo test -p lore surgical`\n\n## Edge Cases\n\n- ISO 8601 with timezone offset (GitLab returns `+00:00` not `Z`) must parse correctly\n- `test_is_stale_with_z_suffix` ensures `Z` variant also works\n- First-ever ingest of an IID: no existing DB row, TOCTOU guard must treat as \"not stale\" (db_updated_at is None)\n- GitLab returns 404 for a deleted issue/MR during preflight: failure, not hard error\n- Concurrent surgical syncs for same IID: `process_single_issue` uses `unchecked_transaction()` with UPSERT, so last-writer-wins is safe\n- `process_single_mr` returns `ProcessMrResult` which is private: either make it `pub(crate)` in bd-1sc6 or replicate needed fields\n- `make_test_issue` must produce all required fields or `transform_issue` will fail\n- `make_test_mr` additionally needs `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- ISO 8601 fixtures must use `+00:00` suffix (GitLab format), not `Z` (test both)\n- Test DB needs `run_migrations` to create all tables including `dirty_sources`, `documents`, `issues`, `merge_requests`\n\n## Dependency Context\n\n- **Blocked by bd-159p**: `get_issue_by_iid` and `get_mr_by_iid` on GitLabClient (preflight needs these)\n- **Blocked by bd-1sc6**: Visibility changes to `process_single_issue`, `process_single_mr`, `ProcessMrResult` (must be `pub(crate)`)\n- **Blocks bd-1i4i**: Orchestration function calls `preflight_fetch` + `ingest_issue_by_iid` / `ingest_mr_by_iid`\n- **Blocks bd-kanh**: Dependent helpers are called after ingest to fetch discussions, resource events, etc.\n- **Blocks bd-wcja**: SyncResult surgical fields depend on return types from this module\n- **Blocks bd-3jqx**: Integration test stubs from this bead's test file are implemented in 
bd-3jqx","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:14:19.449695Z","created_by":"tayloreernisse","updated_at":"2026-02-18T19:25:52.672159Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"],"dependencies":[{"issue_id":"bd-3sez","depends_on_id":"bd-1i4i","type":"blocks","created_at":"2026-02-17T19:19:24.701282Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-3jqx","type":"blocks","created_at":"2026-02-17T19:19:25.751613Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-kanh","type":"blocks","created_at":"2026-02-17T19:19:23.887247Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sez","depends_on_id":"bd-wcja","type":"blocks","created_at":"2026-02-17T19:19:24.029305Z","created_by":"tayloreernisse"}]} {"id":"bd-3sh","title":"Add 'lore count events' command with robot mode","description":"## Background\nNeed to verify event ingestion and report counts by type. The existing count command (src/cli/commands/count.rs) handles issues, mrs, discussions, notes with both human and robot output. This adds 'events' as a new count subcommand.\n\n## Approach\nExtend the existing count command in src/cli/commands/count.rs:\n\n1. Add CountTarget::Events variant (or string match) in the count dispatcher\n2. Query each event table with GROUP BY entity type:\n```sql\nSELECT \n CASE WHEN issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as entity_type,\n COUNT(*) as count\nFROM resource_state_events\nGROUP BY entity_type;\n-- (repeat for label and milestone events)\n```\n\n3. Human output: table format\n```\nEvent Type Issues MRs Total\nState events 1,234 567 1,801\nLabel events 2,345 890 3,235\nMilestone events 456 123 579\nTotal 4,035 1,580 5,615\n```\n\n4. 
Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"state_events\": {\"issue\": 1234, \"merge_request\": 567, \"total\": 1801},\n \"label_events\": {\"issue\": 2345, \"merge_request\": 890, \"total\": 3235},\n \"milestone_events\": {\"issue\": 456, \"merge_request\": 123, \"total\": 579},\n \"total\": 5615\n }\n}\n```\n\n5. Register in CLI: add \"events\" to count's entity_type argument in src/cli/mod.rs\n\n## Acceptance Criteria\n- [ ] `lore count events` shows correct counts by event type and entity type\n- [ ] Robot JSON matches the schema above\n- [ ] Works with empty tables (all zeros)\n- [ ] Does not error if migration 011 hasn't been applied (graceful degradation or \"no event tables\" message)\n\n## Files\n- src/cli/commands/count.rs (add events counting logic)\n- src/cli/mod.rs (add \"events\" to count's accepted entity types)\n\n## TDD Loop\nRED: tests/count_tests.rs (or extend existing):\n- `test_count_events_empty_tables` - verify all zeros on fresh DB\n- `test_count_events_with_data` - seed state + label events, verify correct counts\n- `test_count_events_robot_json` - verify JSON structure\n\nGREEN: Add the events branch to count command\n\nVERIFY: `cargo test count -- --nocapture`\n\n## Edge Cases\n- Tables don't exist if user hasn't run migrate — check table existence first or catch the error\n- COUNT with GROUP BY returns no rows for empty tables — need to handle missing entity types as 0","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-02T21:31:57.379702Z","created_by":"tayloreernisse","updated_at":"2026-02-03T16:21:21.408874Z","closed_at":"2026-02-03T16:21:21.408806Z","close_reason":"Added 'events' to count CLI parser, run_count_events function, print_event_count (table format) and print_event_count_json (structured JSON). 
Wired into handle_count in main.rs.","compaction_level":0,"original_size":0,"labels":["cli","gate-1","phase-b"],"dependencies":[{"issue_id":"bd-3sh","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:57.380927Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sh","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T21:32:06.308285Z","created_by":"tayloreernisse"}]} {"id":"bd-3t1b","title":"Implement MR Detail (state + action + view)","description":"## Background\nThe MR Detail shows a single merge request with file changes, diff discussions (position-specific comments), and general discussions. Same progressive hydration pattern as Issue Detail. MR detail has additional sections: file change list and diff-context notes.\n\n## Approach\nState (state/mr_detail.rs):\n- MrDetailState: current_key (Option), metadata (Option), discussions (Vec), diff_discussions (Vec), file_changes (Vec), cross_refs (Vec), tree_state (TreePersistState), scroll_offset, active_tab (MrTab: Overview|Files|Discussions)\n- MrMetadata: iid, title, description, state, author, reviewer, assignee, labels, target_branch, source_branch, created_at, updated_at, web_url, draft, merge_status\n- FileChange: old_path, new_path, change_type (added/modified/deleted/renamed), diff_line_count\n- DiffDiscussion: file_path, old_line, new_line, notes (Vec)\n\nAction (action.rs):\n- fetch_mr_detail(conn, key, clock) -> Result: uses with_read_snapshot\n\nView (view/mr_detail.rs):\n- render_mr_detail(frame, state, area, theme): header, tab bar (Overview|Files|Discussions), tab content\n- Overview tab: description + cross-refs\n- Files tab: file change list with change type indicators (+/-/~)\n- Discussions tab: general discussions + diff discussions grouped by file\n\n## Acceptance Criteria\n- [ ] MR metadata loads in Phase 1\n- [ ] Tab navigation between Overview, Files, Discussions\n- [ ] File changes list shows change type and line count\n- [ ] Diff discussions grouped by 
file path\n- [ ] General discussions rendered in tree widget\n- [ ] Cross-references navigable (related issues, etc.)\n- [ ] All text sanitized via sanitize_for_terminal()\n- [ ] Esc returns to MR List with state preserved\n\n## Files\n- MODIFY: crates/lore-tui/src/state/mr_detail.rs (expand from stub)\n- MODIFY: crates/lore-tui/src/action.rs (add fetch_mr_detail)\n- CREATE: crates/lore-tui/src/view/mr_detail.rs\n\n## TDD Anchor\nRED: Write test_fetch_mr_detail in action.rs that inserts an MR with 3 file changes, calls fetch_mr_detail, asserts 3 files returned.\nGREEN: Implement fetch_mr_detail with file change query.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_fetch_mr_detail\n\n## Edge Cases\n- MR with no file changes (draft MR created without pushes): show \"No file changes\" message\n- Diff discussions referencing deleted files: show file path with strikethrough style\n- Very large MRs (hundreds of files): paginate file list, don't load all at once\n\n## Dependency Context\nUses discussion tree and cross-ref widgets from \"Implement discussion tree + cross-reference widgets\" task.\nUses same patterns as \"Implement Issue Detail\" task.\nUses MrDetailState from \"Implement AppState composition\" task.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T16:59:38.427124Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:28.423643Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-3t1b","depends_on_id":"bd-1cl9","type":"blocks","created_at":"2026-02-12T18:11:28.423617Z","created_by":"tayloreernisse"},{"issue_id":"bd-3t1b","depends_on_id":"bd-1d6z","type":"blocks","created_at":"2026-02-12T17:09:48.656416Z","created_by":"tayloreernisse"},{"issue_id":"bd-3t1b","depends_on_id":"bd-2kr0","type":"blocks","created_at":"2026-02-12T17:09:48.646513Z","created_by":"tayloreernisse"}]} {"id":"bd-3t6r","title":"Epic: TUI Phase 5 — Polish","description":"## Background\nPhase 5 adds 
polish features: responsive breakpoints for all screens, session state persistence (resume where you left off), single-instance locking, entity/render caches for performance, text width handling for Unicode, snapshot tests, and terminal compatibility test matrix.\n\n## Acceptance Criteria\n- [ ] All screens adapt to terminal width with responsive breakpoints\n- [ ] Session state persisted and restored on relaunch\n- [ ] Single-instance lock prevents concurrent TUI launches\n- [ ] Entity cache enables near-instant detail view reopens\n- [ ] Snapshot tests produce deterministic output with FakeClock\n- [ ] Terminal compat verified across iTerm2, tmux, Alacritty, kitty","status":"open","priority":1,"issue_type":"epic","created_at":"2026-02-12T17:02:47.178645Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:51.435708Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-3t6r","depends_on_id":"bd-1df9","type":"blocks","created_at":"2026-02-12T18:11:51.435686Z","created_by":"tayloreernisse"}]} @@ -318,7 +318,7 @@ {"id":"bd-wnuo","title":"Implement performance benchmark fixtures (S/M/L tiers)","description":"## Background\nTiered performance fixtures validate latency at three data scales. S and M tiers are CI-enforced gates; L tier is advisory. 
Fixtures are synthetic SQLite databases with realistic data distributions.\n\n## Approach\nFixture generator (benches/ or tests/fixtures/):\n- S-tier: 10k issues, 5k MRs, 50k notes, 10k docs\n- M-tier: 100k issues, 50k MRs, 500k notes, 50k docs\n- L-tier: 250k issues, 100k MRs, 1M notes, 100k docs\n- Realistic distributions: state (60% closed, 30% opened, 10% other), authors from pool of 50 names, labels from pool of 20, dates spanning 2 years\n\nBenchmarks:\n- p95 first-paint latency: Dashboard load, Issue List load, MR List load\n- p95 keyset pagination: next page fetch\n- p95 search latency: lexical and hybrid modes\n- Memory ceiling: RSS after full dashboard + list load\n- SLO assertions per tier (see Phase 0 criteria)\n\nRequired indexes must be present in fixture DBs:\n- idx_issues_list_default, idx_mrs_list_default, idx_discussions_entity, idx_notes_discussion\n\n## Acceptance Criteria\n- [ ] S-tier fixture generated with correct counts\n- [ ] M-tier fixture generated with correct counts\n- [ ] L-tier fixture generated (on-demand, not CI)\n- [ ] p95 first-paint < 50ms (S), < 75ms (M), < 150ms (L)\n- [ ] p95 keyset pagination < 50ms (S), < 75ms (M), < 100ms (L)\n- [ ] p95 search latency < 100ms (S), < 200ms (M), < 400ms (L)\n- [ ] Memory < 150MB RSS (S), < 250MB RSS (M)\n- [ ] All required indexes present in fixtures\n- [ ] EXPLAIN QUERY PLAN shows index usage for top 10 queries\n\n## Files\n- CREATE: crates/lore-tui/benches/perf_benchmarks.rs\n- CREATE: crates/lore-tui/tests/fixtures/generate_fixtures.rs\n\n## TDD Anchor\nRED: Write benchmark_dashboard_load_s_tier that generates S-tier fixture, measures Dashboard load time, asserts p95 < 50ms.\nGREEN: Implement fetch_dashboard with efficient queries.\nVERIFY: cargo bench --manifest-path crates/lore-tui/Cargo.toml\n\n## Edge Cases\n- Fixture generation must be deterministic (seeded RNG) for reproducible benchmarks\n- CI machines may be slower — use generous multipliers or relative thresholds\n- S-tier fits in 
memory; M-tier requires WAL mode for concurrent access\n- Benchmark warmup: discard first 5 iterations\n\n## Dependency Context\nUses all action.rs query functions from Phase 2/3 tasks.\nUses DbManager from \"Implement DbManager\" task.\nUses required index migrations from the main lore crate.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:05:12.867291Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:38.463811Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-wnuo","depends_on_id":"bd-1b6k","type":"blocks","created_at":"2026-02-12T18:11:38.463783Z","created_by":"tayloreernisse"},{"issue_id":"bd-wnuo","depends_on_id":"bd-3eis","type":"blocks","created_at":"2026-02-12T17:10:02.976166Z","created_by":"tayloreernisse"}]} {"id":"bd-wrw1","title":"Implement CLI/TUI parity tests (counts, lists, detail, search, sanitization)","description":"## Background\nParity tests ensure the TUI and CLI show the same data. Both interfaces query the same SQLite database, but through different code paths (TUI action functions vs CLI command handlers). Drift can occur when query functions are duplicated or modified independently. These tests catch drift by running both code paths against the same in-memory DB and comparing results.\n\n## Approach\n\n### Test Strategy: Library-Level (Same Process)\nTests run in the same process with a shared in-memory SQLite DB. No binary execution, no JSON parsing, no process spawning. 
Both TUI action functions and CLI query functions are called as library code.\n\nSetup pattern:\n```rust\nuse lore::core::db::{create_connection, run_migrations};\nuse std::path::Path;\n\nfn setup_parity_db() -> rusqlite::Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n insert_fixture_data(&conn); // shared fixture with known counts\n conn\n}\n```\n\n### Fixture Data\nCreate a deterministic fixture with known quantities:\n- 1 project (gitlab_project_id=1, path_with_namespace=\"group/repo\", web_url=\"https://gitlab.example.com/group/repo\")\n- 15 issues (5 opened, 5 closed, 5 with various states)\n- 10 merge_requests (3 opened, 3 merged, 2 closed, 2 draft)\n- 30 discussions (20 for issues, 10 for MRs)\n- 60 notes (2 per discussion)\n- Insert via direct SQL (same pattern as existing tests in src/core/db.rs)\n\n### Parity Checks\n\n**Dashboard Count Parity:**\n- TUI: call the dashboard fetch function that returns entity counts\n- CLI: call the same count query functions used by `lore --robot count`\n- Assert: issue_count, mr_count, discussion_count, note_count all match\n\n**Issue List Parity:**\n- TUI: call issue list action with default filter (state=all, limit=50, sort=updated_at DESC)\n- CLI: call the issue list query used by `lore --robot issues`\n- Assert: same IIDs in same order, same state values for each\n\n**MR List Parity:**\n- TUI: call MR list action with default filter\n- CLI: call the MR list query used by `lore --robot mrs`\n- Assert: same IIDs in same order, same state values, same draft flags\n\n**Issue Detail Parity:**\n- TUI: call issue detail fetch for a specific IID\n- CLI: call the issue detail query used by `lore --robot issues `\n- Assert: same metadata fields (title, state, author, labels, created_at, updated_at), same discussion count\n\n**Search Parity:**\n- TUI: call search action with a known query term\n- CLI: call the search function used by `lore --robot search`\n- 
Assert: same document IDs returned in same rank order\n\n**Sanitization Parity:**\n- Insert an issue with ANSI escape sequences in the title: \"Normal \\x1b[31mRED\\x1b[0m text\"\n- TUI: fetch and sanitize via terminal safety module\n- CLI: fetch and render via robot mode (which strips ANSI)\n- Assert: both produce clean output without raw escape sequences\n\n## Acceptance Criteria\n- [ ] Dashboard counts: TUI == CLI for issues, MRs, discussions, notes on shared fixture\n- [ ] Issue list: TUI returns same IIDs in same order as CLI query function\n- [ ] MR list: TUI returns same IIDs in same order as CLI query function\n- [ ] Issue detail: TUI metadata matches CLI for title, state, author, discussion count\n- [ ] Search results: same document IDs in same rank order\n- [ ] Sanitization: both strip ANSI escape sequences from issue titles\n- [ ] All tests use in-memory DB (no file I/O, no binary spawning)\n- [ ] Tests are deterministic (fixed fixture, no wall clock dependency)\n\n## Files\n- CREATE: crates/lore-tui/tests/parity_tests.rs\n\n## TDD Anchor\nRED: Write `test_dashboard_count_parity` that creates shared fixture DB, calls both TUI dashboard fetch and CLI count query functions, asserts all counts equal.\nGREEN: Ensure TUI query functions exist and match CLI query logic.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml parity\n\nAdditional tests:\n- test_issue_list_parity\n- test_mr_list_parity\n- test_issue_detail_parity\n- test_search_parity\n- test_sanitization_parity\n\n## Edge Cases\n- CLI and TUI may use different default sort orders — normalize to same ORDER BY in test setup\n- CLI list commands default to limit=50, TUI may default to page size — test with explicit limit\n- Fixture must include edge cases: NULL labels, empty descriptions, issues with work item status set\n- Schema version must match between both code paths (same migration version)\n- FTS index must be populated for search parity (call generate-docs equivalent on 
fixture)\n\n## Dependency Context\n- Uses TUI action functions from Phase 2/3 screen beads (must exist as library code)\n- Uses CLI query functions from src/cli/ (already exist as `lore` library exports)\n- Uses lore::core::db for shared DB setup\n- Uses terminal safety module (bd-3ir1) for sanitization comparison\n- Depends on bd-14hv (soak tests) being complete per phase ordering","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:05:51.620596Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:38.629958Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-wrw1","depends_on_id":"bd-14hv","type":"blocks","created_at":"2026-02-12T17:10:02.997223Z","created_by":"tayloreernisse"},{"issue_id":"bd-wrw1","depends_on_id":"bd-2o49","type":"blocks","created_at":"2026-02-12T18:11:38.629931Z","created_by":"tayloreernisse"}]} {"id":"bd-wzqi","title":"Implement Command Palette (state + view)","description":"## Background\nThe Command Palette is a modal overlay (Ctrl+P) that provides fuzzy-match access to all commands. 
It uses FrankenTUI's built-in CommandPalette widget and is populated from the CommandRegistry.\n\n## Approach\nState (state/command_palette.rs):\n- CommandPaletteState: wraps ftui CommandPalette widget state\n- input (String), filtered_commands (Vec), selected_index (usize), visible (bool)\n\nView (view/command_palette.rs):\n- Modal overlay centered on screen (60% width, 50% height)\n- Text input at top for fuzzy search\n- Scrollable list of matching commands with keybinding hints\n- Enter executes selected command, Esc closes palette\n- Fuzzy matching: subsequence match on command label and help text\n\nIntegration:\n- Ctrl+P from any screen opens palette (handled in interpret_key stage 2)\n- execute_palette_action() in app.rs converts selected command to Msg\n\n## Acceptance Criteria\n- [ ] Ctrl+P opens palette from any screen in Normal mode\n- [ ] Fuzzy matching filters commands as user types\n- [ ] Commands show label + keybinding + help text\n- [ ] Enter executes selected command\n- [ ] Esc closes palette without action\n- [ ] Palette populated from CommandRegistry (single source of truth)\n- [ ] Modal renders on top of current screen content\n\n## Files\n- MODIFY: crates/lore-tui/src/state/command_palette.rs (expand from stub)\n- CREATE: crates/lore-tui/src/view/command_palette.rs\n\n## TDD Anchor\nRED: Write test_palette_fuzzy_match that creates registry with 5 commands, filters with \"iss\", asserts Issue-related commands match.\nGREEN: Implement fuzzy matching on command labels.\nVERIFY: cargo test --manifest-path crates/lore-tui/Cargo.toml test_palette_fuzzy\n\n## Edge Cases\n- Empty search shows all commands\n- Very long command labels: truncate with ellipsis\n- Command not available on current screen: show but gray out\n- Palette should not steal focus from text inputs — only opens in Normal mode\n\n## Dependency Context\nUses CommandRegistry from \"Implement CommandRegistry\" task.\nUses ftui CommandPalette widget from FrankenTUI.\nUses 
InputMode::Palette from \"Implement core types\" task.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T17:01:37.250065Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:11:34.175286Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-wzqi","depends_on_id":"bd-35g5","type":"blocks","created_at":"2026-02-12T17:10:02.852753Z","created_by":"tayloreernisse"},{"issue_id":"bd-wzqi","depends_on_id":"bd-nwux","type":"blocks","created_at":"2026-02-12T18:11:34.175260Z","created_by":"tayloreernisse"}]} -{"id":"bd-x8oq","title":"Write surgical_tests.rs with TDD test suite","description":"## Background\n\nThe surgical sync module (`src/ingestion/surgical.rs` from bd-3sez) needs a comprehensive test suite. Tests use in-memory SQLite (no real GitLab or Ollama) and wiremock for HTTP mocks. The test file lives at `src/ingestion/surgical_tests.rs` and is included via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;` in surgical.rs.\n\nKey testing constraints:\n- In-memory DB pattern: `create_connection(Path::new(\":memory:\"))` + `run_migrations(&conn)`\n- Test project insert: `INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)` (no `name`/`last_seen_at` columns)\n- `GitLabIssue` required fields: `id`, `iid`, `project_id`, `title`, `state`, `created_at`, `updated_at`, `author`, `web_url`\n- `GitLabMergeRequest` adds: `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- `updated_at` is `String` (ISO 8601) in GitLab types, e.g. 
`\"2026-02-17T12:00:00.000+00:00\"`\n- `SourceType` enum variants: `Issue`, `MergeRequest`, `Discussion`, `Note`\n- `dirty_sources` table: `(source_type TEXT, source_id INTEGER)` primary key\n\n## Approach\n\nCreate `src/ingestion/surgical_tests.rs` with:\n\n### Test Helpers\n- `setup_db() -> Connection` — in-memory DB with migrations + test project row\n- `make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue` — minimal valid JSON fixture\n- `make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest` — minimal valid JSON fixture\n- `get_db_updated_at(conn, table, iid) -> Option` — helper to query DB updated_at for assertions\n- `get_dirty_keys(conn) -> Vec<(String, i64)>` — query dirty_sources for assertions\n\n### Sync Tests (13)\n1. `test_ingest_issue_by_iid_upserts_and_marks_dirty` — fresh issue ingest, verify DB row + dirty_sources entry\n2. `test_ingest_mr_by_iid_upserts_and_marks_dirty` — fresh MR ingest, verify DB row + dirty_sources entry\n3. `test_toctou_skips_stale_issue` — insert issue at T1, call ingest with payload at T1, assert skipped_stale=true and no dirty mark\n4. `test_toctou_skips_stale_mr` — same for MRs\n5. `test_toctou_allows_newer_issue` — DB has T1, payload has T2 (T2 > T1), assert upserted=true\n6. `test_toctou_allows_newer_mr` — same for MRs\n7. `test_is_stale_parses_iso8601` — unit test: `\"2026-02-17T12:00:00.000+00:00\"` parses to correct ms-epoch\n8. `test_is_stale_handles_none_db_value` — first ingest, no DB row, assert not stale\n9. `test_is_stale_with_z_suffix` — `\"2026-02-17T12:00:00Z\"` also parses correctly\n10. `test_ingest_issue_returns_dirty_source_keys` — verify `dirty_source_keys` contains `(SourceType::Issue, local_id)`\n11. `test_ingest_mr_returns_dirty_source_keys` — verify MR dirty source keys\n12. `test_ingest_issue_updates_existing` — ingest same IID twice with newer updated_at, verify update\n13. `test_ingest_mr_updates_existing` — same for MRs\n\n### Async Preflight Test (1, wiremock)\n14. 
`test_preflight_fetch_returns_issues_and_mrs` — wiremock GET `/projects/:id/issues?iids[]=42` returns 200 with fixture, verify PreflightResult.issues has 1 entry\n\n### Integration Stubs (4, for bd-3jqx)\n15. `test_surgical_cancellation_during_preflight` — stub: signal.cancel() before preflight, verify early return\n16. `test_surgical_timeout_during_fetch` — stub: wiremock delay exceeds timeout\n17. `test_surgical_embed_isolation` — stub: verify only surgical docs get embedded\n18. `test_surgical_payload_integrity` — stub: verify ingested data matches GitLab payload exactly\n\n## Acceptance Criteria\n\n- [ ] All 13 sync tests pass with in-memory SQLite\n- [ ] Async preflight test passes with wiremock\n- [ ] 4 integration stubs compile and are marked `#[ignore]` (implemented in bd-3jqx)\n- [ ] Test helpers produce valid GitLabIssue/GitLabMergeRequest fixtures that pass `transform_issue`/`transform_merge_request`\n- [ ] No flaky tests: deterministic timestamps, no real network calls\n- [ ] File wired into surgical.rs via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;`\n\n## Files\n\n- `src/ingestion/surgical_tests.rs` (NEW)\n- `src/ingestion/surgical.rs` (add `#[cfg(test)]` module path — created in bd-3sez)\n\n## TDD Anchor\n\nThis bead IS the test suite. Tests are written first (TDD red phase), then bd-3sez implements the production code to make them pass (green phase). 
Specific test signatures:\n\n```rust\n#[test]\nfn test_ingest_issue_by_iid_upserts_and_marks_dirty() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n let config = Config::default();\n let result = ingest_issue_by_iid(&conn, &config, /*project_id=*/1, &issue).unwrap();\n assert!(result.upserted);\n assert!(!result.skipped_stale);\n let dirty = get_dirty_keys(&conn);\n assert!(dirty.contains(&(\"issue\".to_string(), /*local_id from DB*/)));\n}\n\n#[test]\nfn test_toctou_skips_stale_issue() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n // Ingest same timestamp again\n let result = ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n assert!(result.skipped_stale);\n}\n\n#[tokio::test]\nasync fn test_preflight_fetch_returns_issues_and_mrs() {\n let mock = MockServer::start().await;\n // ... wiremock setup ...\n}\n```\n\n## Edge Cases\n\n- `make_test_issue` must produce all required fields (`id`, `iid`, `project_id`, `title`, `state`, `created_at`, `updated_at`, `author` with `username` and `id`, `web_url`) or `transform_issue` will fail\n- `make_test_mr` additionally needs `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- ISO 8601 fixtures must use `+00:00` suffix (GitLab format), not `Z`\n- Integration stubs must be `#[ignore]` so they do not fail CI before bd-3jqx implements them\n- Test DB needs `run_migrations` to create all tables including `dirty_sources`, `documents`, `issues`, `merge_requests`\n\n## Dependency Context\n\n- **Blocked by bd-3sez**: Cannot compile tests until surgical.rs module exists (circular co-dependency — develop together)\n- **Blocks bd-3jqx**: Integration test stubs are implemented in that bead\n- **No other blockers**: Uses only in-memory DB and wiremock, no external 
dependencies","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-17T19:15:05.498388Z","created_by":"tayloreernisse","updated_at":"2026-02-17T20:02:42.840151Z","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} +{"id":"bd-x8oq","title":"Write surgical_tests.rs with TDD test suite","description":"## Background\n\nThe surgical sync module (`src/ingestion/surgical.rs` from bd-3sez) needs a comprehensive test suite. Tests use in-memory SQLite (no real GitLab or Ollama) and wiremock for HTTP mocks. The test file lives at `src/ingestion/surgical_tests.rs` and is included via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;` in surgical.rs.\n\nKey testing constraints:\n- In-memory DB pattern: `create_connection(Path::new(\":memory:\"))` + `run_migrations(&conn)`\n- Test project insert: `INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)` (no `name`/`last_seen_at` columns)\n- `GitLabIssue` required fields: `id`, `iid`, `project_id`, `title`, `state`, `created_at`, `updated_at`, `author`, `web_url`\n- `GitLabMergeRequest` adds: `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- `updated_at` is `String` (ISO 8601) in GitLab types, e.g. `\"2026-02-17T12:00:00.000+00:00\"`\n- `SourceType` enum variants: `Issue`, `MergeRequest`, `Discussion`, `Note`\n- `dirty_sources` table: `(source_type TEXT, source_id INTEGER)` primary key\n\n## Approach\n\nCreate `src/ingestion/surgical_tests.rs` with:\n\n### Test Helpers\n- `setup_db() -> Connection` — in-memory DB with migrations + test project row\n- `make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue` — minimal valid JSON fixture\n- `make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest` — minimal valid JSON fixture\n- `get_db_updated_at(conn, table, iid) -> Option` — helper to query DB updated_at for assertions\n- `get_dirty_keys(conn) -> Vec<(String, i64)>` — query dirty_sources for assertions\n\n### Sync Tests (13)\n1. 
`test_ingest_issue_by_iid_upserts_and_marks_dirty` — fresh issue ingest, verify DB row + dirty_sources entry\n2. `test_ingest_mr_by_iid_upserts_and_marks_dirty` — fresh MR ingest, verify DB row + dirty_sources entry\n3. `test_toctou_skips_stale_issue` — insert issue at T1, call ingest with payload at T1, assert skipped_stale=true and no dirty mark\n4. `test_toctou_skips_stale_mr` — same for MRs\n5. `test_toctou_allows_newer_issue` — DB has T1, payload has T2 (T2 > T1), assert upserted=true\n6. `test_toctou_allows_newer_mr` — same for MRs\n7. `test_is_stale_parses_iso8601` — unit test: `\"2026-02-17T12:00:00.000+00:00\"` parses to correct ms-epoch\n8. `test_is_stale_handles_none_db_value` — first ingest, no DB row, assert not stale\n9. `test_is_stale_with_z_suffix` — `\"2026-02-17T12:00:00Z\"` also parses correctly\n10. `test_ingest_issue_returns_dirty_source_keys` — verify `dirty_source_keys` contains `(SourceType::Issue, local_id)`\n11. `test_ingest_mr_returns_dirty_source_keys` — verify MR dirty source keys\n12. `test_ingest_issue_updates_existing` — ingest same IID twice with newer updated_at, verify update\n13. `test_ingest_mr_updates_existing` — same for MRs\n\n### Async Preflight Test (1, wiremock)\n14. `test_preflight_fetch_returns_issues_and_mrs` — wiremock GET `/projects/:id/issues?iids[]=42` returns 200 with fixture, verify PreflightResult.issues has 1 entry\n\n### Integration Stubs (4, for bd-3jqx)\n15. `test_surgical_cancellation_during_preflight` — stub: signal.cancel() before preflight, verify early return\n16. `test_surgical_timeout_during_fetch` — stub: wiremock delay exceeds timeout\n17. `test_surgical_embed_isolation` — stub: verify only surgical docs get embedded\n18. 
`test_surgical_payload_integrity` — stub: verify ingested data matches GitLab payload exactly\n\n## Acceptance Criteria\n\n- [ ] All 13 sync tests pass with in-memory SQLite\n- [ ] Async preflight test passes with wiremock\n- [ ] 4 integration stubs compile and are marked `#[ignore]` (implemented in bd-3jqx)\n- [ ] Test helpers produce valid GitLabIssue/GitLabMergeRequest fixtures that pass `transform_issue`/`transform_merge_request`\n- [ ] No flaky tests: deterministic timestamps, no real network calls\n- [ ] File wired into surgical.rs via `#[cfg(test)] #[path = \"surgical_tests.rs\"] mod tests;`\n\n## Files\n\n- `src/ingestion/surgical_tests.rs` (NEW)\n- `src/ingestion/surgical.rs` (add `#[cfg(test)]` module path — created in bd-3sez)\n\n## TDD Anchor\n\nThis bead IS the test suite. Tests are written first (TDD red phase), then bd-3sez implements the production code to make them pass (green phase). Specific test signatures:\n\n```rust\n#[test]\nfn test_ingest_issue_by_iid_upserts_and_marks_dirty() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n let config = Config::default();\n let result = ingest_issue_by_iid(&conn, &config, /*project_id=*/1, &issue).unwrap();\n assert!(result.upserted);\n assert!(!result.skipped_stale);\n let dirty = get_dirty_keys(&conn);\n assert!(dirty.contains(&(\"issue\".to_string(), /*local_id from DB*/)));\n}\n\n#[test]\nfn test_toctou_skips_stale_issue() {\n let conn = setup_db();\n let issue = make_test_issue(42, \"2026-02-17T12:00:00.000+00:00\");\n ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n // Ingest same timestamp again\n let result = ingest_issue_by_iid(&conn, &Config::default(), 1, &issue).unwrap();\n assert!(result.skipped_stale);\n}\n\n#[tokio::test]\nasync fn test_preflight_fetch_returns_issues_and_mrs() {\n let mock = MockServer::start().await;\n // ... 
wiremock setup ...\n}\n```\n\n## Edge Cases\n\n- `make_test_issue` must produce all required fields (`id`, `iid`, `project_id`, `title`, `state`, `created_at`, `updated_at`, `author` with `username` and `id`, `web_url`) or `transform_issue` will fail\n- `make_test_mr` additionally needs `source_branch`, `target_branch`, `draft`, `merge_status`, `reviewers`\n- ISO 8601 fixtures must use `+00:00` suffix (GitLab format), not `Z`\n- Integration stubs must be `#[ignore]` so they do not fail CI before bd-3jqx implements them\n- Test DB needs `run_migrations` to create all tables including `dirty_sources`, `documents`, `issues`, `merge_requests`\n\n## Dependency Context\n\n- **Blocked by bd-3sez**: Cannot compile tests until surgical.rs module exists (circular co-dependency — develop together)\n- **Blocks bd-3jqx**: Integration test stubs are implemented in that bead\n- **No other blockers**: Uses only in-memory DB and wiremock, no external dependencies","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-17T19:15:05.498388Z","created_by":"tayloreernisse","updated_at":"2026-02-18T19:25:57.434371Z","closed_at":"2026-02-18T19:25:57.434313Z","close_reason":"Merged into bd-3sez: tests belong with the code they test, not in a separate bead. TDD is the default workflow, not a separate deliverable.","compaction_level":0,"original_size":0,"labels":["surgical-sync"]} {"id":"bd-xhz","title":"[CP1] GitLab client pagination methods","description":"## Background\n\nGitLab pagination methods enable fetching large result sets (issues, discussions) as async streams. 
The client uses `x-next-page` headers to determine continuation and applies cursor rewind for tuple-based incremental sync.\n\n## Approach\n\nAdd pagination methods to GitLabClient using `async-stream` crate:\n\n### Methods to Add\n\n```rust\nimpl GitLabClient {\n /// Paginate through issues for a project.\n pub fn paginate_issues(\n &self,\n gitlab_project_id: i64,\n updated_after: Option, // ms epoch cursor\n cursor_rewind_seconds: u32,\n ) -> Pin> + Send + '_>>\n\n /// Paginate through discussions for an issue.\n pub fn paginate_issue_discussions(\n &self,\n gitlab_project_id: i64,\n issue_iid: i64,\n ) -> Pin> + Send + '_>>\n\n /// Make request and return response with headers for pagination.\n async fn request_with_headers(\n &self,\n path: &str,\n params: &[(&str, String)],\n ) -> Result<(T, HeaderMap)>\n}\n```\n\n### Pagination Logic\n\n1. Start at page 1, per_page=100\n2. For issues: add scope=all, state=all, order_by=updated_at, sort=asc\n3. Apply cursor rewind: `updated_after = cursor - rewind_seconds` (clamped to 0)\n4. Yield each item from response\n5. Check `x-next-page` header for continuation\n6. 
Stop when header is empty/absent OR response is empty\n\n### Cursor Rewind\n\n```rust\nif let Some(ts) = updated_after {\n let rewind_ms = (cursor_rewind_seconds as i64) * 1000;\n let rewound = (ts - rewind_ms).max(0); // Clamp to avoid underflow\n // Convert to ISO 8601 for updated_after param\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `paginate_issues` returns Stream of GitLabIssue\n- [ ] `paginate_issues` adds scope=all, state=all, order_by=updated_at, sort=asc\n- [ ] `paginate_issues` applies cursor rewind with max(0) clamping\n- [ ] `paginate_issue_discussions` returns Stream of GitLabDiscussion\n- [ ] Both methods follow x-next-page header until empty\n- [ ] Both methods stop on empty response (fallback)\n- [ ] `request_with_headers` returns (T, HeaderMap) tuple\n\n## Files\n\n- src/gitlab/client.rs (edit - add methods)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/pagination_tests.rs\n#[tokio::test] async fn fetches_all_pages_when_multiple_exist()\n#[tokio::test] async fn respects_per_page_parameter()\n#[tokio::test] async fn follows_x_next_page_header_until_empty()\n#[tokio::test] async fn falls_back_to_empty_page_stop_if_headers_missing()\n#[tokio::test] async fn applies_cursor_rewind_for_tuple_semantics()\n#[tokio::test] async fn clamps_negative_rewind_to_zero()\n```\n\nGREEN: Implement pagination methods with async-stream\n\nVERIFY: `cargo test pagination`\n\n## Edge Cases\n\n- cursor_updated_at near zero - rewind must not underflow (use max(0))\n- GitLab returns empty x-next-page - treat as end of pages\n- GitLab omits pagination headers entirely - use empty response as stop condition\n- DateTime conversion fails - omit updated_after and fetch all (safe fallback)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.222168Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:28:39.192876Z","closed_at":"2026-01-25T22:28:39.192815Z","close_reason":"Implemented paginate_issues and paginate_issue_discussions with 
async-stream, cursor rewind with max(0) clamping, x-next-page header following, 4 unit tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-xhz","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.398212Z","created_by":"tayloreernisse"},{"issue_id":"bd-xhz","depends_on_id":"bd-2ys","type":"blocks","created_at":"2026-01-25T17:04:05.371440Z","created_by":"tayloreernisse"}]} {"id":"bd-xsgw","title":"NOTE-TEST2: Another test bead","description":"type: task","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T16:58:53.392214Z","updated_at":"2026-02-12T16:59:02.051710Z","closed_at":"2026-02-12T16:59:02.051663Z","close_reason":"test","compaction_level":0,"original_size":0} {"id":"bd-y095","title":"Implement SyncDeltaLedger for post-sync filtered navigation","description":"## Background\n\nAfter a sync completes, the Sync Summary screen shows delta counts (+12 new issues, +3 new MRs). Pressing `i` or `m` should navigate to Issue/MR List filtered to show ONLY the entities that changed in this sync run. The SyncDeltaLedger is an in-memory data structure (not persisted to DB) that records the exact IIDs of new/updated entities during a sync run. It lives for the duration of one TUI session and is cleared when a new sync starts. If the ledger is unavailable (e.g., after app restart), the Sync Summary falls back to a timestamp-based filter using `sync_status.last_completed_at`.\n\n## Approach\n\nCreate a `sync_delta.rs` module with:\n\n1. **`SyncDeltaLedger` struct**:\n ```rust\n pub struct SyncDeltaLedger {\n issues_new: Vec, // IIDs of newly created issues\n issues_updated: Vec, // IIDs of updated (not new) issues\n mrs_new: Vec, // IIDs of newly created MRs\n mrs_updated: Vec, // IIDs of updated MRs\n discussions_new: usize, // count only (too many to track individually)\n events_new: usize, // count only\n completed_at: Option, // timestamp when sync finished (fallback anchor)\n }\n ```\n2. 
**Builder pattern** — `SyncDeltaLedger::new()` starts empty, populated during sync via:\n - `record_issue(iid: i64, is_new: bool)`\n - `record_mr(iid: i64, is_new: bool)`\n - `record_discussions(count: usize)`\n - `record_events(count: usize)`\n - `finalize(completed_at: i64)` — marks ledger as complete\n3. **Query methods**:\n - `new_issue_iids() -> &[i64]` — for `i` key navigation in Summary mode\n - `new_mr_iids() -> &[i64]` — for `m` key navigation\n - `all_changed_issue_iids() -> Vec` — new + updated combined\n - `all_changed_mr_iids() -> Vec` — new + updated combined\n - `is_available() -> bool` — true if finalize() was called\n - `fallback_timestamp() -> Option` — completed_at for timestamp-based fallback\n4. **`clear()`** — resets all fields when a new sync starts\n\nThe ledger is owned by `SyncState` (part of `AppState`) and populated by the sync action handler when processing `SyncResult` from `run_sync()`. The existing `SyncResult` struct (src/cli/commands/sync.rs:30) already tracks `issues_updated` and `mrs_updated` counts but not individual IIDs — the TUI sync action will need to collect IIDs from the ingest callbacks.\n\n## Acceptance Criteria\n- [ ] `SyncDeltaLedger::new()` creates an empty ledger with `is_available() == false`\n- [ ] `record_issue(42, true)` adds 42 to `issues_new`; `record_issue(43, false)` adds to `issues_updated`\n- [ ] `new_issue_iids()` returns only new IIDs, `all_changed_issue_iids()` returns new + updated\n- [ ] `finalize(ts)` sets `is_available() == true` and stores the timestamp\n- [ ] `clear()` resets everything back to empty with `is_available() == false`\n- [ ] `fallback_timestamp()` returns None before finalize, Some(ts) after\n- [ ] Ledger handles >10,000 IIDs without issues (just Vec growth)\n\n## Files\n- CREATE: crates/lore-tui/src/sync_delta.rs\n- MODIFY: crates/lore-tui/src/lib.rs (add `pub mod sync_delta;`)\n\n## TDD Anchor\nRED: Write `test_empty_ledger_not_available` that asserts 
`SyncDeltaLedger::new().is_available() == false` and `new_issue_iids().is_empty()`.\nGREEN: Implement the struct with new() and is_available().\nVERIFY: cargo test -p lore-tui sync_delta\n\nAdditional tests:\n- test_record_and_query_issues\n- test_record_and_query_mrs\n- test_finalize_makes_available\n- test_clear_resets_everything\n- test_all_changed_combines_new_and_updated\n- test_fallback_timestamp\n\n## Edge Cases\n- Recording the same IID twice (e.g., issue updated twice during sync) — should deduplicate or allow duplicates? Allow duplicates (Vec, not HashSet) for simplicity; consumers can deduplicate if needed.\n- Very large syncs with >50,000 entities — Vec is fine, no cap needed.\n- Calling query methods before finalize — returns data so far (is_available=false signals incompleteness).\n\n## Dependency Context\n- Depends on bd-2x2h (Sync screen) which owns SyncState and drives the sync lifecycle. The ledger is a field of SyncState.\n- Consumed by Sync Summary mode's `i`/`m` key handlers to produce filtered Issue/MR List navigation with exact IID sets.","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-12T19:29:38.738460Z","created_by":"tayloreernisse","updated_at":"2026-02-12T19:29:48.475698Z","compaction_level":0,"original_size":0,"labels":["TUI"],"dependencies":[{"issue_id":"bd-y095","depends_on_id":"bd-2x2h","type":"blocks","created_at":"2026-02-12T19:29:48.475674Z","created_by":"tayloreernisse"}]} diff --git a/.beads/last-touched b/.beads/last-touched index 64c7c8a..fce8a5b 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-1elx +bd-1i4i diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..634dcfb --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,820 @@ +# CLAUDE.md + +## RULE 0 - THE FUNDAMENTAL OVERRIDE PREROGATIVE + +If I tell you to do something, even if it goes against what follows below, YOU MUST LISTEN TO ME. I AM IN CHARGE, NOT YOU. 
+ +--- + +## RULE NUMBER 1: NO FILE DELETION + +**YOU ARE NEVER ALLOWED TO DELETE A FILE WITHOUT EXPRESS PERMISSION.** Even a new file that you yourself created, such as a test code file. You have a horrible track record of deleting critically important files or otherwise throwing away tons of expensive work. As a result, you have permanently lost any and all rights to determine that a file or folder should be deleted. + +**YOU MUST ALWAYS ASK AND RECEIVE CLEAR, WRITTEN PERMISSION BEFORE EVER DELETING A FILE OR FOLDER OF ANY KIND.** + +--- + +## Version Control: jj-First (CRITICAL) + +**ALWAYS prefer jj (Jujutsu) over git for all VCS operations.** This is a colocated repo with both `.jj/` and `.git/`. When instructed to use git by anything — even later in this file — use the best jj replacement commands instead. Only fall back to raw `git` for things jj cannot do (hooks, LFS, submodules, `gh` CLI interop). + +See `~/.claude/rules/jj-vcs/` for the full command reference, translation table, revsets, patterns, and recovery recipes. + +--- + +## Irreversible Git & Filesystem Actions — DO NOT EVER BREAK GLASS + +> **Note:** Treat destructive commands as break-glass. If there's any doubt, stop and ask. + +1. **Absolutely forbidden commands:** `git reset --hard`, `git clean -fd`, `rm -rf`, or any command that can delete or overwrite code/data must never be run unless the user explicitly provides the exact command and states, in the same message, that they understand and want the irreversible consequences. +2. **No guessing:** If there is any uncertainty about what a command might delete or overwrite, stop immediately and ask the user for specific approval. "I think it's safe" is never acceptable. +3. **Safer alternatives first:** When cleanup or rollbacks are needed, request permission to use non-destructive options (`git status`, `git diff`, `git stash`, copying to backups) before ever considering a destructive command. +4. 
**Mandatory explicit plan:** Even after explicit user authorization, restate the command verbatim, list exactly what will be affected, and wait for a confirmation that your understanding is correct. Only then may you execute it—if anything remains ambiguous, refuse and escalate. +5. **Document the confirmation:** When running any approved destructive command, record (in the session notes / final response) the exact user text that authorized it, the command actually run, and the execution time. If that record is absent, the operation did not happen. + +--- + +## Toolchain: Rust & Cargo + +We only use **Cargo** in this project, NEVER any other package manager. + +- **Edition/toolchain:** Follow `rust-toolchain.toml` (if present). Do not assume stable vs nightly. +- **Dependencies:** Explicit versions for stability; keep the set minimal. +- **Configuration:** Cargo.toml only +- **Unsafe code:** Forbidden (`#![forbid(unsafe_code)]`) + +When writing Rust code, reference RUST_CLI_TOOLS_BEST_PRACTICES.md + +### Release Profile + +Use the release profile defined in `Cargo.toml`. If you need to change it, justify the +performance/size tradeoff and how it impacts determinism and cancellation behavior. + +--- + +## Code Editing Discipline + +### No Script-Based Changes + +**NEVER** run a script that processes/changes code files in this repo. Brittle regex-based transformations create far more problems than they solve. + +- **Always make code changes manually**, even when there are many instances +- For many simple changes: use parallel subagents +- For subtle/complex changes: do them methodically yourself + +### No File Proliferation + +If you want to change something or add a feature, **revise existing code files in place**. + +**NEVER** create variations like: +- `mainV2.rs` +- `main_improved.rs` +- `main_enhanced.rs` + +New files are reserved for **genuinely new functionality** that makes zero sense to include in any existing file. 
The bar for creating new files is **incredibly high**. + +--- + +## Backwards Compatibility + +We do not care about backwards compatibility—we're in early development with no users. We want to do things the **RIGHT** way with **NO TECH DEBT**. + +- Never create "compatibility shims" +- Never create wrapper functions for deprecated APIs +- Just fix the code directly + +--- + +## Compiler Checks (CRITICAL) + +**After any substantive code changes, you MUST verify no errors were introduced:** + +```bash +# Check for compiler errors and warnings +cargo check --all-targets + +# Check for clippy lints (pedantic + nursery are enabled) +cargo clippy --all-targets -- -D warnings + +# Verify formatting +cargo fmt --check +``` + +If you see errors, **carefully understand and resolve each issue**. Read sufficient context to fix them the RIGHT way. + +--- + +## Testing + +### Unit & Property Tests + +```bash +# Run all tests +cargo test + +# Run with output +cargo test -- --nocapture +``` + +When adding or changing primitives, add tests that assert the core invariants: + +- no task leaks +- no obligation leaks +- losers are drained after races +- region close implies quiescence + +Prefer deterministic lab-runtime tests for concurrency-sensitive behavior. + +--- + +## MCP Agent Mail — Multi-Agent Coordination + +A mail-like layer that lets coding agents coordinate asynchronously via MCP tools and resources. Provides identities, inbox/outbox, searchable threads, and advisory file reservations with human-auditable artifacts in Git. + +### Why It's Useful + +- **Prevents conflicts:** Explicit file reservations (leases) for files/globs +- **Token-efficient:** Messages stored in per-project archive, not in context +- **Quick reads:** `resource://inbox/...`, `resource://thread/...` + +### Same Repository Workflow + +1. **Register identity:** + ``` + ensure_project(project_key=) + register_agent(project_key, program, model) + ``` + +2. 
**Reserve files before editing:** + ``` + file_reservation_paths(project_key, agent_name, ["src/**"], ttl_seconds=3600, exclusive=true) + ``` + +3. **Communicate with threads:** + ``` + send_message(..., thread_id="FEAT-123") + fetch_inbox(project_key, agent_name) + acknowledge_message(project_key, agent_name, message_id) + ``` + +4. **Quick reads:** + ``` + resource://inbox/{Agent}?project=&limit=20 + resource://thread/{id}?project=&include_bodies=true + ``` + +### Macros vs Granular Tools + +- **Prefer macros for speed:** `macro_start_session`, `macro_prepare_thread`, `macro_file_reservation_cycle`, `macro_contact_handshake` +- **Use granular tools for control:** `register_agent`, `file_reservation_paths`, `send_message`, `fetch_inbox`, `acknowledge_message` + +### Common Pitfalls + +- `"from_agent not registered"`: Always `register_agent` in the correct `project_key` first +- `"FILE_RESERVATION_CONFLICT"`: Adjust patterns, wait for expiry, or use non-exclusive reservation +- **Auth errors:** If JWT+JWKS enabled, include bearer token with matching `kid` + +--- + +## Beads (br) — Dependency-Aware Issue Tracking + +Beads provides a lightweight, dependency-aware issue database and CLI (`br` / beads_rust) for selecting "ready work," setting priorities, and tracking status. It complements MCP Agent Mail's messaging and file reservations. + +**Note:** `br` is non-invasive—it never executes git commands directly. You must run git commands manually after `br sync --flush-only`. + +### Conventions + +- **Single source of truth:** Beads for task status/priority/dependencies; Agent Mail for conversation and audit +- **Shared identifiers:** Use Beads issue ID (e.g., `br-123`) as Mail `thread_id` and prefix subjects with `[br-123]` +- **Reservations:** When starting a task, call `file_reservation_paths()` with the issue ID in `reason` + +### Typical Agent Flow + +1. **Pick ready work (Beads):** + ```bash + br ready --json # Choose highest priority, no blockers + ``` + +2. 
**Reserve edit surface (Mail):** + ``` + file_reservation_paths(project_key, agent_name, ["src/**"], ttl_seconds=3600, exclusive=true, reason="br-123") + ``` + +3. **Announce start (Mail):** + ``` + send_message(..., thread_id="br-123", subject="[br-123] Start: ", ack_required=true) + ``` + +4. **Work and update:** Reply in-thread with progress + +5. **Complete and release:** + ```bash + br close br-123 --reason "Completed" + ``` + ``` + release_file_reservations(project_key, agent_name, paths=["src/**"]) + ``` + Final Mail reply: `[br-123] Completed` with summary + +### Mapping Cheat Sheet + +| Concept | Value | +|---------|-------| +| Mail `thread_id` | `br-###` | +| Mail subject | `[br-###] ...` | +| File reservation `reason` | `br-###` | +| Commit messages | Include `br-###` for traceability | + +--- + +## bv — Graph-Aware Triage Engine + +bv is a graph-aware triage engine for Beads projects (`.beads/beads.jsonl`). It computes PageRank, betweenness, critical path, cycles, HITS, eigenvector, and k-core metrics deterministically. + +**Scope boundary:** bv handles *what to work on* (triage, priority, planning). For agent-to-agent coordination (messaging, work claiming, file reservations), use MCP Agent Mail. + +**CRITICAL: Use ONLY `--robot-*` flags. 
Bare `bv` launches an interactive TUI that blocks your session.** + +### The Workflow: Start With Triage + +**`bv --robot-triage` is your single entry point.** It returns: +- `quick_ref`: at-a-glance counts + top 3 picks +- `recommendations`: ranked actionable items with scores, reasons, unblock info +- `quick_wins`: low-effort high-impact items +- `blockers_to_clear`: items that unblock the most downstream work +- `project_health`: status/type/priority distributions, graph metrics +- `commands`: copy-paste shell commands for next steps + +```bash +bv --robot-triage # THE MEGA-COMMAND: start here +bv --robot-next # Minimal: just the single top pick + claim command +``` + +### Command Reference + +**Planning:** +| Command | Returns | +|---------|---------| +| `--robot-plan` | Parallel execution tracks with `unblocks` lists | +| `--robot-priority` | Priority misalignment detection with confidence | + +**Graph Analysis:** +| Command | Returns | +|---------|---------| +| `--robot-insights` | Full metrics: PageRank, betweenness, HITS, eigenvector, critical path, cycles, k-core, articulation points, slack | +| `--robot-label-health` | Per-label health: `health_level`, `velocity_score`, `staleness`, `blocked_count` | +| `--robot-label-flow` | Cross-label dependency: `flow_matrix`, `dependencies`, `bottleneck_labels` | +| `--robot-label-attention [--attention-limit=N]` | Attention-ranked labels | + +**History & Change Tracking:** +| Command | Returns | +|---------|---------| +| `--robot-history` | Bead-to-commit correlations | +| `--robot-diff --diff-since <ref>` | Changes since ref: new/closed/modified issues, cycles | + +**Other:** +| Command | Returns | +|---------|---------| +| `--robot-burndown <sprint>` | Sprint burndown, scope changes, at-risk items | +| `--robot-forecast <id\|all>` | ETA predictions with dependency-aware scheduling | +| `--robot-alerts` | Stale issues, blocking cascades, priority mismatches | +| `--robot-suggest` | Hygiene: duplicates, missing 
deps, label suggestions | +| `--robot-graph [--graph-format=json\|dot\|mermaid]` | Dependency graph export | +| `--export-graph <file.html>` | Interactive HTML visualization | + +### Scoping & Filtering + +```bash +bv --robot-plan --label backend # Scope to label's subgraph +bv --robot-insights --as-of HEAD~30 # Historical point-in-time +bv --recipe actionable --robot-plan # Pre-filter: ready to work +bv --recipe high-impact --robot-triage # Pre-filter: top PageRank +bv --robot-triage --robot-triage-by-track # Group by parallel work streams +bv --robot-triage --robot-triage-by-label # Group by domain +``` + +### Understanding Robot Output + +**All robot JSON includes:** +- `data_hash` — Fingerprint of source beads.jsonl +- `status` — Per-metric state: `computed|approx|timeout|skipped` + elapsed ms +- `as_of` / `as_of_commit` — Present when using `--as-of` + +**Two-phase analysis:** +- **Phase 1 (instant):** degree, topo sort, density +- **Phase 2 (async, 500ms timeout):** PageRank, betweenness, HITS, eigenvector, cycles + +### jq Quick Reference + +```bash +bv --robot-triage | jq '.quick_ref' # At-a-glance summary +bv --robot-triage | jq '.recommendations[0]' # Top recommendation +bv --robot-plan | jq '.plan.summary.highest_impact' # Best unblock target +bv --robot-insights | jq '.status' # Check metric readiness +bv --robot-insights | jq '.Cycles' # Circular deps (must fix!) +``` + +--- + +## UBS — Ultimate Bug Scanner + +**Golden Rule:** `ubs <changed-files>` before every commit. Exit 0 = safe. Exit >0 = fix & re-run. + +### Commands + +```bash +ubs file.rs file2.rs # Specific files (< 1s) — USE THIS +ubs $(jj diff --name-only) # Changed files — before commit +ubs --only=rust,toml src/ # Language filter (3-5x faster) +ubs --ci --fail-on-warning . # CI mode — before PR +ubs . 
# Whole project (ignores target/, Cargo.lock) +``` + +### Output Format + +``` +⚠️ Category (N errors) + file.rs:42:5 – Issue description + 💡 Suggested fix +Exit code: 1 +``` + +Parse: `file:line:col` → location | 💡 → how to fix | Exit 0/1 → pass/fail + +### Fix Workflow + +1. Read finding → category + fix suggestion +2. Navigate `file:line:col` → view context +3. Verify real issue (not false positive) +4. Fix root cause (not symptom) +5. Re-run `ubs <file>` → exit 0 +6. Commit + +### Bug Severity + +- **Critical (always fix):** Memory safety, use-after-free, data races, SQL injection +- **Important (production):** Unwrap panics, resource leaks, overflow checks +- **Contextual (judgment):** TODO/FIXME, println! debugging + +--- + +## ast-grep vs ripgrep + +**Use `ast-grep` when structure matters.** It parses code and matches AST nodes, ignoring comments/strings, and can **safely rewrite** code. + +- Refactors/codemods: rename APIs, change import forms +- Policy checks: enforce patterns across a repo +- Editor/automation: LSP mode, `--json` output + +**Use `ripgrep` when text is enough.** Fastest way to grep literals/regex. + +- Recon: find strings, TODOs, log lines, config values +- Pre-filter: narrow candidate files before ast-grep + +### Rule of Thumb + +- Need correctness or **applying changes** → `ast-grep` +- Need raw speed or **hunting text** → `rg` +- Often combine: `rg` to shortlist files, then `ast-grep` to match/modify + +### Rust Examples + +```bash +# Find structured code (ignores comments) +ast-grep run -l Rust -p 'fn $NAME($$$ARGS) -> $RET { $$$BODY }' + +# Find all unwrap() calls +ast-grep run -l Rust -p '$EXPR.unwrap()' + +# Quick textual hunt +rg -n 'println!' -t rust + +# Combine speed + precision +rg -l -t rust 'unwrap\(' | xargs ast-grep run -l Rust -p '$X.unwrap()' --json +``` + +--- + +## Morph Warp Grep — AI-Powered Code Search + +**Use `mcp__morph-mcp__warp_grep` for exploratory "how does X work?" 
questions.** An AI agent expands your query, greps the codebase, reads relevant files, and returns precise line ranges with full context. + +**Use `ripgrep` for targeted searches.** When you know exactly what you're looking for. + +**Use `ast-grep` for structural patterns.** When you need AST precision for matching/rewriting. + +### When to Use What + +| Scenario | Tool | Why | +|----------|------|-----| +| "How is pattern matching implemented?" | `warp_grep` | Exploratory; don't know where to start | +| "Where is the quick reject filter?" | `warp_grep` | Need to understand architecture | +| "Find all uses of `Regex::new`" | `ripgrep` | Targeted literal search | +| "Find files with `println!`" | `ripgrep` | Simple pattern | +| "Replace all `unwrap()` with `expect()`" | `ast-grep` | Structural refactor | + +### warp_grep Usage + +``` +mcp__morph-mcp__warp_grep( + repoPath: "/path/to/dcg", + query: "How does the safe pattern whitelist work?" +) +``` + +Returns structured results with file paths, line ranges, and extracted code snippets. + +### Anti-Patterns + +- **Don't** use `warp_grep` to find a specific function name → use `ripgrep` +- **Don't** use `ripgrep` to understand "how does X work" → wastes time with manual reads +- **Don't** use `ripgrep` for codemods → risks collateral edits + +<!-- bv-agent-instructions-v1 --> + +--- + +## Beads Workflow Integration + +This project uses [beads_viewer](https://github.com/Dicklesworthstone/beads_viewer) for issue tracking. Issues are stored in `.beads/` and tracked in version control. + +**Note:** `br` is non-invasive—it never executes VCS commands directly. You must commit manually after `br sync --flush-only`. 
+ +### Essential Commands + +```bash +# View issues (launches TUI - avoid in automated sessions) +bv + +# CLI commands for agents (use these instead) +br ready # Show issues ready to work (no blockers) +br list --status=open # All open issues +br show <id> # Full issue details with dependencies +br create --title="..." --type=task --priority=2 +br update <id> --status=in_progress +br close <id> --reason="Completed" +br close <id1> <id2> # Close multiple issues at once +br sync --flush-only # Export to JSONL (then: jj commit -m "Update beads") +``` + +### Workflow Pattern + +1. **Start**: Run `br ready` to find actionable work +2. **Claim**: Use `br update <id> --status=in_progress` +3. **Work**: Implement the task +4. **Complete**: Use `br close <id>` +5. **Sync**: Run `br sync --flush-only`, then `git add .beads/ && git commit -m "Update beads"` + +### Key Concepts + +- **Dependencies**: Issues can block other issues. `br ready` shows only unblocked work. +- **Priority**: P0=critical, P1=high, P2=medium, P3=low, P4=backlog (use numbers, not words) +- **Types**: task, bug, feature, epic, question, docs +- **Blocking**: `br dep add <issue> <depends-on>` to add dependencies + +### Session Protocol + +**Before ending any session, run this checklist (solo/lead only — workers skip VCS):** + +```bash +jj status # Check what changed +br sync --flush-only # Export beads to JSONL +jj commit -m "..." 
# Commit code and beads (jj auto-tracks all changes) +jj bookmark set <name> -r @- # Point bookmark at committed work +jj git push -b <name> # Push to remote +``` + +### Best Practices + +- Check `br ready` at session start to find available work +- Update status as you work (in_progress → closed) +- Create new issues with `br create` when you discover tasks +- Use descriptive titles and set appropriate priority/type +- Always run `br sync --flush-only` then commit before ending session (jj auto-tracks .beads/) + +<!-- end-bv-agent-instructions --> + +## Landing the Plane (Session Completion) + +**When ending a work session**, you MUST complete ALL steps below. Work is NOT complete until push succeeds. + +**WHO RUNS THIS:** Solo agents run it themselves. In multi-agent sessions, ONLY the team lead runs this. Workers skip VCS entirely. + +**MANDATORY WORKFLOW:** + +1. **File issues for remaining work** - Create issues for anything that needs follow-up +2. **Run quality gates** (if code changed) - Tests, linters, builds +3. **Update issue status** - Close finished work, update in-progress items +4. **PUSH TO REMOTE** - This is MANDATORY: + ```bash + jj git fetch # Get latest remote state + jj rebase -d trunk() # Rebase onto latest trunk if needed + br sync --flush-only # Export beads to JSONL + jj commit -m "Update beads" # Commit (jj auto-tracks .beads/ changes) + jj bookmark set <name> -r @- # Point bookmark at committed work + jj git push -b <name> # Push to remote + jj log -r '<name>' # Verify bookmark position + ``` +5. **Clean up** - Abandon empty orphan changes if any (`jj abandon <rev>`) +6. **Verify** - All changes committed AND pushed +7. 
**Hand off** - Provide context for next session
+
+**CRITICAL RULES:**
+- Work is NOT complete until `jj git push` succeeds
+- NEVER stop before pushing - that leaves work stranded locally
+- NEVER say "ready to push when you are" - YOU must push
+- If push fails, resolve and retry until it succeeds
+
+---
+
+## cass — Cross-Agent Session Search
+
+`cass` indexes prior agent conversations (Claude Code, Codex, Cursor, Gemini, ChatGPT, etc.) so we can reuse solved problems.
+
+**Rules:** Never run bare `cass` (TUI). Always use `--robot` or `--json`.
+
+### Examples
+
+```bash
+cass health
+cass search "async runtime" --robot --limit 5
+cass view /path/to/session.jsonl -n 42 --json
+cass expand /path/to/session.jsonl -n 42 -C 3 --json
+cass capabilities --json
+cass robot-docs guide
+```
+
+### Tips
+
+- Use `--fields minimal` for lean output
+- Filter by agent with `--agent`
+- Use `--days N` to limit to recent history
+
+stdout is data-only, stderr is diagnostics; exit code 0 means success.
+
+Treat cass as a way to avoid re-solving problems other agents already handled.
+
+---
+
+## Note for Codex/GPT-5.2
+
+You constantly bother me and stop working with concerned questions that look similar to this:
+
+```
+Unexpected changes (need guidance)
+
+- Working tree still shows edits I did not make in Cargo.toml, Cargo.lock, src/runtime.rs, src/scope.rs. Please advise whether to keep/commit/revert these before any further work. I did not touch them.
+
+Next steps (pick one)
+
+1. Decide how to handle the unrelated modified files above so we can resume cleanly.
+```
+
+NEVER EVER DO THAT AGAIN. The answer is literally ALWAYS the same: those are changes created by the potentially dozens of other agents working on the project at the same time. This is not only a common occurrence, it happens multiple times PER MINUTE. The way to deal with it is simple: you NEVER, under ANY CIRCUMSTANCE, stash, revert, overwrite, or otherwise disturb in ANY way the work of other agents.
Just treat those changes identically to changes that you yourself made. Just fool yourself into thinking YOU made the changes and simply don't recall it for some reason. + +--- + +## Note on Built-in TODO Functionality + +Also, if I ask you to explicitly use your built-in TODO functionality, don't complain about this and say you need to use beads. You can use built-in TODOs if I tell you specifically to do so. Always comply with such orders. + +## TDD Requirements + +Test-first development is mandatory: +1. **RED** - Write failing test first +2. **GREEN** - Minimal implementation to pass +3. **REFACTOR** - Clean up while green + +## Key Patterns + +Find the simplest solution that meets all acceptance criteria. +Use third party libraries whenever there's a well-maintained, active, and widely adopted solution (for example, date-fns for TS date math) +Build extensible pieces of logic that can easily be integrated with other pieces. +DRY principles should be loosely held. +Architecture MUST be clear and well thought-out. Ask the user for clarification whenever ambiguity is discovered around architecture, or you think a better approach than planned exists. + +--- + +## Third-Party Library Usage + +If you aren't 100% sure how to use a third-party library, **SEARCH ONLINE** to find the latest documentation and mid-2025 best practices. + +--- + +## Gitlore Robot Mode + +The `lore` CLI has a robot mode optimized for AI agent consumption with compact JSON output, structured errors with machine-actionable recovery steps, meaningful exit codes, response timing metadata, field selection for token efficiency, and TTY auto-detection. + +### Activation + +```bash +# Explicit flag +lore --robot issues -n 10 + +# JSON shorthand (-J) +lore -J issues -n 10 + +# Auto-detection (when stdout is not a TTY) +lore issues | jq . 
+ +# Environment variable +LORE_ROBOT=1 lore issues +``` + +### Robot Mode Commands + +```bash +# List issues/MRs with JSON output +lore --robot issues -n 10 +lore --robot mrs -s opened + +# Filter issues by work item status (case-insensitive) +lore --robot issues --status "In progress" + +# List with field selection (reduces token usage ~60%) +lore --robot issues --fields minimal +lore --robot mrs --fields iid,title,state,draft + +# Show detailed entity info +lore --robot issues 123 +lore --robot mrs 456 -p group/repo + +# Count entities +lore --robot count issues +lore --robot count discussions --for mr + +# Search indexed documents +lore --robot search "authentication bug" + +# Check sync status +lore --robot status + +# Run full sync pipeline +lore --robot sync + +# Run sync without resource events +lore --robot sync --no-events + +# Run ingestion only +lore --robot ingest issues + +# Check environment health +lore --robot doctor + +# Document and index statistics +lore --robot stats + +# Quick health pre-flight check (exit 0 = healthy, 19 = unhealthy) +lore --robot health + +# Generate searchable documents from ingested data +lore --robot generate-docs + +# Generate vector embeddings via Ollama +lore --robot embed + +# Agent self-discovery manifest (all commands, flags, exit codes, response schemas) +lore robot-docs + +# Version information +lore --robot version +``` + +### Response Format + +All commands return compact JSON with a uniform envelope and timing metadata: + +```json +{"ok":true,"data":{...},"meta":{"elapsed_ms":42}} +``` + +Errors return structured JSON to stderr with machine-actionable recovery steps: + +```json +{"error":{"code":"CONFIG_NOT_FOUND","message":"...","suggestion":"Run 'lore init'","actions":["lore init"]}} +``` + +The `actions` array contains executable shell commands for automated recovery. It is omitted when empty. 
+ +### Field Selection + +The `--fields` flag on `issues` and `mrs` list commands controls which fields appear in the JSON response: + +```bash +lore -J issues --fields minimal # Preset: iid, title, state, updated_at_iso +lore -J mrs --fields iid,title,state,draft,labels # Custom field list +``` + +### Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success | +| 1 | Internal error / not implemented | +| 2 | Usage error (invalid flags or arguments) | +| 3 | Config invalid | +| 4 | Token not set | +| 5 | GitLab auth failed | +| 6 | Resource not found | +| 7 | Rate limited | +| 8 | Network error | +| 9 | Database locked | +| 10 | Database error | +| 11 | Migration failed | +| 12 | I/O error | +| 13 | Transform error | +| 14 | Ollama unavailable | +| 15 | Ollama model not found | +| 16 | Embedding failed | +| 17 | Not found (entity does not exist) | +| 18 | Ambiguous match (use `-p` to specify project) | +| 19 | Health check failed | +| 20 | Config not found | + +### Configuration Precedence + +1. CLI flags (highest priority) +2. Environment variables (`LORE_ROBOT`, `GITLAB_TOKEN`, `LORE_CONFIG_PATH`) +3. Config file (`~/.config/lore/config.json`) +4. 
Built-in defaults (lowest priority) + +### Best Practices + +- Use `lore --robot` or `lore -J` for all agent interactions +- Check exit codes for error handling +- Parse JSON errors from stderr; use `actions` array for automated recovery +- Use `--fields minimal` to reduce token usage (~60% fewer tokens) +- Use `-n` / `--limit` to control response size +- Use `-q` / `--quiet` to suppress progress bars and non-essential output +- Use `--color never` in non-TTY automation for ANSI-free output +- Use `-v` / `-vv` / `-vvv` for increasing verbosity (debug/trace logging) +- Use `--log-format json` for machine-readable log output to stderr +- TTY detection handles piped commands automatically +- Use `lore --robot health` as a fast pre-flight check before queries +- Use `lore robot-docs` for response schema discovery +- The `-p` flag supports fuzzy project matching (suffix and substring) + +--- + +## Read/Write Split: lore vs glab + +| Operation | Tool | Why | +|-----------|------|-----| +| List issues/MRs | lore | Richer: includes status, discussions, closing MRs | +| View issue/MR detail | lore | Pre-joined discussions, work-item status | +| Search across entities | lore | FTS5 + vector hybrid search | +| Expert/workload analysis | lore | who command — no glab equivalent | +| Timeline reconstruction | lore | Chronological narrative — no glab equivalent | +| Create/update/close | glab | Write operations | +| Approve/merge MR | glab | Write operations | +| CI/CD pipelines | glab | Not in lore scope | + +````markdown +## UBS Quick Reference for AI Agents + +UBS stands for "Ultimate Bug Scanner": **The AI Coding Agent's Secret Weapon: Flagging Likely Bugs for Fixing Early On** + +**Install:** `curl -sSL https://raw.githubusercontent.com/Dicklesworthstone/ultimate_bug_scanner/master/install.sh | bash` + +**Golden Rule:** `ubs <changed-files>` before every commit. Exit 0 = safe. Exit >0 = fix & re-run. 
+ +**Commands:** +```bash +ubs file.ts file2.py # Specific files (< 1s) — USE THIS +ubs $(git diff --name-only --cached) # Staged files — before commit +ubs --only=js,python src/ # Language filter (3-5x faster) +ubs --ci --fail-on-warning . # CI mode — before PR +ubs --help # Full command reference +ubs sessions --entries 1 # Tail the latest install session log +ubs . # Whole project (ignores things like .venv and node_modules automatically) +``` + +**Output Format:** +``` +⚠️ Category (N errors) + file.ts:42:5 – Issue description + 💡 Suggested fix +Exit code: 1 +``` +Parse: `file:line:col` → location | 💡 → how to fix | Exit 0/1 → pass/fail + +**Fix Workflow:** +1. Read finding → category + fix suggestion +2. Navigate `file:line:col` → view context +3. Verify real issue (not false positive) +4. Fix root cause (not symptom) +5. Re-run `ubs <file>` → exit 0 +6. Commit + +**Speed Critical:** Scope to changed files. `ubs src/file.ts` (< 1s) vs `ubs .` (30s). Never full scan for small edits. + +**Bug Severity:** +- **Critical** (always fix): Null safety, XSS/injection, async/await, memory leaks +- **Important** (production): Type narrowing, division-by-zero, resource leaks +- **Contextual** (judgment): TODO/FIXME, console logs + +**Anti-Patterns:** +- ❌ Ignore findings → ✅ Investigate each +- ❌ Full scan per edit → ✅ Scope to file +- ❌ Fix symptom (`if (x) { x.y }`) → ✅ Root cause (`x?.y`) +```` diff --git a/acceptance-criteria.md b/acceptance-criteria.md new file mode 100644 index 0000000..5c51d84 --- /dev/null +++ b/acceptance-criteria.md @@ -0,0 +1,64 @@ +# Trace/File-History Empty-Result Diagnostics + +## AC-1: Human mode shows searched paths on empty results + +When `lore trace <path>` returns 0 chains in human mode, the output includes the resolved path(s) that were searched. If renames were followed, show the full rename chain. 
+ +## AC-2: Human mode shows actionable reason on empty results + +When 0 chains are found, the hint message distinguishes between: +- "No MR file changes synced yet" (mr_file_changes table is empty for this project) -> suggest `lore sync` +- "File paths not found in MR file changes" (sync has run but this file has no matches) -> suggest checking the path or that the file may predate the sync window + +## AC-3: Robot mode includes diagnostics object on empty results + +When `total_chains == 0` in robot JSON output, add a `"diagnostics"` key to `"meta"` containing: +- `paths_searched: [...]` (already present as `resolved_paths` in data -- no duplication needed) +- `hints: [string]` -- same actionable reasons as AC-2 but machine-readable + +## AC-4: Info-level logging at each pipeline stage + +Add `tracing::info!` calls visible with `-v`: +- After rename resolution: number of paths found +- After MR query: number of MRs found +- After issue/discussion enrichment: counts per MR + +## AC-5: Apply same pattern to `lore file-history` + +All of the above (AC-1 through AC-4) also apply to `lore file-history` empty results. + +--- + +# Secure Token Resolution for Cron + +## AC-6: Stored token in config + +The configuration file supports an optional `token` field in the `gitlab` section, allowing users to persist their GitLab personal access token alongside other settings. Existing configuration files that omit this field continue to load and function normally. + +## AC-7: Token resolution precedence + +Lore resolves the GitLab token by checking the environment variable first, then falling back to the stored config token. This means environment variables always take priority, preserving CI/CD workflows and one-off overrides, while the stored token provides a reliable default for non-interactive contexts like cron jobs. If neither source provides a non-empty value, the user receives a clear `TOKEN_NOT_SET` error with guidance on how to fix it. 
+ +## AC-8: `lore token set` command + +The `lore token set` command provides a secure, guided workflow for storing a GitLab token. It accepts the token via a `--token` flag, standard input (for piped automation), or an interactive masked prompt. Before storing, it validates the token against the GitLab API to catch typos and expired credentials early. After writing the token to the configuration file, it restricts file permissions to owner-only read/write (mode 0600) to prevent other users on the system from reading the token. The command supports both human and robot output modes. + +## AC-9: `lore token show` command + +The `lore token show` command displays the currently active token along with its source ("config file" or "environment variable"). By default the token value is masked for safety; the `--unmask` flag reveals the full value when needed. The command supports both human and robot output modes. + +## AC-10: Consistent token resolution across all commands + +Every command that requires a GitLab token uses the same two-step resolution logic described in AC-7. This ensures that storing a token once via `lore token set` is sufficient to make all commands work, including background cron syncs that have no access to shell environment variables. + +## AC-11: Cron install warns about missing stored token + +When `lore cron install` completes, it checks whether a token is available in the configuration file. If not, it displays a prominent warning explaining that cron jobs cannot access shell environment variables and directs the user to run `lore token set` to ensure unattended syncs will authenticate successfully. + +## AC-12: `TOKEN_NOT_SET` error recommends `lore token set` + +The `TOKEN_NOT_SET` error message recommends `lore token set` as the primary fix for missing credentials, with the environment variable export shown as an alternative for users who prefer that approach. 
In robot mode, the `actions` array lists both options so that automated recovery workflows can act on them. + +## AC-13: Doctor reports token source + +The `lore doctor` command includes the token's source in its GitLab connectivity check, reporting whether the token was found in the configuration file or an environment variable. This makes it straightforward to verify that cron jobs will have access to the token without relying on the user's interactive shell environment. diff --git a/migrations/027_surgical_sync_runs.sql b/migrations/027_surgical_sync_runs.sql new file mode 100644 index 0000000..005f980 --- /dev/null +++ b/migrations/027_surgical_sync_runs.sql @@ -0,0 +1,20 @@ +-- Migration 027: Extend sync_runs for surgical sync observability +-- Adds mode/phase tracking and surgical-specific counters. + +ALTER TABLE sync_runs ADD COLUMN mode TEXT; +ALTER TABLE sync_runs ADD COLUMN phase TEXT; +ALTER TABLE sync_runs ADD COLUMN surgical_iids_json TEXT; +ALTER TABLE sync_runs ADD COLUMN issues_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_fetched INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN issues_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN mrs_ingested INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN skipped_stale INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_regenerated INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN docs_embedded INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN warnings_count INTEGER NOT NULL DEFAULT 0; +ALTER TABLE sync_runs ADD COLUMN cancelled_at INTEGER; + +CREATE INDEX IF NOT EXISTS idx_sync_runs_mode_started + ON sync_runs(mode, started_at DESC); +CREATE INDEX IF NOT EXISTS idx_sync_runs_status_phase_started + ON sync_runs(status, phase, started_at DESC); diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs index 1f19af5..39fcef3 100644 --- a/src/cli/autocorrect.rs +++ b/src/cli/autocorrect.rs @@ 
-130,6 +130,10 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--no-dry-run", "--timings", "--lock", + "--issue", + "--mr", + "--project", + "--preflight-only", ], ), ( diff --git a/src/cli/commands/auth_test.rs b/src/cli/commands/auth_test.rs index 751e357..c06a17f 100644 --- a/src/cli/commands/auth_test.rs +++ b/src/cli/commands/auth_test.rs @@ -1,5 +1,5 @@ use crate::core::config::Config; -use crate::core::error::{LoreError, Result}; +use crate::core::error::Result; use crate::gitlab::GitLabClient; pub struct AuthTestResult { @@ -11,17 +11,7 @@ pub struct AuthTestResult { pub async fn run_auth_test(config_path: Option<&str>) -> Result<AuthTestResult> { let config = Config::load(config_path)?; - let token = std::env::var(&config.gitlab.token_env_var) - .map(|t| t.trim().to_string()) - .map_err(|_| LoreError::TokenNotSet { - env_var: config.gitlab.token_env_var.clone(), - })?; - - if token.is_empty() { - return Err(LoreError::TokenNotSet { - env_var: config.gitlab.token_env_var.clone(), - }); - } + let token = config.gitlab.resolve_token()?; let client = GitLabClient::new(&config.gitlab.base_url, &token, None); diff --git a/src/cli/commands/doctor.rs b/src/cli/commands/doctor.rs index 0b507d7..0a5e943 100644 --- a/src/cli/commands/doctor.rs +++ b/src/cli/commands/doctor.rs @@ -240,16 +240,15 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck { }; }; - let token = match std::env::var(&config.gitlab.token_env_var) { - Ok(t) if !t.trim().is_empty() => t.trim().to_string(), - _ => { + let token = match config.gitlab.resolve_token() { + Ok(t) => t, + Err(_) => { return GitLabCheck { result: CheckResult { status: CheckStatus::Error, - message: Some(format!( - "{} not set in environment", - config.gitlab.token_env_var - )), + message: Some( + "Token not set. 
Run 'lore token set' or export GITLAB_TOKEN.".to_string(), + ), }, url: Some(config.gitlab.base_url.clone()), username: None, @@ -257,6 +256,8 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck { } }; + let source = config.gitlab.token_source().unwrap_or("unknown"); + let client = GitLabClient::new(&config.gitlab.base_url, &token, None); match client.get_current_user().await { @@ -264,7 +265,7 @@ async fn check_gitlab(config: Option<&Config>) -> GitLabCheck { result: CheckResult { status: CheckStatus::Ok, message: Some(format!( - "{} (authenticated as @{})", + "{} (authenticated as @{}, token from {source})", config.gitlab.base_url, user.username )), }, diff --git a/src/cli/commands/drift.rs b/src/cli/commands/drift.rs index de85018..e656f3a 100644 --- a/src/cli/commands/drift.rs +++ b/src/cli/commands/drift.rs @@ -382,7 +382,7 @@ fn extract_drift_topics(description: &str, notes: &[NoteRow], drift_idx: usize) } let mut sorted: Vec<(String, usize)> = freq.into_iter().collect(); - sorted.sort_by(|a, b| b.1.cmp(&a.1)); + sorted.sort_by_key(|b| std::cmp::Reverse(b.1)); sorted .into_iter() diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 54cb82b..2604298 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -293,10 +293,7 @@ async fn run_ingest_inner( ); lock.acquire(force)?; - let token = - std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { - env_var: config.gitlab.token_env_var.clone(), - })?; + let token = config.gitlab.resolve_token()?; let client = GitLabClient::new( &config.gitlab.base_url, diff --git a/src/cli/commands/init.rs b/src/cli/commands/init.rs index 81daf26..6b4bf1d 100644 --- a/src/cli/commands/init.rs +++ b/src/cli/commands/init.rs @@ -1,9 +1,10 @@ use std::fs; +use std::io::{IsTerminal, Read}; -use crate::core::config::{MinimalConfig, MinimalGitLabConfig, ProjectConfig}; +use crate::core::config::{Config, MinimalConfig, MinimalGitLabConfig, ProjectConfig}; use 
crate::core::db::{create_connection, run_migrations}; use crate::core::error::{LoreError, Result}; -use crate::core::paths::{get_config_path, get_data_dir}; +use crate::core::paths::{ensure_config_permissions, get_config_path, get_data_dir}; use crate::gitlab::{GitLabClient, GitLabProject}; pub struct InitInputs { @@ -172,3 +173,115 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitRe default_project: inputs.default_project, }) } + +// ── token set / show ── + +pub struct TokenSetResult { + pub username: String, + pub config_path: String, +} + +pub struct TokenShowResult { + pub token: String, + pub source: &'static str, +} + +/// Read token from --token flag or stdin, validate against GitLab, store in config. +pub async fn run_token_set( + config_path_override: Option<&str>, + token_arg: Option<String>, +) -> Result<TokenSetResult> { + let config_path = get_config_path(config_path_override); + + if !config_path.exists() { + return Err(LoreError::ConfigNotFound { + path: config_path.display().to_string(), + }); + } + + // Resolve token value: flag > stdin > error + let token = if let Some(t) = token_arg { + t.trim().to_string() + } else if !std::io::stdin().is_terminal() { + let mut buf = String::new(); + std::io::stdin() + .read_to_string(&mut buf) + .map_err(|e| LoreError::Other(format!("Failed to read token from stdin: {e}")))?; + buf.trim().to_string() + } else { + return Err(LoreError::Other( + "No token provided. 
Use --token or pipe to stdin.".to_string(), + )); + }; + + if token.is_empty() { + return Err(LoreError::Other("Token cannot be empty.".to_string())); + } + + // Load config to get the base URL for validation + let config = Config::load(config_path_override)?; + + // Validate token against GitLab + let client = GitLabClient::new(&config.gitlab.base_url, &token, None); + let user = client.get_current_user().await.map_err(|e| { + if matches!(e, LoreError::GitLabAuthFailed) { + LoreError::Other("Token validation failed: authentication rejected by GitLab.".into()) + } else { + e + } + })?; + + // Read config as raw JSON, insert token, write back + let content = fs::read_to_string(&config_path) + .map_err(|e| LoreError::Other(format!("Failed to read config file: {e}")))?; + + let mut json: serde_json::Value = + serde_json::from_str(&content).map_err(|e| LoreError::ConfigInvalid { + details: format!("Invalid JSON in config file: {e}"), + })?; + + json["gitlab"]["token"] = serde_json::Value::String(token); + + let output = serde_json::to_string_pretty(&json) + .map_err(|e| LoreError::Other(format!("Failed to serialize config: {e}")))?; + fs::write(&config_path, format!("{output}\n"))?; + + // Enforce permissions + ensure_config_permissions(&config_path); + + Ok(TokenSetResult { + username: user.username, + config_path: config_path.display().to_string(), + }) +} + +/// Show the current token (masked or unmasked) and its source. 
+pub fn run_token_show(config_path_override: Option<&str>, unmask: bool) -> Result<TokenShowResult> { + let config = Config::load(config_path_override)?; + + let source = config + .gitlab + .token_source() + .ok_or_else(|| LoreError::TokenNotSet { + env_var: config.gitlab.token_env_var.clone(), + })?; + + let token = config.gitlab.resolve_token()?; + + let display_token = if unmask { token } else { mask_token(&token) }; + + Ok(TokenShowResult { + token: display_token, + source, + }) +} + +fn mask_token(token: &str) -> String { + let len = token.len(); + if len <= 8 { + "*".repeat(len) + } else { + let visible = &token[..4]; + format!("{visible}{}", "*".repeat(len - 4)) + } +} diff --git a/src/cli/commands/list_tests.rs b/src/cli/commands/list_tests.rs index 19775cd..00a3946 100644 --- a/src/cli/commands/list_tests.rs +++ b/src/cli/commands/list_tests.rs @@ -95,6 +95,7 @@ fn test_config(default_project: Option<&str>) -> Config { gitlab: GitLabConfig { base_url: "https://gitlab.example.com".to_string(), token_env_var: "GITLAB_TOKEN".to_string(), + token: None, }, projects: vec![ProjectConfig { path: "group/project".to_string(), diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 40e683e..d359bb6 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -15,6 +15,7 @@ pub mod show; pub mod stats; pub mod sync; pub mod sync_status; +pub mod sync_surgical; pub mod timeline; pub mod trace; pub mod who; @@ -39,7 +40,7 @@ pub use ingest::{ DryRunPreview, IngestDisplay, print_dry_run_preview, print_dry_run_preview_json, print_ingest_summary, print_ingest_summary_json, run_ingest, run_ingest_dry_run, }; -pub use init::{InitInputs, InitOptions, InitResult, run_init}; +pub use init::{InitInputs, InitOptions, InitResult, run_init, run_token_set, run_token_show}; pub use list::{ ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json, @@ 
-55,6 +56,7 @@ pub use show::{ pub use stats::{print_stats, print_stats_json, run_stats}; pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync}; pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status}; +pub use sync_surgical::run_sync_surgical; pub use timeline::{TimelineParams, print_timeline, print_timeline_json_with_meta, run_timeline}; pub use trace::{parse_trace_path, print_trace, print_trace_json}; pub use who::{WhoRun, print_who_human, print_who_json, run_who}; diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs index 65d2a6e..b8f30f0 100644 --- a/src/cli/commands/sync.rs +++ b/src/cli/commands/sync.rs @@ -16,6 +16,7 @@ use super::ingest::{ DryRunPreview, IngestDisplay, ProjectStatusEnrichment, ProjectSummary, run_ingest, run_ingest_dry_run, }; +use super::sync_surgical::run_sync_surgical; #[derive(Debug, Default)] pub struct SyncOptions { @@ -26,6 +27,35 @@ pub struct SyncOptions { pub no_events: bool, pub robot_mode: bool, pub dry_run: bool, + pub issue_iids: Vec<u64>, + pub mr_iids: Vec<u64>, + pub project: Option<String>, + pub preflight_only: bool, +} + +impl SyncOptions { + pub const MAX_SURGICAL_TARGETS: usize = 100; + + pub fn is_surgical(&self) -> bool { + !self.issue_iids.is_empty() || !self.mr_iids.is_empty() + } +} + +#[derive(Debug, Default, Serialize)] +pub struct SurgicalIids { + pub issues: Vec<u64>, + pub merge_requests: Vec<u64>, +} + +#[derive(Debug, Serialize)] +pub struct EntitySyncResult { + pub entity_type: String, + pub iid: u64, + pub outcome: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub toctou_reason: Option<String>, } #[derive(Debug, Default, Serialize)] @@ -45,6 +75,14 @@ pub struct SyncResult { pub embedding_failed: usize, pub status_enrichment_errors: usize, pub statuses_enriched: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub surgical_mode: 
Option<bool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub surgical_iids: Option<SurgicalIids>, + #[serde(skip_serializing_if = "Option::is_none")] + pub entity_results: Option<Vec<EntitySyncResult>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub preflight_only: Option<bool>, #[serde(skip)] pub issue_projects: Vec<ProjectSummary>, #[serde(skip)] @@ -66,6 +104,11 @@ pub async fn run_sync( run_id: Option<&str>, signal: &ShutdownSignal, ) -> Result<SyncResult> { + // Surgical dispatch: if any IIDs specified, route to surgical pipeline + if options.is_surgical() { + return run_sync_surgical(config, options, run_id, signal).await; + } + let generated_id; let run_id = match run_id { Some(id) => id, @@ -893,6 +936,22 @@ pub fn print_sync_dry_run_json(result: &SyncDryRunResult) { mod tests { use super::*; + fn default_options() -> SyncOptions { + SyncOptions { + full: false, + force: false, + no_embed: false, + no_docs: false, + no_events: false, + robot_mode: false, + dry_run: false, + issue_iids: vec![], + mr_iids: vec![], + project: None, + preflight_only: false, + } + } + #[test] fn append_failures_skips_zeroes() { let mut summary = "base".to_string(); @@ -1035,4 +1094,112 @@ mod tests { assert!(rows[0].contains("0 statuses updated")); assert!(rows[0].contains("skipped (disabled)")); } + + #[test] + fn is_surgical_with_issues() { + let opts = SyncOptions { + issue_iids: vec![1], + ..default_options() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn is_surgical_with_mrs() { + let opts = SyncOptions { + mr_iids: vec![10], + ..default_options() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn is_surgical_empty() { + let opts = default_options(); + assert!(!opts.is_surgical()); + } + + #[test] + fn max_surgical_targets_is_100() { + assert_eq!(SyncOptions::MAX_SURGICAL_TARGETS, 100); + } + + #[test] + fn sync_result_default_omits_surgical_fields() { + let result = SyncResult::default(); + let json = 
serde_json::to_value(&result).unwrap(); + assert!(json.get("surgical_mode").is_none()); + assert!(json.get("surgical_iids").is_none()); + assert!(json.get("entity_results").is_none()); + assert!(json.get("preflight_only").is_none()); + } + + #[test] + fn sync_result_with_surgical_fields_serializes_correctly() { + let result = SyncResult { + surgical_mode: Some(true), + surgical_iids: Some(SurgicalIids { + issues: vec![7, 42], + merge_requests: vec![10], + }), + entity_results: Some(vec![ + EntitySyncResult { + entity_type: "issue".to_string(), + iid: 7, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }, + EntitySyncResult { + entity_type: "issue".to_string(), + iid: 42, + outcome: "skipped_toctou".to_string(), + error: None, + toctou_reason: Some("updated_at changed".to_string()), + }, + ]), + preflight_only: Some(false), + ..SyncResult::default() + }; + let json = serde_json::to_value(&result).unwrap(); + assert_eq!(json["surgical_mode"], true); + assert_eq!(json["surgical_iids"]["issues"], serde_json::json!([7, 42])); + assert_eq!(json["entity_results"].as_array().unwrap().len(), 2); + assert_eq!(json["entity_results"][1]["outcome"], "skipped_toctou"); + assert_eq!(json["preflight_only"], false); + } + + #[test] + fn entity_sync_result_omits_none_fields() { + let entity = EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: 10, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }; + let json = serde_json::to_value(&entity).unwrap(); + assert!(json.get("error").is_none()); + assert!(json.get("toctou_reason").is_none()); + assert!(json.get("entity_type").is_some()); + } + + #[test] + fn is_surgical_with_both_issues_and_mrs() { + let opts = SyncOptions { + issue_iids: vec![1, 2], + mr_iids: vec![10], + ..default_options() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn is_not_surgical_with_only_project() { + let opts = SyncOptions { + project: Some("group/repo".to_string()), + 
..default_options() + }; + assert!(!opts.is_surgical()); + } } diff --git a/src/cli/commands/sync_surgical.rs b/src/cli/commands/sync_surgical.rs new file mode 100644 index 0000000..f79760f --- /dev/null +++ b/src/cli/commands/sync_surgical.rs @@ -0,0 +1,715 @@ +use std::time::Instant; + +use tracing::{Instrument, debug, info, warn}; + +use crate::Config; +use crate::cli::commands::sync::{EntitySyncResult, SurgicalIids, SyncOptions, SyncResult}; +use crate::cli::progress::{format_stage_line, stage_spinner_v2}; +use crate::cli::render::{Icons, Theme}; +use crate::core::db::{LATEST_SCHEMA_VERSION, create_connection, get_schema_version}; +use crate::core::error::{LoreError, Result}; +use crate::core::lock::{AppLock, LockOptions}; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::core::shutdown::ShutdownSignal; +use crate::core::sync_run::SyncRunRecorder; +use crate::documents::{SourceType, regenerate_dirty_documents_for_sources}; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; +use crate::embedding::pipeline::{DEFAULT_EMBED_CONCURRENCY, embed_documents_by_ids}; +use crate::gitlab::GitLabClient; +use crate::ingestion::surgical::{ + fetch_dependents_for_issue, fetch_dependents_for_mr, ingest_issue_by_iid, ingest_mr_by_iid, + preflight_fetch, +}; + +pub async fn run_sync_surgical( + config: &Config, + options: SyncOptions, + run_id: Option<&str>, + signal: &ShutdownSignal, +) -> Result<SyncResult> { + // ── Generate run_id ── + let generated_id; + let run_id = match run_id { + Some(id) => id, + None => { + generated_id = uuid::Uuid::new_v4().simple().to_string(); + &generated_id[..8] + } + }; + let span = tracing::info_span!("surgical_sync", %run_id); + + async move { + let pipeline_start = Instant::now(); + let mut result = SyncResult { + run_id: run_id.to_string(), + surgical_mode: Some(true), + surgical_iids: Some(SurgicalIids { + issues: options.issue_iids.clone(), + merge_requests: options.mr_iids.clone(), + 
}), + ..SyncResult::default() + }; + let mut entity_results: Vec<EntitySyncResult> = Vec::new(); + + // ── Resolve project ── + let project_str = options.project.as_deref().ok_or_else(|| { + LoreError::Other( + "Surgical sync requires --project. Specify the project path.".to_string(), + ) + })?; + + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let schema_version = get_schema_version(&conn); + if schema_version < LATEST_SCHEMA_VERSION { + return Err(LoreError::MigrationFailed { + version: schema_version, + message: format!( + "Database is at schema version {schema_version} but {LATEST_SCHEMA_VERSION} is required. \ + Run 'lore sync' first to apply migrations." + ), + source: None, + }); + } + + let project_id = resolve_project(&conn, project_str)?; + + let gitlab_project_id: i64 = conn.query_row( + "SELECT gitlab_project_id FROM projects WHERE id = ?1", + [project_id], + |row| row.get(0), + )?; + + debug!( + project_str, + project_id, + gitlab_project_id, + "Resolved project for surgical sync" + ); + + // ── Start recorder ── + let recorder_conn = create_connection(&db_path)?; + let recorder = SyncRunRecorder::start(&recorder_conn, "surgical-sync", run_id)?; + + let iids_json = serde_json::to_string(&SurgicalIids { + issues: options.issue_iids.clone(), + merge_requests: options.mr_iids.clone(), + }) + .unwrap_or_else(|_| "{}".to_string()); + + recorder.set_surgical_metadata(&recorder_conn, "surgical", "preflight", &iids_json)?; + + // Wrap recorder in Option for consuming terminal methods + let mut recorder = Some(recorder); + + // ── Build GitLab client ── + let token = config.gitlab.resolve_token()?; + let client = GitLabClient::new( + &config.gitlab.base_url, + &token, + Some(config.sync.requests_per_second), + ); + + // ── Build targets list ── + let mut targets: Vec<(String, i64)> = Vec::new(); + for iid in &options.issue_iids { + targets.push(("issue".to_string(), *iid as i64)); + } + for iid in 
&options.mr_iids { + targets.push(("merge_request".to_string(), *iid as i64)); + } + + // ── Stage: Preflight ── + let stage_start = Instant::now(); + let spinner = + stage_spinner_v2(Icons::sync(), "Preflight", "fetching...", options.robot_mode); + + info!(targets = targets.len(), "Preflight: fetching entities from GitLab"); + let preflight = preflight_fetch(&client, gitlab_project_id, &targets).await; + + // Record preflight failures + for failure in &preflight.failures { + let is_not_found = matches!(&failure.error, LoreError::GitLabNotFound { .. }); + entity_results.push(EntitySyncResult { + entity_type: failure.entity_type.clone(), + iid: failure.iid as u64, + outcome: if is_not_found { + "not_found".to_string() + } else { + "preflight_failed".to_string() + }, + error: Some(failure.error.to_string()), + toctou_reason: None, + }); + if let Some(ref rec) = recorder { + let _ = rec.record_entity_result(&recorder_conn, &failure.entity_type, "warning"); + } + } + + let preflight_summary = format!( + "{} issues, {} MRs fetched ({} failed)", + preflight.issues.len(), + preflight.merge_requests.len(), + preflight.failures.len() + ); + let preflight_icon = color_icon( + if preflight.failures.is_empty() { + Icons::success() + } else { + Icons::warning() + }, + !preflight.failures.is_empty(), + ); + emit_stage_line( + &spinner, + &preflight_icon, + "Preflight", + &preflight_summary, + stage_start.elapsed(), + options.robot_mode, + ); + + // ── Preflight-only early return ── + if options.preflight_only { + result.preflight_only = Some(true); + result.entity_results = Some(entity_results); + if let Some(rec) = recorder.take() { + rec.succeed(&recorder_conn, &[], 0, preflight.failures.len())?; + } + return Ok(result); + } + + // ── Check cancellation ── + if signal.is_cancelled() { + if let Some(rec) = recorder.take() { + rec.cancel(&recorder_conn, "cancelled before ingest")?; + } + result.entity_results = Some(entity_results); + return Ok(result); + } + + // ── Acquire 
lock ── + let lock_conn = create_connection(&db_path)?; + let mut lock = AppLock::new( + lock_conn, + LockOptions { + name: "sync".to_string(), + stale_lock_minutes: config.sync.stale_lock_minutes, + heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds, + }, + ); + lock.acquire(options.force)?; + + // Wrap the rest in a closure-like block to ensure lock release on error + let pipeline_result = run_pipeline_stages( + &conn, + &recorder_conn, + config, + &client, + &options, + &preflight, + project_id, + gitlab_project_id, + &mut entity_results, + &mut result, + recorder.as_ref(), + signal, + ) + .await; + + match pipeline_result { + Ok(()) => { + // ── Finalize: succeed ── + if let Some(ref rec) = recorder { + let _ = rec.update_phase(&recorder_conn, "finalize"); + } + let total_items = result.issues_updated + + result.mrs_updated + + result.documents_regenerated + + result.documents_embedded; + let total_errors = result.documents_errored + + result.embedding_failed + + entity_results + .iter() + .filter(|e| e.outcome != "synced" && e.outcome != "skipped_stale") + .count(); + if let Some(rec) = recorder.take() { + rec.succeed(&recorder_conn, &[], total_items, total_errors)?; + } + } + Err(ref e) => { + if let Some(rec) = recorder.take() { + let _ = rec.fail(&recorder_conn, &e.to_string(), None); + } + } + } + + lock.release(); + + // Propagate error after cleanup + pipeline_result?; + + result.entity_results = Some(entity_results); + + let elapsed = pipeline_start.elapsed(); + debug!( + elapsed_ms = elapsed.as_millis(), + issues = result.issues_updated, + mrs = result.mrs_updated, + docs = result.documents_regenerated, + embedded = result.documents_embedded, + "Surgical sync pipeline complete" + ); + + Ok(result) + } + .instrument(span) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn run_pipeline_stages( + conn: &rusqlite::Connection, + recorder_conn: &rusqlite::Connection, + config: &Config, + client: &GitLabClient, + options: 
&SyncOptions, + preflight: &crate::ingestion::surgical::PreflightResult, + project_id: i64, + gitlab_project_id: i64, + entity_results: &mut Vec<EntitySyncResult>, + result: &mut SyncResult, + recorder: Option<&SyncRunRecorder>, + signal: &ShutdownSignal, +) -> Result<()> { + let mut all_dirty_source_keys: Vec<(SourceType, i64)> = Vec::new(); + + // ── Stage: Ingest ── + if let Some(rec) = recorder { + rec.update_phase(recorder_conn, "ingest")?; + } + + let stage_start = Instant::now(); + let spinner = stage_spinner_v2(Icons::sync(), "Ingest", "processing...", options.robot_mode); + + // Ingest issues + for issue in &preflight.issues { + match ingest_issue_by_iid(conn, config, project_id, issue) { + Ok(ingest_result) => { + if ingest_result.skipped_stale { + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "skipped_stale".to_string(), + error: None, + toctou_reason: Some("updated_at not newer than DB".to_string()), + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "issue", "skipped_stale"); + } + } else { + result.issues_updated += 1; + all_dirty_source_keys.extend(ingest_result.dirty_source_keys); + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "issue", "ingested"); + } + } + } + Err(e) => { + warn!(iid = issue.iid, error = %e, "Failed to ingest issue"); + entity_results.push(EntitySyncResult { + entity_type: "issue".to_string(), + iid: issue.iid as u64, + outcome: "error".to_string(), + error: Some(e.to_string()), + toctou_reason: None, + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "issue", "warning"); + } + } + } + } + + // Ingest MRs + for mr in &preflight.merge_requests { + match ingest_mr_by_iid(conn, config, 
project_id, mr) { + Ok(ingest_result) => { + if ingest_result.skipped_stale { + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "skipped_stale".to_string(), + error: None, + toctou_reason: Some("updated_at not newer than DB".to_string()), + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "mr", "skipped_stale"); + } + } else { + result.mrs_updated += 1; + all_dirty_source_keys.extend(ingest_result.dirty_source_keys); + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "synced".to_string(), + error: None, + toctou_reason: None, + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "mr", "ingested"); + } + } + } + Err(e) => { + warn!(iid = mr.iid, error = %e, "Failed to ingest MR"); + entity_results.push(EntitySyncResult { + entity_type: "merge_request".to_string(), + iid: mr.iid as u64, + outcome: "error".to_string(), + error: Some(e.to_string()), + toctou_reason: None, + }); + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "mr", "warning"); + } + } + } + } + + let ingest_summary = format!( + "{} issues, {} MRs ingested", + result.issues_updated, result.mrs_updated + ); + let ingest_icon = color_icon(Icons::success(), false); + emit_stage_line( + &spinner, + &ingest_icon, + "Ingest", + &ingest_summary, + stage_start.elapsed(), + options.robot_mode, + ); + + // ── Check cancellation ── + if signal.is_cancelled() { + debug!("Shutdown requested after ingest stage"); + return Ok(()); + } + + // ── Stage: Dependents ── + if let Some(rec) = recorder { + rec.update_phase(recorder_conn, "dependents")?; + } + + let stage_start = Instant::now(); + let spinner = stage_spinner_v2( + Icons::sync(), + "Dependents", + "fetching...", + options.robot_mode, + ); + + let mut total_discussions: usize = 0; + let mut total_events: usize = 0; + + // Fetch 
dependents for successfully ingested issues + for issue in &preflight.issues { + // Only fetch dependents for entities that were actually ingested + let was_ingested = entity_results.iter().any(|e| { + e.entity_type == "issue" && e.iid == issue.iid as u64 && e.outcome == "synced" + }); + if !was_ingested { + continue; + } + + let local_id: i64 = match conn.query_row( + "SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2", + (project_id, issue.iid), + |row| row.get(0), + ) { + Ok(id) => id, + Err(e) => { + warn!(iid = issue.iid, error = %e, "Could not find local issue ID for dependents"); + continue; + } + }; + + match fetch_dependents_for_issue( + client, + conn, + project_id, + gitlab_project_id, + issue.iid, + local_id, + config, + ) + .await + { + Ok(dep_result) => { + total_discussions += dep_result.discussions_fetched; + total_events += dep_result.resource_events_fetched; + result.discussions_fetched += dep_result.discussions_fetched; + result.resource_events_fetched += dep_result.resource_events_fetched; + } + Err(e) => { + warn!(iid = issue.iid, error = %e, "Failed to fetch dependents for issue"); + } + } + } + + // Fetch dependents for successfully ingested MRs + for mr in &preflight.merge_requests { + let was_ingested = entity_results.iter().any(|e| { + e.entity_type == "merge_request" && e.iid == mr.iid as u64 && e.outcome == "synced" + }); + if !was_ingested { + continue; + } + + let local_id: i64 = match conn.query_row( + "SELECT id FROM merge_requests WHERE project_id = ?1 AND iid = ?2", + (project_id, mr.iid), + |row| row.get(0), + ) { + Ok(id) => id, + Err(e) => { + warn!(iid = mr.iid, error = %e, "Could not find local MR ID for dependents"); + continue; + } + }; + + match fetch_dependents_for_mr( + client, + conn, + project_id, + gitlab_project_id, + mr.iid, + local_id, + config, + ) + .await + { + Ok(dep_result) => { + total_discussions += dep_result.discussions_fetched; + total_events += dep_result.resource_events_fetched; + 
result.discussions_fetched += dep_result.discussions_fetched; + result.resource_events_fetched += dep_result.resource_events_fetched; + result.mr_diffs_fetched += dep_result.file_changes_stored; + } + Err(e) => { + warn!(iid = mr.iid, error = %e, "Failed to fetch dependents for MR"); + } + } + } + + let dep_summary = format!("{} discussions, {} events", total_discussions, total_events); + let dep_icon = color_icon(Icons::success(), false); + emit_stage_line( + &spinner, + &dep_icon, + "Dependents", + &dep_summary, + stage_start.elapsed(), + options.robot_mode, + ); + + // ── Check cancellation ── + if signal.is_cancelled() { + debug!("Shutdown requested after dependents stage"); + return Ok(()); + } + + // ── Stage: Docs ── + if !options.no_docs && !all_dirty_source_keys.is_empty() { + if let Some(rec) = recorder { + rec.update_phase(recorder_conn, "docs")?; + } + + let stage_start = Instant::now(); + let spinner = + stage_spinner_v2(Icons::sync(), "Docs", "regenerating...", options.robot_mode); + + let docs_result = regenerate_dirty_documents_for_sources(conn, &all_dirty_source_keys)?; + result.documents_regenerated = docs_result.regenerated; + result.documents_errored = docs_result.errored; + + for _ in 0..docs_result.regenerated { + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "doc", "regenerated"); + } + } + + let docs_summary = format!("{} documents regenerated", result.documents_regenerated); + let docs_icon = color_icon( + if docs_result.errored > 0 { + Icons::warning() + } else { + Icons::success() + }, + docs_result.errored > 0, + ); + emit_stage_line( + &spinner, + &docs_icon, + "Docs", + &docs_summary, + stage_start.elapsed(), + options.robot_mode, + ); + + // ── Check cancellation ── + if signal.is_cancelled() { + debug!("Shutdown requested after docs stage"); + return Ok(()); + } + + // ── Stage: Embed ── + if !options.no_embed && !docs_result.document_ids.is_empty() { + if let Some(rec) = recorder { + 
rec.update_phase(recorder_conn, "embed")?; + } + + let stage_start = Instant::now(); + let spinner = + stage_spinner_v2(Icons::sync(), "Embed", "embedding...", options.robot_mode); + + let ollama_config = OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + ..OllamaConfig::default() + }; + let ollama_client = OllamaClient::new(ollama_config); + + let model_name = &config.embedding.model; + let concurrency = if config.embedding.concurrency > 0 { + config.embedding.concurrency as usize + } else { + DEFAULT_EMBED_CONCURRENCY + }; + + match embed_documents_by_ids( + conn, + &ollama_client, + model_name, + concurrency, + &docs_result.document_ids, + signal, + ) + .await + { + Ok(embed_result) => { + result.documents_embedded = embed_result.docs_embedded; + result.embedding_failed = embed_result.failed; + + for _ in 0..embed_result.docs_embedded { + if let Some(rec) = recorder { + let _ = rec.record_entity_result(recorder_conn, "doc", "embedded"); + } + } + + let embed_summary = format!("{} chunks embedded", embed_result.chunks_embedded); + let embed_icon = color_icon( + if embed_result.failed > 0 { + Icons::warning() + } else { + Icons::success() + }, + embed_result.failed > 0, + ); + emit_stage_line( + &spinner, + &embed_icon, + "Embed", + &embed_summary, + stage_start.elapsed(), + options.robot_mode, + ); + } + Err(e) => { + let warn_summary = format!("skipped ({})", e); + let warn_icon = color_icon(Icons::warning(), true); + emit_stage_line( + &spinner, + &warn_icon, + "Embed", + &warn_summary, + stage_start.elapsed(), + options.robot_mode, + ); + warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing"); + } + } + } + } + + Ok(()) +} + +/// Apply semantic color to a stage-completion icon glyph. 
+fn color_icon(icon: &str, has_errors: bool) -> String { + if has_errors { + Theme::warning().render(icon) + } else { + Theme::success().render(icon) + } +} + +fn emit_stage_line( + pb: &indicatif::ProgressBar, + icon: &str, + label: &str, + summary: &str, + elapsed: std::time::Duration, + robot_mode: bool, +) { + pb.finish_and_clear(); + if !robot_mode { + crate::cli::progress::multi().suspend(|| { + println!("{}", format_stage_line(icon, label, summary, elapsed)); + }); + } +} + +#[cfg(test)] +mod tests { + use crate::cli::commands::sync::SyncOptions; + + #[test] + fn sync_options_is_surgical_required() { + let opts = SyncOptions { + issue_iids: vec![1], + project: Some("group/repo".to_string()), + ..SyncOptions::default() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn sync_options_surgical_with_mrs() { + let opts = SyncOptions { + mr_iids: vec![10, 20], + project: Some("group/repo".to_string()), + ..SyncOptions::default() + }; + assert!(opts.is_surgical()); + } + + #[test] + fn sync_options_not_surgical_without_iids() { + let opts = SyncOptions { + project: Some("group/repo".to_string()), + ..SyncOptions::default() + }; + assert!(!opts.is_surgical()); + } +} diff --git a/src/cli/commands/who/mod.rs b/src/cli/commands/who/mod.rs index 6495621..b880d5c 100644 --- a/src/cli/commands/who/mod.rs +++ b/src/cli/commands/who/mod.rs @@ -143,6 +143,8 @@ pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> { "none" }; + let limit = args.limit.map_or(usize::MAX, usize::from); + match mode { WhoMode::Expert { path } => { // Compute as_of first so --since durations are relative to it. @@ -159,7 +161,6 @@ pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> { } else { resolve_since_from(args.since.as_deref(), "24m", as_of_ms)? 
}; - let limit = usize::from(args.limit); let result = expert::query_expert( &conn, &path, @@ -191,7 +192,7 @@ pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> { .as_deref() .map(resolve_since_required) .transpose()?; - let limit = usize::from(args.limit); + let result = workload::query_workload( &conn, username, @@ -231,7 +232,7 @@ pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> { } WhoMode::Active => { let since_ms = resolve_since(args.since.as_deref(), "7d")?; - let limit = usize::from(args.limit); + let result = active::query_active(&conn, project_id, since_ms, limit, args.include_closed)?; Ok(WhoRun { @@ -249,7 +250,7 @@ pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> { } WhoMode::Overlap { path } => { let since_ms = resolve_since(args.since.as_deref(), "30d")?; - let limit = usize::from(args.limit); + let result = overlap::query_overlap(&conn, &path, project_id, since_ms, limit)?; Ok(WhoRun { resolved_input: WhoResolvedInput { diff --git a/src/cli/commands/who/reviews.rs b/src/cli/commands/who/reviews.rs index 1f5b384..f3814c1 100644 --- a/src/cli/commands/who/reviews.rs +++ b/src/cli/commands/who/reviews.rs @@ -105,7 +105,7 @@ pub(super) fn query_reviews( }) .collect(); - categories.sort_by(|a, b| b.count.cmp(&a.count)); + categories.sort_by_key(|b| std::cmp::Reverse(b.count)); Ok(ReviewsResult { username: username.to_string(), diff --git a/src/cli/commands/who/types.rs b/src/cli/commands/who/types.rs index 6418270..f6378a2 100644 --- a/src/cli/commands/who/types.rs +++ b/src/cli/commands/who/types.rs @@ -18,7 +18,7 @@ pub struct WhoResolvedInput { pub since_iso: Option<String>, /// "default" (mode default applied), "explicit" (user provided --since), "none" (no window) pub since_mode: String, - pub limit: u16, + pub limit: Option<u16>, } /// Top-level result enum -- one variant per mode. 
diff --git a/src/cli/commands/who_tests.rs b/src/cli/commands/who_tests.rs index 1707963..22cc01b 100644 --- a/src/cli/commands/who_tests.rs +++ b/src/cli/commands/who_tests.rs @@ -286,7 +286,7 @@ fn test_is_file_path_discrimination() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -310,7 +310,7 @@ fn test_is_file_path_discrimination() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -334,7 +334,7 @@ fn test_is_file_path_discrimination() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -358,7 +358,7 @@ fn test_is_file_path_discrimination() { reviews: true, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -382,7 +382,7 @@ fn test_is_file_path_discrimination() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -406,7 +406,7 @@ fn test_is_file_path_discrimination() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: false, no_detail: false, fields: None, @@ -431,7 +431,7 @@ fn test_detail_rejected_outside_expert_mode() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: true, no_detail: false, fields: None, @@ -460,7 +460,7 @@ fn test_detail_allowed_in_expert_mode() { reviews: false, since: None, project: None, - limit: 20, + limit: None, detail: true, no_detail: false, fields: None, diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 1ec545e..5f0171d 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -4,7 +4,7 @@ pub mod progress; pub mod render; pub mod robot; -use clap::{Parser, Subcommand}; +use clap::{Args, Parser, Subcommand}; use std::io::IsTerminal; #[derive(Parser)] @@ -298,6 +298,15 @@ pub enum Commands { lore cron uninstall # Remove cron job")] 
Cron(CronArgs), + /// Manage stored GitLab token + #[command(after_help = "\x1b[1mExamples:\x1b[0m + lore token set # Interactive token entry + validation + lore token set --token glpat-xxx # Non-interactive token storage + echo glpat-xxx | lore token set # Pipe token from stdin + lore token show # Show token (masked) + lore token show --unmask # Show full token")] + Token(TokenArgs), + #[command(hide = true)] List { #[arg(value_parser = ["issues", "mrs"])] @@ -798,7 +807,9 @@ pub struct GenerateDocsArgs { lore sync --no-embed # Skip embedding step lore sync --no-status # Skip work-item status enrichment lore sync --full --force # Full re-sync, override stale lock - lore sync --dry-run # Preview what would change")] + lore sync --dry-run # Preview what would change + lore sync --issue 42 -p group/repo # Surgically sync one issue + lore sync --mr 10 --mr 20 -p g/r # Surgically sync two MRs")] pub struct SyncArgs { /// Reset cursors, fetch everything #[arg(long, overrides_with = "no_full")] @@ -848,6 +859,22 @@ pub struct SyncArgs { /// Acquire file lock before syncing (skip if another sync is running) #[arg(long)] pub lock: bool, + + /// Surgically sync specific issues by IID (repeatable, must be positive) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)] + pub issue: Vec<u64>, + + /// Surgically sync specific merge requests by IID (repeatable, must be positive) + #[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)] + pub mr: Vec<u64>, + + /// Scope to a single project (required when --issue or --mr is used) + #[arg(short = 'p', long)] + pub project: Option<String>, + + /// Validate remote entities exist without DB writes (preflight only) + #[arg(long)] + pub preflight_only: bool, } #[derive(Parser)] @@ -973,15 +1000,14 @@ pub struct WhoArgs { #[arg(short = 'p', long, help_heading = "Filters")] pub project: Option<String>, - /// Maximum results per section (1..=500, 
bounded for output safety) + /// Maximum results per section (1..=500); omit for unlimited #[arg( short = 'n', long = "limit", - default_value = "20", value_parser = clap::value_parser!(u16).range(1..=500), help_heading = "Output" )] - pub limit: u16, + pub limit: Option<u16>, /// Select output fields (comma-separated, or 'minimal' preset; varies by mode) #[arg(long, help_heading = "Output", value_delimiter = ',')] @@ -1128,3 +1154,26 @@ pub enum CronAction { /// Show current cron configuration Status, } + +#[derive(Args)] +pub struct TokenArgs { + #[command(subcommand)] + pub action: TokenAction, +} + +#[derive(Subcommand)] +pub enum TokenAction { + /// Store a GitLab token in the config file + Set { + /// Token value (reads from stdin if omitted in non-interactive mode) + #[arg(long)] + token: Option<String>, + }, + + /// Show the current token (masked by default) + Show { + /// Show the full unmasked token + #[arg(long)] + unmask: bool, + }, +} diff --git a/src/core/config.rs b/src/core/config.rs index eee368f..eae67d4 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -12,6 +12,44 @@ pub struct GitLabConfig { #[serde(rename = "tokenEnvVar", default = "default_token_env_var")] pub token_env_var: String, + + /// Optional stored token (env var takes priority when both are set). + #[serde(default)] + pub token: Option<String>, +} + +impl GitLabConfig { + /// Resolve token with priority: env var > config file. + pub fn resolve_token(&self) -> Result<String> { + if let Ok(val) = std::env::var(&self.token_env_var) + && !val.trim().is_empty() + { + return Ok(val.trim().to_string()); + } + if let Some(ref t) = self.token + && !t.trim().is_empty() + { + return Ok(t.trim().to_string()); + } + Err(LoreError::TokenNotSet { + env_var: self.token_env_var.clone(), + }) + } + + /// Returns a human-readable label for where the token was found, or `None`. 
+ pub fn token_source(&self) -> Option<&'static str> { + if let Ok(val) = std::env::var(&self.token_env_var) + && !val.trim().is_empty() + { + return Some("environment variable"); + } + if let Some(ref t) = self.token + && !t.trim().is_empty() + { + return Some("config file"); + } + None + } } fn default_token_env_var() -> String { @@ -531,6 +569,7 @@ mod tests { gitlab: GitLabConfig { base_url: "https://gitlab.example.com".to_string(), token_env_var: "GITLAB_TOKEN".to_string(), + token: None, }, projects: vec![ProjectConfig { path: "group/project".to_string(), @@ -554,6 +593,7 @@ mod tests { gitlab: GitLabConfig { base_url: "https://gitlab.example.com".to_string(), token_env_var: "GITLAB_TOKEN".to_string(), + token: None, }, projects: vec![ProjectConfig { path: "group/project".to_string(), @@ -574,6 +614,7 @@ mod tests { gitlab: GitLabConfig { base_url: "https://gitlab.example.com".to_string(), token_env_var: "GITLAB_TOKEN".to_string(), + token: None, }, projects: vec![ProjectConfig { path: "group/project".to_string(), diff --git a/src/core/db.rs b/src/core/db.rs index 78af367..c4da226 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -89,6 +89,10 @@ const MIGRATIONS: &[(&str, &str)] = &[ "026", include_str!("../../migrations/026_scoring_indexes.sql"), ), + ( + "027", + include_str!("../../migrations/027_surgical_sync_runs.sql"), + ), ]; pub fn create_connection(db_path: &Path) -> Result<Connection> { diff --git a/src/core/error.rs b/src/core/error.rs index 84ccad7..b9c5f2f 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -21,6 +21,7 @@ pub enum ErrorCode { EmbeddingFailed, NotFound, Ambiguous, + SurgicalPreflightFailed, } impl std::fmt::Display for ErrorCode { @@ -44,6 +45,7 @@ impl std::fmt::Display for ErrorCode { Self::EmbeddingFailed => "EMBEDDING_FAILED", Self::NotFound => "NOT_FOUND", Self::Ambiguous => "AMBIGUOUS", + Self::SurgicalPreflightFailed => "SURGICAL_PREFLIGHT_FAILED", }; write!(f, "{code}") } @@ -70,6 +72,9 @@ impl ErrorCode { 
Self::EmbeddingFailed => 16, Self::NotFound => 17, Self::Ambiguous => 18, + // Shares exit code 6 with GitLabNotFound — same semantic category (resource not found). + // Robot consumers distinguish via ErrorCode string, not exit code. + Self::SurgicalPreflightFailed => 6, } } } @@ -111,7 +116,7 @@ pub enum LoreError { source: Option<rusqlite::Error>, }, - #[error("GitLab token not set. Export {env_var} environment variable.")] + #[error("GitLab token not set. Run 'lore token set' or export {env_var}.")] TokenNotSet { env_var: String }, #[error("Database error: {0}")] @@ -153,6 +158,14 @@ pub enum LoreError { #[error("No embeddings found. Run: lore embed")] EmbeddingsNotBuilt, + + #[error("Surgical preflight failed for {entity_type} !{iid} in {project}: {reason}")] + SurgicalPreflightFailed { + entity_type: String, + iid: u64, + project: String, + reason: String, + }, } impl LoreError { @@ -179,6 +192,7 @@ impl LoreError { Self::OllamaModelNotFound { .. } => ErrorCode::OllamaModelNotFound, Self::EmbeddingFailed { .. } => ErrorCode::EmbeddingFailed, Self::EmbeddingsNotBuilt => ErrorCode::EmbeddingFailed, + Self::SurgicalPreflightFailed { .. } => ErrorCode::SurgicalPreflightFailed, } } @@ -207,7 +221,7 @@ impl LoreError { "Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore migrate\n lore reset --yes", ), Self::TokenNotSet { .. 
} => Some( - "Export the token to your shell:\n\n export GITLAB_TOKEN=glpat-xxxxxxxxxxxx\n\n Your token needs the read_api scope.", + "Set your token:\n\n lore token set\n\n Or export to your shell:\n\n export GITLAB_TOKEN=glpat-xxxxxxxxxxxx\n\n Your token needs the read_api scope.", ), Self::Database(_) => Some( "Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore doctor\n lore reset --yes", @@ -227,6 +241,9 @@ impl LoreError { Some("Check Ollama logs or retry with 'lore embed --retry-failed'") } Self::EmbeddingsNotBuilt => Some("Generate embeddings first: lore embed"), + Self::SurgicalPreflightFailed { .. } => Some( + "Verify the IID exists in the project and you have access.\n\n Example:\n lore issues -p <project>\n lore mrs -p <project>", + ), Self::Json(_) | Self::Io(_) | Self::Transform(_) | Self::Other(_) => None, } } @@ -246,7 +263,7 @@ impl LoreError { Self::GitLabAuthFailed => { vec!["export GITLAB_TOKEN=glpat-xxx", "lore auth"] } - Self::TokenNotSet { .. } => vec!["export GITLAB_TOKEN=glpat-xxx"], + Self::TokenNotSet { .. } => vec!["lore token set", "export GITLAB_TOKEN=glpat-xxx"], Self::OllamaUnavailable { .. } => vec!["ollama serve"], Self::OllamaModelNotFound { .. } => vec!["ollama pull nomic-embed-text"], Self::DatabaseLocked { .. } => vec!["lore ingest --force"], @@ -254,6 +271,9 @@ impl LoreError { Self::EmbeddingFailed { .. } => vec!["lore embed --retry-failed"], Self::MigrationFailed { .. } => vec!["lore migrate"], Self::GitLabNetworkError { .. } => vec!["lore doctor"], + Self::SurgicalPreflightFailed { .. 
} => { + vec!["lore issues -p <project>", "lore mrs -p <project>"] + } _ => vec![], } } @@ -293,3 +313,40 @@ impl From<&LoreError> for RobotErrorOutput { } pub type Result<T> = std::result::Result<T, LoreError>; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn surgical_preflight_failed_display() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "issue".to_string(), + iid: 42, + project: "group/repo".to_string(), + reason: "not found on GitLab".to_string(), + }; + let msg = err.to_string(); + assert!(msg.contains("issue"), "missing entity_type: {msg}"); + assert!(msg.contains("42"), "missing iid: {msg}"); + assert!(msg.contains("group/repo"), "missing project: {msg}"); + assert!(msg.contains("not found on GitLab"), "missing reason: {msg}"); + } + + #[test] + fn surgical_preflight_failed_error_code() { + let code = ErrorCode::SurgicalPreflightFailed; + assert_eq!(code.exit_code(), 6); + } + + #[test] + fn surgical_preflight_failed_code_mapping() { + let err = LoreError::SurgicalPreflightFailed { + entity_type: "merge_request".to_string(), + iid: 99, + project: "ns/proj".to_string(), + reason: "404".to_string(), + }; + assert_eq!(err.code(), ErrorCode::SurgicalPreflightFailed); + } +} diff --git a/src/core/paths.rs b/src/core/paths.rs index 7c25591..b59ee08 100644 --- a/src/core/paths.rs +++ b/src/core/paths.rs @@ -68,6 +68,36 @@ fn get_xdg_data_dir() -> PathBuf { }) } +/// Enforce restrictive permissions (0600) on the config file. +/// Warns to stderr if permissions were too open, then tightens them. 
+#[cfg(unix)] +pub fn ensure_config_permissions(path: &std::path::Path) { + use std::os::unix::fs::MetadataExt; + + let Ok(meta) = std::fs::metadata(path) else { + return; + }; + let mode = meta.mode() & 0o777; + if mode != 0o600 { + eprintln!( + "Warning: config file permissions were {mode:04o}, tightening to 0600: {}", + path.display() + ); + let _ = set_permissions_600(path); + } +} + +#[cfg(unix)] +fn set_permissions_600(path: &std::path::Path) -> std::io::Result<()> { + use std::os::unix::fs::PermissionsExt; + let perms = std::fs::Permissions::from_mode(0o600); + std::fs::set_permissions(path, perms) +} + +/// No-op on non-Unix platforms. +#[cfg(not(unix))] +pub fn ensure_config_permissions(_path: &std::path::Path) {} + #[cfg(test)] mod tests { use super::*; diff --git a/src/core/sync_run.rs b/src/core/sync_run.rs index a07b250..ab135df 100644 --- a/src/core/sync_run.rs +++ b/src/core/sync_run.rs @@ -20,6 +20,75 @@ impl SyncRunRecorder { Ok(Self { row_id }) } + /// Returns the database row ID of this sync run. + pub fn row_id(&self) -> i64 { + self.row_id + } + + /// Sets surgical-mode metadata on the run (mode, phase, IID manifest). + pub fn set_surgical_metadata( + &self, + conn: &Connection, + mode: &str, + phase: &str, + surgical_iids_json: &str, + ) -> Result<()> { + conn.execute( + "UPDATE sync_runs + SET mode = ?1, phase = ?2, surgical_iids_json = ?3 + WHERE id = ?4", + rusqlite::params![mode, phase, surgical_iids_json, self.row_id], + )?; + Ok(()) + } + + /// Updates the current phase and refreshes the heartbeat timestamp. + pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> { + let now = now_ms(); + conn.execute( + "UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3", + rusqlite::params![phase, now, self.row_id], + )?; + Ok(()) + } + + /// Increments a counter column by 1 based on entity type and stage. + /// Unknown (entity_type, stage) combinations are silently ignored. 
+ pub fn record_entity_result( + &self, + conn: &Connection, + entity_type: &str, + stage: &str, + ) -> Result<()> { + let column = match (entity_type, stage) { + ("issue", "fetched") => "issues_fetched", + ("issue", "ingested") => "issues_ingested", + ("mr", "fetched") => "mrs_fetched", + ("mr", "ingested") => "mrs_ingested", + ("issue" | "mr", "skipped_stale") => "skipped_stale", + ("doc", "regenerated") => "docs_regenerated", + ("doc", "embedded") => "docs_embedded", + (_, "warning") => "warnings_count", + _ => return Ok(()), + }; + // Column name is from a hardcoded match, not user input — safe to interpolate. + let sql = format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1"); + conn.execute(&sql, rusqlite::params![self.row_id])?; + Ok(()) + } + + /// Marks the run as cancelled with a reason. Consumes self (terminal state). + pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> { + let now = now_ms(); + conn.execute( + "UPDATE sync_runs + SET status = 'cancelled', error = ?1, cancelled_at = ?2, finished_at = ?3 + WHERE id = ?4", + rusqlite::params![reason, now, now, self.row_id], + )?; + Ok(()) + } + pub fn succeed( self, conn: &Connection, diff --git a/src/core/sync_run_tests.rs b/src/core/sync_run_tests.rs index b17c816..af2eeab 100644 --- a/src/core/sync_run_tests.rs +++ b/src/core/sync_run_tests.rs @@ -146,3 +146,239 @@ fn test_sync_run_recorder_fail_with_partial_metrics() { assert_eq!(parsed.len(), 1); assert_eq!(parsed[0].name, "ingest_issues"); } + +#[test] +fn sync_run_surgical_columns_exist() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, phase, surgical_iids_json) + VALUES (1000, 1000, 'running', 'sync', 'surgical', 'preflight', '{\"issues\":[7],\"mrs\":[]}')", + [], + ) + .unwrap(); + let (mode, phase, iids_json): (String, String, String) = conn + .query_row( + "SELECT mode, phase, surgical_iids_json FROM sync_runs WHERE mode = 'surgical'", + [], 
+ |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(mode, "surgical"); + assert_eq!(phase, "preflight"); + assert!(iids_json.contains("7")); +} + +#[test] +fn sync_run_counter_defaults_are_zero() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command) + VALUES (2000, 2000, 'running', 'sync')", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); + let (issues_fetched, mrs_fetched, docs_regenerated, warnings_count): (i64, i64, i64, i64) = + conn.query_row( + "SELECT issues_fetched, mrs_fetched, docs_regenerated, warnings_count FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(issues_fetched, 0); + assert_eq!(mrs_fetched, 0); + assert_eq!(docs_regenerated, 0); + assert_eq!(warnings_count, 0); +} + +#[test] +fn sync_run_nullable_columns_default_to_null() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command) + VALUES (3000, 3000, 'running', 'sync')", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); + let (mode, phase, cancelled_at): (Option<String>, Option<String>, Option<i64>) = conn + .query_row( + "SELECT mode, phase, cancelled_at FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert!(mode.is_none()); + assert!(phase.is_none()); + assert!(cancelled_at.is_none()); +} + +#[test] +fn sync_run_counter_round_trip() { + let conn = setup_test_db(); + conn.execute( + "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, issues_fetched, mrs_ingested, docs_embedded) + VALUES (4000, 4000, 'succeeded', 'sync', 'surgical', 3, 2, 5)", + [], + ) + .unwrap(); + let row_id = conn.last_insert_rowid(); + let (issues_fetched, mrs_ingested, docs_embedded): (i64, i64, i64) = conn + .query_row( + "SELECT issues_fetched, mrs_ingested, docs_embedded FROM sync_runs 
WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(issues_fetched, 3); + assert_eq!(mrs_ingested, 2); + assert_eq!(docs_embedded, 5); +} + +#[test] +fn surgical_lifecycle_start_metadata_succeed() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "surg001").unwrap(); + let row_id = recorder.row_id(); + + recorder + .set_surgical_metadata( + &conn, + "surgical", + "preflight", + r#"{"issues":[7,8],"mrs":[101]}"#, + ) + .unwrap(); + + recorder.update_phase(&conn, "ingest").unwrap(); + recorder + .record_entity_result(&conn, "issue", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "ingested") + .unwrap(); + recorder + .record_entity_result(&conn, "mr", "fetched") + .unwrap(); + recorder + .record_entity_result(&conn, "mr", "ingested") + .unwrap(); + + recorder.succeed(&conn, &[], 3, 0).unwrap(); + + #[allow(clippy::type_complexity)] + let (mode, phase, iids, issues_fetched, mrs_fetched, issues_ingested, mrs_ingested, status): ( + String, + String, + String, + i64, + i64, + i64, + i64, + String, + ) = conn + .query_row( + "SELECT mode, phase, surgical_iids_json, issues_fetched, mrs_fetched, \ + issues_ingested, mrs_ingested, status \ + FROM sync_runs WHERE id = ?1", + [row_id], + |r| { + Ok(( + r.get(0)?, + r.get(1)?, + r.get(2)?, + r.get(3)?, + r.get(4)?, + r.get(5)?, + r.get(6)?, + r.get(7)?, + )) + }, + ) + .unwrap(); + + assert_eq!(mode, "surgical"); + assert_eq!(phase, "ingest"); + assert!(iids.contains("101")); + assert_eq!(issues_fetched, 2); + assert_eq!(mrs_fetched, 1); + assert_eq!(issues_ingested, 1); + assert_eq!(mrs_ingested, 1); + assert_eq!(status, "succeeded"); +} + +#[test] +fn surgical_lifecycle_cancel() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "cancel01").unwrap(); + let row_id = recorder.row_id(); + + recorder + 
.set_surgical_metadata(&conn, "surgical", "preflight", "{}") + .unwrap(); + recorder + .cancel(&conn, "User requested cancellation") + .unwrap(); + + let (status, error, cancelled_at, finished_at): ( + String, + Option<String>, + Option<i64>, + Option<i64>, + ) = conn + .query_row( + "SELECT status, error, cancelled_at, finished_at FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + + assert_eq!(status, "cancelled"); + assert_eq!(error.as_deref(), Some("User requested cancellation")); + assert!(cancelled_at.is_some()); + assert!(finished_at.is_some()); +} + +#[test] +fn record_entity_result_ignores_unknown() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "unk001").unwrap(); + recorder + .record_entity_result(&conn, "widget", "exploded") + .unwrap(); +} + +#[test] +fn record_entity_result_doc_counters() { + let conn = setup_test_db(); + let recorder = SyncRunRecorder::start(&conn, "sync", "cnt001").unwrap(); + let row_id = recorder.row_id(); + + recorder + .record_entity_result(&conn, "doc", "regenerated") + .unwrap(); + recorder + .record_entity_result(&conn, "doc", "regenerated") + .unwrap(); + recorder + .record_entity_result(&conn, "doc", "embedded") + .unwrap(); + recorder + .record_entity_result(&conn, "issue", "skipped_stale") + .unwrap(); + + let (docs_regen, docs_embed, skipped): (i64, i64, i64) = conn + .query_row( + "SELECT docs_regenerated, docs_embedded, skipped_stale FROM sync_runs WHERE id = ?1", + [row_id], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + + assert_eq!(docs_regen, 2); + assert_eq!(docs_embed, 1); + assert_eq!(skipped, 1); +} diff --git a/src/documents/mod.rs b/src/documents/mod.rs index 3681cb8..7ae02ac 100644 --- a/src/documents/mod.rs +++ b/src/documents/mod.rs @@ -7,7 +7,10 @@ pub use extractor::{ extract_discussion_document, extract_issue_document, extract_mr_document, extract_note_document, 
extract_note_document_cached, }; -pub use regenerator::{RegenerateResult, regenerate_dirty_documents}; +pub use regenerator::{ + RegenerateForSourcesResult, RegenerateResult, regenerate_dirty_documents, + regenerate_dirty_documents_for_sources, +}; pub use truncation::{ MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, TruncationReason, TruncationResult, truncate_discussion, truncate_hard_cap, truncate_utf8, diff --git a/src/documents/regenerator.rs b/src/documents/regenerator.rs index baaadb3..f43e001 100644 --- a/src/documents/regenerator.rs +++ b/src/documents/regenerator.rs @@ -84,6 +84,60 @@ pub fn regenerate_dirty_documents( Ok(result) } +#[derive(Debug, Default)] +pub struct RegenerateForSourcesResult { + pub regenerated: usize, + pub unchanged: usize, + pub errored: usize, + pub document_ids: Vec<i64>, +} + +pub fn regenerate_dirty_documents_for_sources( + conn: &Connection, + source_keys: &[(SourceType, i64)], +) -> Result<RegenerateForSourcesResult> { + let mut result = RegenerateForSourcesResult::default(); + let mut cache = ParentMetadataCache::new(); + + for &(source_type, source_id) in source_keys { + match regenerate_one(conn, source_type, source_id, &mut cache) { + Ok(changed) => { + if changed { + result.regenerated += 1; + } else { + result.unchanged += 1; + } + clear_dirty(conn, source_type, source_id)?; + + // Try to collect the document_id if a document exists + if let Ok(doc_id) = get_document_id(conn, source_type, source_id) { + result.document_ids.push(doc_id); + } + } + Err(e) => { + warn!( + source_type = %source_type, + source_id, + error = %e, + "Failed to regenerate document for source" + ); + record_dirty_error(conn, source_type, source_id, &e.to_string())?; + result.errored += 1; + } + } + } + + debug!( + regenerated = result.regenerated, + unchanged = result.unchanged, + errored = result.errored, + document_ids = result.document_ids.len(), + "Scoped document regeneration complete" + ); + + Ok(result) +} + fn regenerate_one( 
conn: &Connection, source_type: SourceType, diff --git a/src/documents/regenerator_tests.rs b/src/documents/regenerator_tests.rs index 04bde8c..2b5d156 100644 --- a/src/documents/regenerator_tests.rs +++ b/src/documents/regenerator_tests.rs @@ -518,3 +518,88 @@ fn test_note_regeneration_cache_invalidates_across_parents() { assert!(beta_content.contains("parent_iid: 99")); assert!(beta_content.contains("parent_title: Issue Beta")); } + +#[test] +fn test_scoped_regen_only_processes_specified_sources() { + let conn = setup_db(); + // Insert two issues + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'First Issue', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (2, 20, 1, 43, 'Second Issue', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + + // Mark both dirty + mark_dirty(&conn, SourceType::Issue, 1).unwrap(); + mark_dirty(&conn, SourceType::Issue, 2).unwrap(); + + // Regenerate only issue 1 + let result = regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap(); + + assert_eq!(result.regenerated, 1); + assert_eq!(result.errored, 0); + + // Issue 1 should be regenerated and cleared from dirty + let doc_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM documents WHERE source_type = 'issue' AND source_id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(doc_count, 1); + + // Issue 2 should still be dirty + let dirty_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'issue' AND source_id = 2", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(dirty_count, 1); +} + +#[test] +fn test_scoped_regen_returns_document_ids() { + let conn = setup_db(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) 
VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + mark_dirty(&conn, SourceType::Issue, 1).unwrap(); + + let result = regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, 1)]).unwrap(); + + assert_eq!(result.document_ids.len(), 1); + + // Verify returned ID matches the actual document + let actual_id: i64 = conn + .query_row( + "SELECT id FROM documents WHERE source_type = 'issue' AND source_id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(result.document_ids[0], actual_id); +} + +#[test] +fn test_scoped_regen_handles_missing_source() { + let conn = setup_db(); + // Don't insert any issues — source_id 999 doesn't exist + // But mark it dirty so the function tries to process it + mark_dirty(&conn, SourceType::Issue, 999).unwrap(); + + let result = + regenerate_dirty_documents_for_sources(&conn, &[(SourceType::Issue, 999)]).unwrap(); + + // Source doesn't exist, so regenerate_one returns Ok(true) deleting the doc. + // No document_id to collect since there's nothing in the documents table. 
+ assert_eq!(result.regenerated, 1); + assert_eq!(result.errored, 0); + assert!(result.document_ids.is_empty()); +} diff --git a/src/embedding/mod.rs b/src/embedding/mod.rs index 0e4458c..fd3ac9f 100644 --- a/src/embedding/mod.rs +++ b/src/embedding/mod.rs @@ -7,5 +7,5 @@ pub mod similarity; pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents}; pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks}; -pub use pipeline::{EmbedResult, embed_documents}; +pub use pipeline::{EmbedForIdsResult, EmbedResult, embed_documents, embed_documents_by_ids}; pub use similarity::cosine_similarity; diff --git a/src/embedding/pipeline.rs b/src/embedding/pipeline.rs index cc84b36..ba37d2d 100644 --- a/src/embedding/pipeline.rs +++ b/src/embedding/pipeline.rs @@ -578,3 +578,207 @@ fn sha256_hash(input: &str) -> String { hasher.update(input.as_bytes()); format!("{:x}", hasher.finalize()) } + +#[derive(Debug, Default)] +pub struct EmbedForIdsResult { + pub chunks_embedded: usize, + pub docs_embedded: usize, + pub failed: usize, + pub skipped: usize, +} + +/// Embed only the documents with the given IDs, skipping any that are +/// already embedded with matching config (model, dims, chunk size, hash). 
+pub async fn embed_documents_by_ids( + conn: &Connection, + client: &OllamaClient, + model_name: &str, + concurrency: usize, + document_ids: &[i64], + signal: &ShutdownSignal, +) -> Result<EmbedForIdsResult> { + let mut result = EmbedForIdsResult::default(); + + if document_ids.is_empty() { + return Ok(result); + } + + if signal.is_cancelled() { + return Ok(result); + } + + // Load documents for the specified IDs, filtering out already-embedded + let pending = find_documents_by_ids(conn, document_ids, model_name)?; + + if pending.is_empty() { + result.skipped = document_ids.len(); + return Ok(result); + } + + let skipped_count = document_ids.len() - pending.len(); + result.skipped = skipped_count; + + info!( + requested = document_ids.len(), + pending = pending.len(), + skipped = skipped_count, + "Scoped embedding: processing documents by ID" + ); + + // Use the same SAVEPOINT + embed_page pattern as the main pipeline + let mut last_id: i64 = 0; + let mut processed: usize = 0; + let total = pending.len(); + let mut page_stats = EmbedResult::default(); + + conn.execute_batch("SAVEPOINT embed_by_ids")?; + let page_result = embed_page( + conn, + client, + model_name, + concurrency, + &pending, + &mut page_stats, + &mut last_id, + &mut processed, + total, + &None, + signal, + ) + .await; + + match page_result { + Ok(()) if signal.is_cancelled() => { + let _ = conn.execute_batch("ROLLBACK TO embed_by_ids; RELEASE embed_by_ids"); + info!("Rolled back scoped embed page due to cancellation"); + } + Ok(()) => { + conn.execute_batch("RELEASE embed_by_ids")?; + + // Count actual results from DB + let (chunks, docs) = count_embedded_results(conn, &pending)?; + result.chunks_embedded = chunks; + result.docs_embedded = docs; + result.failed = page_stats.failed; + } + Err(e) => { + let _ = conn.execute_batch("ROLLBACK TO embed_by_ids; RELEASE embed_by_ids"); + return Err(e); + } + } + + info!( + chunks_embedded = result.chunks_embedded, + docs_embedded = result.docs_embedded, + 
failed = result.failed, + skipped = result.skipped, + "Scoped embedding complete" + ); + + Ok(result) +} + +/// Load documents by specific IDs, filtering out those already embedded +/// with matching config (same logic as `find_pending_documents` but scoped). +fn find_documents_by_ids( + conn: &Connection, + document_ids: &[i64], + model_name: &str, +) -> Result<Vec<crate::embedding::change_detector::PendingDocument>> { + use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS}; + + if document_ids.is_empty() { + return Ok(Vec::new()); + } + + // Build IN clause with placeholders + let placeholders: Vec<String> = (0..document_ids.len()) + .map(|i| format!("?{}", i + 1)) + .collect(); + let in_clause = placeholders.join(", "); + + let sql = format!( + r#" + SELECT d.id, d.content_text, d.content_hash + FROM documents d + LEFT JOIN embedding_metadata em + ON em.document_id = d.id AND em.chunk_index = 0 + WHERE d.id IN ({in_clause}) + AND ( + em.document_id IS NULL + OR em.document_hash != d.content_hash + OR em.chunk_max_bytes IS NULL + OR em.chunk_max_bytes != ?{chunk_bytes_idx} + OR em.model != ?{model_idx} + OR em.dims != ?{dims_idx} + ) + ORDER BY d.id + "#, + in_clause = in_clause, + chunk_bytes_idx = document_ids.len() + 1, + model_idx = document_ids.len() + 2, + dims_idx = document_ids.len() + 3, + ); + + let mut stmt = conn.prepare(&sql)?; + + // Build params: document_ids... 
then chunk_max_bytes, model, dims + let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new(); + for id in document_ids { + params.push(Box::new(*id)); + } + params.push(Box::new(CHUNK_MAX_BYTES as i64)); + params.push(Box::new(model_name.to_string())); + params.push(Box::new(EXPECTED_DIMS as i64)); + + let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let rows = stmt + .query_map(param_refs.as_slice(), |row| { + Ok(crate::embedding::change_detector::PendingDocument { + document_id: row.get(0)?, + content_text: row.get(1)?, + content_hash: row.get(2)?, + }) + })? + .collect::<std::result::Result<Vec<_>, _>>()?; + + Ok(rows) +} + +/// Count how many chunks and complete docs were embedded for the given pending docs. +fn count_embedded_results( + conn: &Connection, + pending: &[crate::embedding::change_detector::PendingDocument], +) -> Result<(usize, usize)> { + let mut total_chunks: usize = 0; + let mut total_docs: usize = 0; + + for doc in pending { + let chunk_count: i64 = conn.query_row( + "SELECT COUNT(*) FROM embedding_metadata WHERE document_id = ?1 AND last_error IS NULL", + [doc.document_id], + |row| row.get(0), + )?; + if chunk_count > 0 { + total_chunks += chunk_count as usize; + // Check if all expected chunks are present (chunk_count metadata on chunk_index=0) + let expected: Option<i64> = conn.query_row( + "SELECT chunk_count FROM embedding_metadata WHERE document_id = ?1 AND chunk_index = 0", + [doc.document_id], + |row| row.get(0), + )?; + if let Some(expected_count) = expected + && chunk_count >= expected_count + { + total_docs += 1; + } + } + } + + Ok((total_chunks, total_docs)) +} + +#[cfg(test)] +#[path = "pipeline_tests.rs"] +mod tests; diff --git a/src/embedding/pipeline_tests.rs b/src/embedding/pipeline_tests.rs new file mode 100644 index 0000000..08e272c --- /dev/null +++ b/src/embedding/pipeline_tests.rs @@ -0,0 +1,184 @@ +use std::path::Path; + +use rusqlite::Connection; +use 
wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +use crate::core::db::{create_connection, run_migrations}; +use crate::core::shutdown::ShutdownSignal; +use crate::embedding::chunking::EXPECTED_DIMS; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; +use crate::embedding::pipeline::embed_documents_by_ids; + +const MODEL: &str = "nomic-embed-text"; + +fn setup_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn +} + +fn insert_test_project(conn: &Connection) -> i64 { + conn.execute( + "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) + VALUES (1, 'group/test', 'https://gitlab.example.com/group/test')", + [], + ) + .unwrap(); + conn.last_insert_rowid() +} + +fn insert_test_document( + conn: &Connection, + project_id: i64, + source_id: i64, + content: &str, + hash: &str, +) -> i64 { + conn.execute( + "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) + VALUES ('issue', ?1, ?2, ?3, ?4)", + rusqlite::params![source_id, project_id, content, hash], + ) + .unwrap(); + conn.last_insert_rowid() +} + +fn make_fake_embedding() -> Vec<f32> { + vec![0.1_f32; EXPECTED_DIMS] +} + +fn make_ollama_response(count: usize) -> serde_json::Value { + let embedding = make_fake_embedding(); + let embeddings: Vec<_> = (0..count).map(|_| embedding.clone()).collect(); + serde_json::json!({ + "model": MODEL, + "embeddings": embeddings + }) +} + +fn count_embeddings_for_doc(conn: &Connection, doc_id: i64) -> i64 { + conn.query_row( + "SELECT COUNT(*) FROM embedding_metadata WHERE document_id = ?1", + [doc_id], + |row| row.get(0), + ) + .unwrap() +} + +fn make_client(base_url: &str) -> OllamaClient { + OllamaClient::new(OllamaConfig { + base_url: base_url.to_string(), + model: MODEL.to_string(), + timeout_secs: 10, + }) +} + +#[tokio::test] +async fn test_embed_by_ids_only_embeds_specified_docs() { + let 
mock_server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/api/embed")) + .respond_with(ResponseTemplate::new(200).set_body_json(make_ollama_response(1))) + .mount(&mock_server) + .await; + + let conn = setup_db(); + let proj_id = insert_test_project(&conn); + let doc1 = insert_test_document(&conn, proj_id, 1, "Hello world content for doc 1", "hash_a"); + let doc2 = insert_test_document(&conn, proj_id, 2, "Hello world content for doc 2", "hash_b"); + + let signal = ShutdownSignal::new(); + let client = make_client(&mock_server.uri()); + + // Only embed doc1 + let result = embed_documents_by_ids(&conn, &client, MODEL, 1, &[doc1], &signal) + .await + .unwrap(); + + assert_eq!(result.docs_embedded, 1, "Should embed exactly 1 doc"); + assert!(result.chunks_embedded > 0, "Should have embedded chunks"); + + // doc1 should have embeddings + assert!( + count_embeddings_for_doc(&conn, doc1) > 0, + "doc1 should have embeddings" + ); + + // doc2 should have NO embeddings + assert_eq!( + count_embeddings_for_doc(&conn, doc2), + 0, + "doc2 should have no embeddings" + ); +} + +#[tokio::test] +async fn test_embed_by_ids_skips_already_embedded() { + let mock_server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/api/embed")) + .respond_with(ResponseTemplate::new(200).set_body_json(make_ollama_response(1))) + .expect(1) // Should only be called once + .mount(&mock_server) + .await; + + let conn = setup_db(); + let proj_id = insert_test_project(&conn); + let doc1 = insert_test_document(&conn, proj_id, 1, "Hello world content for doc 1", "hash_a"); + + let signal = ShutdownSignal::new(); + let client = make_client(&mock_server.uri()); + + // First embed + let result1 = embed_documents_by_ids(&conn, &client, MODEL, 1, &[doc1], &signal) + .await + .unwrap(); + assert_eq!(result1.docs_embedded, 1); + + // Second embed with same doc — should skip + let result2 = embed_documents_by_ids(&conn, &client, MODEL, 1, &[doc1], &signal) + 
.await + .unwrap(); + assert_eq!(result2.docs_embedded, 0, "Should embed 0 on second call"); + assert_eq!(result2.skipped, 1, "Should report 1 skipped"); + assert_eq!(result2.chunks_embedded, 0, "No new chunks"); +} + +#[tokio::test] +async fn test_embed_by_ids_empty_input() { + let conn = setup_db(); + let signal = ShutdownSignal::new(); + // Client URL doesn't matter — should never be called + let client = make_client("http://localhost:99999"); + + let result = embed_documents_by_ids(&conn, &client, MODEL, 1, &[], &signal) + .await + .unwrap(); + + assert_eq!(result.docs_embedded, 0); + assert_eq!(result.chunks_embedded, 0); + assert_eq!(result.failed, 0); + assert_eq!(result.skipped, 0); +} + +#[tokio::test] +async fn test_embed_by_ids_respects_cancellation() { + let conn = setup_db(); + let proj_id = insert_test_project(&conn); + let doc1 = insert_test_document(&conn, proj_id, 1, "Hello world content for doc 1", "hash_a"); + + let signal = ShutdownSignal::new(); + signal.cancel(); // Pre-cancel + + let client = make_client("http://localhost:99999"); + + let result = embed_documents_by_ids(&conn, &client, MODEL, 1, &[doc1], &signal) + .await + .unwrap(); + + assert_eq!(result.docs_embedded, 0, "Should embed 0 when cancelled"); + assert_eq!(result.chunks_embedded, 0, "No chunks when cancelled"); +} diff --git a/src/gitlab/client.rs b/src/gitlab/client.rs index 2c69f97..edee077 100644 --- a/src/gitlab/client.rs +++ b/src/gitlab/client.rs @@ -112,6 +112,18 @@ impl GitLabClient { self.request("/api/v4/version").await } + pub async fn get_issue_by_iid(&self, project_id: i64, iid: i64) -> Result<GitLabIssue> { + self.request(&format!("/api/v4/projects/{project_id}/issues/{iid}")) + .await + } + + pub async fn get_mr_by_iid(&self, project_id: i64, iid: i64) -> Result<GitLabMergeRequest> { + self.request(&format!( + "/api/v4/projects/{project_id}/merge_requests/{iid}" + )) + .await + } + const MAX_RETRIES: u32 = 3; async fn request<T: serde::de::DeserializeOwned>(&self, 
path: &str) -> Result<T> { @@ -763,6 +775,10 @@ fn ms_to_iso8601(ms: i64) -> Option<String> { .map(|dt| dt.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string()) } +#[cfg(test)] +#[path = "client_tests.rs"] +mod client_tests; + #[cfg(test)] mod tests { use super::*; diff --git a/src/gitlab/client_tests.rs b/src/gitlab/client_tests.rs new file mode 100644 index 0000000..209eee4 --- /dev/null +++ b/src/gitlab/client_tests.rs @@ -0,0 +1,113 @@ +use super::*; +use crate::core::error::LoreError; +use wiremock::matchers::{header, method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +#[tokio::test] +async fn get_issue_by_iid_success() { + let server = MockServer::start().await; + let issue_json = serde_json::json!({ + "id": 1001, + "iid": 42, + "project_id": 5, + "title": "Fix login bug", + "state": "opened", + "created_at": "2026-01-15T10:00:00Z", + "updated_at": "2026-02-01T14:30:00Z", + "author": { "id": 1, "username": "dev1", "name": "Developer One" }, + "web_url": "https://gitlab.example.com/group/repo/-/issues/42", + "labels": [], + "milestone": null, + "assignees": [], + "closed_at": null, + "description": "Login fails on mobile" + }); + + Mock::given(method("GET")) + .and(path("/api/v4/projects/5/issues/42")) + .and(header("PRIVATE-TOKEN", "test-token")) + .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json)) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(100.0)); + let issue = client.get_issue_by_iid(5, 42).await.unwrap(); + assert_eq!(issue.iid, 42); + assert_eq!(issue.title, "Fix login bug"); +} + +#[tokio::test] +async fn get_issue_by_iid_not_found() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/5/issues/999")) + .respond_with( + ResponseTemplate::new(404) + .set_body_json(serde_json::json!({"message": "404 Not Found"})), + ) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(100.0)); 
+ let err = client.get_issue_by_iid(5, 999).await.unwrap_err(); + assert!(matches!(err, LoreError::GitLabNotFound { .. })); +} + +#[tokio::test] +async fn get_mr_by_iid_success() { + let server = MockServer::start().await; + let mr_json = serde_json::json!({ + "id": 2001, + "iid": 101, + "project_id": 5, + "title": "Add caching layer", + "state": "merged", + "created_at": "2026-01-20T09:00:00Z", + "updated_at": "2026-02-10T16:00:00Z", + "author": { "id": 2, "username": "dev2", "name": "Developer Two" }, + "web_url": "https://gitlab.example.com/group/repo/-/merge_requests/101", + "source_branch": "feature/caching", + "target_branch": "main", + "draft": false, + "labels": [], + "milestone": null, + "assignees": [], + "reviewers": [], + "merged_by": null, + "merged_at": null, + "closed_at": null, + "description": "Adds Redis caching" + }); + + Mock::given(method("GET")) + .and(path("/api/v4/projects/5/merge_requests/101")) + .and(header("PRIVATE-TOKEN", "test-token")) + .respond_with(ResponseTemplate::new(200).set_body_json(&mr_json)) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(100.0)); + let mr = client.get_mr_by_iid(5, 101).await.unwrap(); + assert_eq!(mr.iid, 101); + assert_eq!(mr.title, "Add caching layer"); + assert_eq!(mr.source_branch, "feature/caching"); +} + +#[tokio::test] +async fn get_mr_by_iid_not_found() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/5/merge_requests/999")) + .respond_with( + ResponseTemplate::new(404) + .set_body_json(serde_json::json!({"message": "404 Not Found"})), + ) + .mount(&server) + .await; + + let client = GitLabClient::new(&server.uri(), "test-token", Some(100.0)); + let err = client.get_mr_by_iid(5, 999).await.unwrap_err(); + assert!(matches!(err, LoreError::GitLabNotFound { .. 
})); +} diff --git a/src/ingestion/issues.rs b/src/ingestion/issues.rs index cd912c9..48da301 100644 --- a/src/ingestion/issues.rs +++ b/src/ingestion/issues.rs @@ -140,7 +140,7 @@ fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCurs true } -fn process_single_issue( +pub(crate) fn process_single_issue( conn: &Connection, config: &Config, project_id: i64, diff --git a/src/ingestion/merge_requests.rs b/src/ingestion/merge_requests.rs index 0d1453e..baa5763 100644 --- a/src/ingestion/merge_requests.rs +++ b/src/ingestion/merge_requests.rs @@ -135,13 +135,13 @@ pub async fn ingest_merge_requests( Ok(result) } -struct ProcessMrResult { - labels_created: usize, - assignees_linked: usize, - reviewers_linked: usize, +pub(crate) struct ProcessMrResult { + pub(crate) labels_created: usize, + pub(crate) assignees_linked: usize, + pub(crate) reviewers_linked: usize, } -fn process_single_mr( +pub(crate) fn process_single_mr( conn: &Connection, config: &Config, project_id: i64, diff --git a/src/ingestion/mod.rs b/src/ingestion/mod.rs index aa64675..8d5f3cb 100644 --- a/src/ingestion/mod.rs +++ b/src/ingestion/mod.rs @@ -6,6 +6,7 @@ pub mod merge_requests; pub mod mr_diffs; pub mod mr_discussions; pub mod orchestrator; +pub(crate) mod surgical; pub use discussions::{IngestDiscussionsResult, ingest_issue_discussions}; pub use issues::{IngestIssuesResult, IssueForDiscussionSync, ingest_issues}; diff --git a/src/ingestion/orchestrator.rs b/src/ingestion/orchestrator.rs index cab8650..113c5d7 100644 --- a/src/ingestion/orchestrator.rs +++ b/src/ingestion/orchestrator.rs @@ -1097,7 +1097,7 @@ async fn drain_resource_events( } /// Store resource events using the provided connection (caller manages the transaction). 
-fn store_resource_events( +pub(crate) fn store_resource_events( conn: &Connection, project_id: i64, entity_type: &str, @@ -1406,7 +1406,7 @@ async fn drain_mr_closes_issues( Ok(result) } -fn store_closes_issues_refs( +pub(crate) fn store_closes_issues_refs( conn: &Connection, project_id: i64, mr_local_id: i64, diff --git a/src/ingestion/surgical.rs b/src/ingestion/surgical.rs new file mode 100644 index 0000000..1a8dccd --- /dev/null +++ b/src/ingestion/surgical.rs @@ -0,0 +1,432 @@ +use rusqlite::Connection; +use rusqlite::OptionalExtension; +use tracing::{debug, warn}; + +use crate::Config; +use crate::core::error::{LoreError, Result}; +use crate::documents::SourceType; +use crate::gitlab::GitLabClient; +use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest}; +use crate::ingestion::dirty_tracker; +use crate::ingestion::discussions::ingest_issue_discussions; +use crate::ingestion::issues::{IssueForDiscussionSync, process_single_issue}; +use crate::ingestion::merge_requests::{MrForDiscussionSync, process_single_mr}; +use crate::ingestion::mr_diffs::upsert_mr_file_changes; +use crate::ingestion::mr_discussions::ingest_mr_discussions; +use crate::ingestion::orchestrator::{store_closes_issues_refs, store_resource_events}; + +// --------------------------------------------------------------------------- +// Result types +// --------------------------------------------------------------------------- + +#[derive(Debug)] +pub(crate) struct IngestIssueResult { + pub skipped_stale: bool, + pub dirty_source_keys: Vec<(SourceType, i64)>, +} + +#[derive(Debug)] +pub(crate) struct IngestMrResult { + pub skipped_stale: bool, + pub dirty_source_keys: Vec<(SourceType, i64)>, +} + +#[derive(Debug)] +pub(crate) struct PreflightResult { + pub issues: Vec<GitLabIssue>, + pub merge_requests: Vec<GitLabMergeRequest>, + pub failures: Vec<PreflightFailure>, +} + +#[derive(Debug)] +pub(crate) struct PreflightFailure { + pub entity_type: String, + pub iid: i64, + pub error: LoreError, 
+} + +// --------------------------------------------------------------------------- +// TOCTOU guard +// --------------------------------------------------------------------------- + +/// Returns `true` if the payload is stale (same age or older than what the DB +/// already has). Returns `false` when the entity is new (no DB row) or when +/// the payload is strictly newer. +pub(crate) fn is_stale(payload_updated_at: &str, db_updated_at_ms: Option<i64>) -> Result<bool> { + let Some(db_ms) = db_updated_at_ms else { + return Ok(false); + }; + + let payload_ms = chrono::DateTime::parse_from_rfc3339(payload_updated_at) + .map(|dt| dt.timestamp_millis()) + .map_err(|e| { + LoreError::Other(format!( + "Failed to parse timestamp '{}': {}", + payload_updated_at, e + )) + })?; + + Ok(payload_ms <= db_ms) +} + +// --------------------------------------------------------------------------- +// Ingestion wrappers +// --------------------------------------------------------------------------- + +/// Ingest a single issue by IID with TOCTOU guard and dirty marking. +pub(crate) fn ingest_issue_by_iid( + conn: &Connection, + config: &Config, + project_id: i64, + issue: &GitLabIssue, +) -> Result<IngestIssueResult> { + let db_updated_at = get_db_updated_at(conn, "issues", issue.iid, project_id)?; + + if is_stale(&issue.updated_at, db_updated_at)? { + debug!(iid = issue.iid, "Skipping stale issue (TOCTOU guard)"); + return Ok(IngestIssueResult { + skipped_stale: true, + dirty_source_keys: vec![], + }); + } + + process_single_issue(conn, config, project_id, issue)?; + + let local_id: i64 = conn.query_row( + "SELECT id FROM issues WHERE project_id = ? AND iid = ?", + (project_id, issue.iid), + |row| row.get(0), + )?; + + dirty_tracker::mark_dirty(conn, SourceType::Issue, local_id)?; + + Ok(IngestIssueResult { + skipped_stale: false, + dirty_source_keys: vec![(SourceType::Issue, local_id)], + }) +} + +/// Ingest a single merge request by IID with TOCTOU guard and dirty marking. 
+pub(crate) fn ingest_mr_by_iid( + conn: &Connection, + config: &Config, + project_id: i64, + mr: &GitLabMergeRequest, +) -> Result<IngestMrResult> { + let db_updated_at = get_db_updated_at(conn, "merge_requests", mr.iid, project_id)?; + + if is_stale(&mr.updated_at, db_updated_at)? { + debug!(iid = mr.iid, "Skipping stale MR (TOCTOU guard)"); + return Ok(IngestMrResult { + skipped_stale: true, + dirty_source_keys: vec![], + }); + } + + process_single_mr(conn, config, project_id, mr)?; + + let local_id: i64 = conn.query_row( + "SELECT id FROM merge_requests WHERE project_id = ? AND iid = ?", + (project_id, mr.iid), + |row| row.get(0), + )?; + + dirty_tracker::mark_dirty(conn, SourceType::MergeRequest, local_id)?; + + Ok(IngestMrResult { + skipped_stale: false, + dirty_source_keys: vec![(SourceType::MergeRequest, local_id)], + }) +} + +// --------------------------------------------------------------------------- +// Preflight fetch +// --------------------------------------------------------------------------- + +/// Fetch specific issues and MRs by IID from GitLab. Collects successes and +/// failures without aborting on individual 404s. 
+pub(crate) async fn preflight_fetch( + client: &GitLabClient, + gitlab_project_id: i64, + targets: &[(String, i64)], +) -> PreflightResult { + let mut result = PreflightResult { + issues: Vec::new(), + merge_requests: Vec::new(), + failures: Vec::new(), + }; + + for (entity_type, iid) in targets { + match entity_type.as_str() { + "issue" => match client.get_issue_by_iid(gitlab_project_id, *iid).await { + Ok(issue) => result.issues.push(issue), + Err(e) => result.failures.push(PreflightFailure { + entity_type: entity_type.clone(), + iid: *iid, + error: e, + }), + }, + "merge_request" => match client.get_mr_by_iid(gitlab_project_id, *iid).await { + Ok(mr) => result.merge_requests.push(mr), + Err(e) => result.failures.push(PreflightFailure { + entity_type: entity_type.clone(), + iid: *iid, + error: e, + }), + }, + other => { + result.failures.push(PreflightFailure { + entity_type: other.to_string(), + iid: *iid, + error: LoreError::Other(format!("Unknown entity type: {other}")), + }); + } + } + } + + result +} + +// --------------------------------------------------------------------------- +// Dependent fetch helpers (surgical mode) +// --------------------------------------------------------------------------- + +/// Counts returned from fetching dependents for a single entity. +#[derive(Debug, Default)] +pub(crate) struct DependentFetchResult { + pub resource_events_fetched: usize, + pub discussions_fetched: usize, + pub closes_issues_stored: usize, + pub file_changes_stored: usize, +} + +/// Fetch and store all dependents for a single issue: +/// resource events (state, label, milestone) and discussions. 
+pub(crate) async fn fetch_dependents_for_issue( + client: &GitLabClient, + conn: &Connection, + project_id: i64, + gitlab_project_id: i64, + iid: i64, + local_id: i64, + config: &Config, +) -> Result<DependentFetchResult> { + let mut result = DependentFetchResult::default(); + + // --- Resource events --- + match client + .fetch_all_resource_events(gitlab_project_id, "issue", iid) + .await + { + Ok((state_events, label_events, milestone_events)) => { + let count = state_events.len() + label_events.len() + milestone_events.len(); + let tx = conn.unchecked_transaction()?; + store_resource_events( + &tx, + project_id, + "issue", + local_id, + &state_events, + &label_events, + &milestone_events, + )?; + tx.execute( + "UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?", + [local_id], + )?; + tx.commit()?; + result.resource_events_fetched = count; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to fetch resource events for issue, continuing" + ); + } + } + + // --- Discussions --- + let sync_item = IssueForDiscussionSync { + local_issue_id: local_id, + iid, + updated_at: 0, // not used for filtering in surgical mode + }; + match ingest_issue_discussions( + conn, + client, + config, + gitlab_project_id, + project_id, + &[sync_item], + ) + .await + { + Ok(disc_result) => { + result.discussions_fetched = disc_result.discussions_fetched; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to ingest discussions for issue, continuing" + ); + } + } + + Ok(result) +} + +/// Fetch and store all dependents for a single merge request: +/// resource events, discussions, closes-issues references, and file changes (diffs). 
+pub(crate) async fn fetch_dependents_for_mr( + client: &GitLabClient, + conn: &Connection, + project_id: i64, + gitlab_project_id: i64, + iid: i64, + local_id: i64, + config: &Config, +) -> Result<DependentFetchResult> { + let mut result = DependentFetchResult::default(); + + // --- Resource events --- + match client + .fetch_all_resource_events(gitlab_project_id, "merge_request", iid) + .await + { + Ok((state_events, label_events, milestone_events)) => { + let count = state_events.len() + label_events.len() + milestone_events.len(); + let tx = conn.unchecked_transaction()?; + store_resource_events( + &tx, + project_id, + "merge_request", + local_id, + &state_events, + &label_events, + &milestone_events, + )?; + tx.execute( + "UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?", + [local_id], + )?; + tx.commit()?; + result.resource_events_fetched = count; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to fetch resource events for MR, continuing" + ); + } + } + + // --- Discussions --- + let sync_item = MrForDiscussionSync { + local_mr_id: local_id, + iid, + updated_at: 0, + }; + match ingest_mr_discussions( + conn, + client, + config, + gitlab_project_id, + project_id, + &[sync_item], + ) + .await + { + Ok(disc_result) => { + result.discussions_fetched = disc_result.discussions_fetched; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to ingest discussions for MR, continuing" + ); + } + } + + // --- Closes issues --- + match client.fetch_mr_closes_issues(gitlab_project_id, iid).await { + Ok(closes_issues) => { + let count = closes_issues.len(); + let tx = conn.unchecked_transaction()?; + store_closes_issues_refs(&tx, project_id, local_id, &closes_issues)?; + tx.execute( + "UPDATE merge_requests SET closes_issues_synced_for_updated_at = updated_at WHERE id = ?", + [local_id], + )?; + tx.commit()?; + result.closes_issues_stored = count; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to fetch 
closes_issues for MR, continuing" + ); + } + } + + // --- File changes (diffs) --- + match client.fetch_mr_diffs(gitlab_project_id, iid).await { + Ok(diffs) => { + let tx = conn.unchecked_transaction()?; + let stored = upsert_mr_file_changes(&tx, local_id, project_id, &diffs)?; + tx.execute( + "UPDATE merge_requests SET diffs_synced_for_updated_at = updated_at WHERE id = ?", + [local_id], + )?; + tx.commit()?; + result.file_changes_stored = stored; + } + Err(e) => { + warn!( + iid, + error = %e, + "Failed to fetch diffs for MR, continuing" + ); + } + } + + Ok(result) +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn get_db_updated_at( + conn: &Connection, + table: &str, + iid: i64, + project_id: i64, +) -> Result<Option<i64>> { + // Using a match on known table names avoids SQL injection from the table parameter. + let sql = match table { + "issues" => "SELECT updated_at FROM issues WHERE project_id = ?1 AND iid = ?2", + "merge_requests" => { + "SELECT updated_at FROM merge_requests WHERE project_id = ?1 AND iid = ?2" + } + _ => { + return Err(LoreError::Other(format!( + "Unknown table for updated_at lookup: {table}" + ))); + } + }; + + let result: Option<i64> = conn + .query_row(sql, (project_id, iid), |row| row.get(0)) + .optional()?; + + Ok(result) +} + +#[cfg(test)] +#[path = "surgical_tests.rs"] +mod tests; diff --git a/src/ingestion/surgical_tests.rs b/src/ingestion/surgical_tests.rs new file mode 100644 index 0000000..65887fa --- /dev/null +++ b/src/ingestion/surgical_tests.rs @@ -0,0 +1,645 @@ +use std::path::Path; + +use super::*; +use crate::core::config::{ + Config, EmbeddingConfig, GitLabConfig, LoggingConfig, ProjectConfig, ScoringConfig, + StorageConfig, SyncConfig, +}; +use crate::core::db::{create_connection, run_migrations}; +use crate::gitlab::types::{GitLabAuthor, GitLabMergeRequest}; + +// 
--------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +fn setup_db() -> rusqlite::Connection { + let conn = create_connection(Path::new(":memory:")).expect("in-memory DB"); + run_migrations(&conn).expect("migrations"); + conn.execute( + "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) + VALUES (100, 'group/repo', 'https://example.com/group/repo')", + [], + ) + .expect("insert project"); + conn +} + +fn test_config() -> Config { + Config { + gitlab: GitLabConfig { + base_url: "https://gitlab.example.com".to_string(), + token_env_var: "GITLAB_TOKEN".to_string(), + token: None, + }, + projects: vec![ProjectConfig { + path: "group/repo".to_string(), + }], + default_project: None, + sync: SyncConfig::default(), + storage: StorageConfig::default(), + embedding: EmbeddingConfig::default(), + logging: LoggingConfig::default(), + scoring: ScoringConfig::default(), + } +} + +fn make_test_issue(iid: i64, updated_at: &str) -> GitLabIssue { + GitLabIssue { + id: iid * 1000, // unique gitlab_id + iid, + project_id: 100, + title: format!("Test issue {iid}"), + description: Some("Description".to_string()), + state: "opened".to_string(), + created_at: "2026-01-01T00:00:00.000+00:00".to_string(), + updated_at: updated_at.to_string(), + closed_at: None, + author: GitLabAuthor { + id: 1, + username: "testuser".to_string(), + name: "Test User".to_string(), + }, + assignees: vec![], + labels: vec![], + milestone: None, + due_date: None, + web_url: format!("https://example.com/group/repo/-/issues/{iid}"), + } +} + +fn make_test_mr(iid: i64, updated_at: &str) -> GitLabMergeRequest { + GitLabMergeRequest { + id: iid * 1000, + iid, + project_id: 100, + title: format!("Test MR {iid}"), + description: Some("MR description".to_string()), + state: "opened".to_string(), + draft: false, + work_in_progress: false, + source_branch: "feature".to_string(), + 
target_branch: "main".to_string(),
+        sha: Some("abc123".to_string()),
+        references: None,
+        detailed_merge_status: None,
+        merge_status_legacy: None,
+        created_at: "2026-01-01T00:00:00.000+00:00".to_string(),
+        updated_at: updated_at.to_string(),
+        merged_at: None,
+        closed_at: None,
+        author: GitLabAuthor {
+            id: 1,
+            username: "testuser".to_string(),
+            name: "Test User".to_string(),
+        },
+        merge_user: None,
+        merged_by: None,
+        labels: vec![],
+        assignees: vec![],
+        reviewers: vec![],
+        web_url: format!("https://example.com/group/repo/-/merge_requests/{iid}"),
+        merge_commit_sha: None,
+        squash_commit_sha: None,
+    }
+}
+
+fn get_db_updated_at_helper(conn: &rusqlite::Connection, table: &str, iid: i64) -> Option<i64> {
+    let sql = match table {
+        "issues" => "SELECT updated_at FROM issues WHERE project_id = 1 AND iid = ?1",
+        "merge_requests" => {
+            "SELECT updated_at FROM merge_requests WHERE project_id = 1 AND iid = ?1"
+        }
+        _ => return None,
+    };
+    conn.query_row(sql, [iid], |row| row.get(0)).ok()
+}
+
+fn get_dirty_keys(conn: &rusqlite::Connection) -> Vec<(String, i64)> {
+    let mut stmt = conn
+        .prepare("SELECT source_type, source_id FROM dirty_sources ORDER BY source_type, source_id")
+        .expect("prepare dirty_sources query");
+    stmt.query_map([], |row| {
+        let st: String = row.get(0)?;
+        let id: i64 = row.get(1)?;
+        Ok((st, id))
+    })
+    .expect("query dirty_sources")
+    .collect::<std::result::Result<Vec<_>, _>>()
+    .expect("collect dirty_sources")
+}
+
+// ---------------------------------------------------------------------------
+// is_stale unit tests
+// ---------------------------------------------------------------------------
+
+#[test]
+fn test_is_stale_parses_iso8601() {
+    // 2026-02-17T12:00:00.000+00:00 -> 1771329600000 ms
+    let result = is_stale("2026-02-17T12:00:00.000+00:00", Some(1_771_329_600_000));
+    assert!(result.is_ok());
+    // Same timestamp => stale
+    assert!(result.unwrap());
+}
+
+#[test]
+fn test_is_stale_handles_none_db_value() {
+ 
let result = is_stale("2026-02-17T12:00:00.000+00:00", None);
+    assert!(result.is_ok());
+    assert!(!result.unwrap(), "no DB row means not stale");
+}
+
+#[test]
+fn test_is_stale_with_z_suffix() {
+    let result = is_stale("2026-02-17T12:00:00Z", Some(1_771_329_600_000));
+    assert!(result.is_ok());
+    assert!(result.unwrap(), "Z suffix should parse same as +00:00");
+}
+
+// ---------------------------------------------------------------------------
+// Issue ingestion tests
+// ---------------------------------------------------------------------------
+
+#[test]
+fn test_ingest_issue_by_iid_upserts_and_marks_dirty() {
+    let conn = setup_db();
+    let config = test_config();
+    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
+
+    let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
+
+    assert!(!result.skipped_stale);
+    assert_eq!(result.dirty_source_keys.len(), 1);
+    assert!(!result.dirty_source_keys.is_empty());
+
+    // Verify DB row exists
+    let db_ts = get_db_updated_at_helper(&conn, "issues", 42);
+    assert!(db_ts.is_some(), "issue should exist in DB");
+
+    // Verify dirty marking
+    let dirty = get_dirty_keys(&conn);
+    assert!(
+        dirty.iter().any(|(t, _)| t == "issue"),
+        "dirty_sources should contain an issue entry"
+    );
+}
+
+#[test]
+fn test_toctou_skips_stale_issue() {
+    let conn = setup_db();
+    let config = test_config();
+    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
+
+    // First ingest succeeds
+    let r1 = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
+    assert!(!r1.skipped_stale);
+
+    // Clear dirty to check second ingest doesn't re-mark
+    conn.execute("DELETE FROM dirty_sources", []).unwrap();
+
+    // Second ingest with same timestamp should be skipped
+    let r2 = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
+    assert!(r2.skipped_stale);
+    // A stale skip must not report any dirty keys:
+    assert!(r2.dirty_source_keys.is_empty());
+
+    // No new dirty mark
+    let dirty = get_dirty_keys(&conn);
+    assert!(dirty.is_empty(), 
"stale skip should not create dirty marks");
+}
+
+#[test]
+fn test_toctou_allows_newer_issue() {
+    let conn = setup_db();
+    let config = test_config();
+
+    // Ingest at T1
+    let issue_t1 = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
+    ingest_issue_by_iid(&conn, &config, 1, &issue_t1).unwrap();
+
+    conn.execute("DELETE FROM dirty_sources", []).unwrap();
+
+    // Ingest at T2 (newer) — should succeed
+    let issue_t2 = make_test_issue(42, "2026-02-17T13:00:00.000+00:00");
+    let result = ingest_issue_by_iid(&conn, &config, 1, &issue_t2).unwrap();
+
+    assert!(!result.skipped_stale);
+    assert_eq!(result.dirty_source_keys.len(), 1);
+}
+
+#[test]
+fn test_ingest_issue_returns_dirty_source_keys() {
+    let conn = setup_db();
+    let config = test_config();
+    let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
+
+    let result = ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap();
+
+    assert_eq!(result.dirty_source_keys.len(), 1);
+    let (source_type, local_id) = &result.dirty_source_keys[0];
+    assert_eq!(source_type.as_str(), "issue");
+    assert!(*local_id > 0, "local_id should be positive");
+}
+
+#[test]
+fn test_ingest_issue_updates_existing() {
+    let conn = setup_db();
+    let config = test_config();
+
+    let issue_v1 = make_test_issue(42, "2026-02-17T12:00:00.000+00:00");
+    ingest_issue_by_iid(&conn, &config, 1, &issue_v1).unwrap();
+
+    let ts1 = get_db_updated_at_helper(&conn, "issues", 42).unwrap();
+
+    // Newer version
+    let issue_v2 = make_test_issue(42, "2026-02-17T14:00:00.000+00:00");
+    let result = ingest_issue_by_iid(&conn, &config, 1, &issue_v2).unwrap();
+
+    assert!(!result.skipped_stale);
+    let ts2 = get_db_updated_at_helper(&conn, "issues", 42).unwrap();
+    assert!(ts2 > ts1, "DB timestamp should increase after update");
+}
+
+// ---------------------------------------------------------------------------
+// MR ingestion tests
+// ---------------------------------------------------------------------------
+
+#[test]
+fn 
test_ingest_mr_by_iid_upserts_and_marks_dirty() {
+    let conn = setup_db();
+    let config = test_config();
+    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
+
+    let result = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
+
+    assert!(!result.skipped_stale);
+    assert_eq!(result.dirty_source_keys.len(), 1);
+    assert!(!result.dirty_source_keys.is_empty());
+
+    let db_ts = get_db_updated_at_helper(&conn, "merge_requests", 101);
+    assert!(db_ts.is_some(), "MR should exist in DB");
+
+    let dirty = get_dirty_keys(&conn);
+    assert!(
+        dirty.iter().any(|(t, _)| t == "merge_request"),
+        "dirty_sources should contain a merge_request entry"
+    );
+}
+
+#[test]
+fn test_toctou_skips_stale_mr() {
+    let conn = setup_db();
+    let config = test_config();
+    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
+
+    let r1 = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
+    assert!(!r1.skipped_stale);
+
+    conn.execute("DELETE FROM dirty_sources", []).unwrap();
+
+    let r2 = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
+    assert!(r2.skipped_stale);
+    // A stale skip must not report any dirty keys:
+    assert!(r2.dirty_source_keys.is_empty());
+}
+
+#[test]
+fn test_toctou_allows_newer_mr() {
+    let conn = setup_db();
+    let config = test_config();
+
+    let mr_t1 = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
+    ingest_mr_by_iid(&conn, &config, 1, &mr_t1).unwrap();
+
+    conn.execute("DELETE FROM dirty_sources", []).unwrap();
+
+    let mr_t2 = make_test_mr(101, "2026-02-17T13:00:00.000+00:00");
+    let result = ingest_mr_by_iid(&conn, &config, 1, &mr_t2).unwrap();
+
+    assert!(!result.skipped_stale);
+    assert_eq!(result.dirty_source_keys.len(), 1);
+}
+
+#[test]
+fn test_ingest_mr_returns_dirty_source_keys() {
+    let conn = setup_db();
+    let config = test_config();
+    let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00");
+
+    let result = ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap();
+
+    assert_eq!(result.dirty_source_keys.len(), 1);
+    let (source_type, local_id) = &result.dirty_source_keys[0];
+ 
assert_eq!(source_type.as_str(), "merge_request"); + assert!(*local_id > 0); +} + +#[test] +fn test_ingest_mr_updates_existing() { + let conn = setup_db(); + let config = test_config(); + + let mr_v1 = make_test_mr(101, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr_v1).unwrap(); + + let ts1 = get_db_updated_at_helper(&conn, "merge_requests", 101).unwrap(); + + let mr_v2 = make_test_mr(101, "2026-02-17T14:00:00.000+00:00"); + let result = ingest_mr_by_iid(&conn, &config, 1, &mr_v2).unwrap(); + + assert!(!result.skipped_stale); + let ts2 = get_db_updated_at_helper(&conn, "merge_requests", 101).unwrap(); + assert!(ts2 > ts1, "DB timestamp should increase after update"); +} + +// --------------------------------------------------------------------------- +// Preflight fetch test (wiremock) +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn test_preflight_fetch_returns_issues_and_mrs() { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let mock_server = MockServer::start().await; + + // Issue fixture + let issue_json = serde_json::json!({ + "id": 42000, + "iid": 42, + "project_id": 100, + "title": "Test issue 42", + "description": "desc", + "state": "opened", + "created_at": "2026-01-01T00:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "author": {"id": 1, "username": "testuser", "name": "Test User"}, + "assignees": [], + "labels": [], + "web_url": "https://example.com/group/repo/-/issues/42" + }); + + // MR fixture + let mr_json = serde_json::json!({ + "id": 101000, + "iid": 101, + "project_id": 100, + "title": "Test MR 101", + "description": "mr desc", + "state": "opened", + "draft": false, + "work_in_progress": false, + "source_branch": "feature", + "target_branch": "main", + "sha": "abc123", + "created_at": "2026-01-01T00:00:00.000+00:00", + "updated_at": "2026-02-17T12:00:00.000+00:00", + "author": {"id": 1, 
"username": "testuser", "name": "Test User"}, + "labels": [], + "assignees": [], + "reviewers": [], + "web_url": "https://example.com/group/repo/-/merge_requests/101" + }); + + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/42")) + .respond_with(ResponseTemplate::new(200).set_body_json(&issue_json)) + .mount(&mock_server) + .await; + + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/101")) + .respond_with(ResponseTemplate::new(200).set_body_json(&mr_json)) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + let targets = vec![ + ("issue".to_string(), 42i64), + ("merge_request".to_string(), 101i64), + ]; + + let result = preflight_fetch(&client, 100, &targets).await; + + assert_eq!(result.issues.len(), 1); + assert_eq!(result.issues[0].iid, 42); + assert_eq!(result.merge_requests.len(), 1); + assert_eq!(result.merge_requests[0].iid, 101); + assert!(result.failures.is_empty()); +} + +// --------------------------------------------------------------------------- +// Dependent helper tests (bd-kanh) +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn test_fetch_dependents_for_issue_empty_events() { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let mock_server = MockServer::start().await; + let conn = setup_db(); + let config = test_config(); + + // Insert an issue so we have a local_id + let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + ingest_issue_by_iid(&conn, &config, 1, &issue).unwrap(); + let local_id: i64 = conn + .query_row( + "SELECT id FROM issues WHERE project_id = 1 AND iid = 42", + [], + |row| row.get(0), + ) + .unwrap(); + + // Mock empty resource event endpoints + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/42/resource_state_events")) + 
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/42/resource_label_events")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/100/issues/42/resource_milestone_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + // Mock empty discussions endpoint + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/issues/42/discussions")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + + let result = fetch_dependents_for_issue(&client, &conn, 1, 100, 42, local_id, &config) + .await + .unwrap(); + + assert_eq!(result.resource_events_fetched, 0); + assert_eq!(result.discussions_fetched, 0); +} + +#[tokio::test] +async fn test_fetch_dependents_for_mr_empty_events() { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let mock_server = MockServer::start().await; + let conn = setup_db(); + let config = test_config(); + + // Insert an MR so we have a local_id + let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + let local_id: i64 = conn + .query_row( + "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 101", + [], + |row| row.get(0), + ) + .unwrap(); + + // Mock empty resource event endpoints for MR + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/100/merge_requests/101/resource_state_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + Mock::given(method("GET")) + .and(path( + 
"/api/v4/projects/100/merge_requests/101/resource_label_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/100/merge_requests/101/resource_milestone_events", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + // Mock empty discussions endpoint for MR + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/101/discussions")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + // Mock empty closes_issues endpoint + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/100/merge_requests/101/closes_issues", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + // Mock empty diffs endpoint + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/101/diffs")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + + let result = fetch_dependents_for_mr(&client, &conn, 1, 100, 101, local_id, &config) + .await + .unwrap(); + + assert_eq!(result.resource_events_fetched, 0); + assert_eq!(result.discussions_fetched, 0); + assert_eq!(result.closes_issues_stored, 0); + assert_eq!(result.file_changes_stored, 0); +} + +#[tokio::test] +async fn test_fetch_dependents_for_mr_with_closes_issues() { + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + let mock_server = MockServer::start().await; + let conn = setup_db(); + let config = test_config(); + + // Insert issue and MR so references can resolve + let issue = make_test_issue(42, "2026-02-17T12:00:00.000+00:00"); + ingest_issue_by_iid(&conn, &config, 1, 
&issue).unwrap(); + + let mr = make_test_mr(101, "2026-02-17T12:00:00.000+00:00"); + ingest_mr_by_iid(&conn, &config, 1, &mr).unwrap(); + let mr_local_id: i64 = conn + .query_row( + "SELECT id FROM merge_requests WHERE project_id = 1 AND iid = 101", + [], + |row| row.get(0), + ) + .unwrap(); + + // Mock empty resource events + for endpoint in [ + "resource_state_events", + "resource_label_events", + "resource_milestone_events", + ] { + Mock::given(method("GET")) + .and(path(format!( + "/api/v4/projects/100/merge_requests/101/{endpoint}" + ))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + } + + // Mock empty discussions + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/101/discussions")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + // Mock closes_issues with one reference + Mock::given(method("GET")) + .and(path( + "/api/v4/projects/100/merge_requests/101/closes_issues", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([ + { + "id": 42000, + "iid": 42, + "project_id": 100, + "title": "Test issue 42", + "state": "opened", + "web_url": "https://example.com/group/repo/-/issues/42" + } + ]))) + .mount(&mock_server) + .await; + + // Mock empty diffs + Mock::given(method("GET")) + .and(path("/api/v4/projects/100/merge_requests/101/diffs")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([]))) + .mount(&mock_server) + .await; + + let client = GitLabClient::new(&mock_server.uri(), "test-token", None); + + let result = fetch_dependents_for_mr(&client, &conn, 1, 100, 101, mr_local_id, &config) + .await + .unwrap(); + + assert_eq!(result.closes_issues_stored, 1); +} diff --git a/src/main.rs b/src/main.rs index 628b1ad..12aae07 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,14 +26,14 @@ use lore::cli::commands::{ run_cron_status, run_cron_uninstall, 
run_doctor, run_drift, run_embed, run_file_history, run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, - run_who, + run_token_set, run_token_show, run_who, }; use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme}; use lore::cli::robot::{RobotMeta, strip_schemas}; use lore::cli::{ Cli, Commands, CountArgs, CronAction, CronArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, - TraceArgs, WhoArgs, + TokenAction, TokenArgs, TraceArgs, WhoArgs, }; use lore::core::db::{ LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations, @@ -207,6 +207,7 @@ async fn main() { } Some(Commands::Trace(args)) => handle_trace(cli.config.as_deref(), args, robot_mode), Some(Commands::Cron(args)) => handle_cron(cli.config.as_deref(), args, robot_mode), + Some(Commands::Token(args)) => handle_token(cli.config.as_deref(), args, robot_mode).await, Some(Commands::Drift { entity_type, iid, @@ -2154,6 +2155,14 @@ async fn handle_sync_cmd( ) -> Result<(), Box<dyn std::error::Error>> { let dry_run = args.dry_run && !args.no_dry_run; + // Dedup and sort IIDs + let mut issue_iids = args.issue; + let mut mr_iids = args.mr; + issue_iids.sort_unstable(); + issue_iids.dedup(); + mr_iids.sort_unstable(); + mr_iids.dedup(); + let mut config = Config::load(config_override)?; if args.no_events { config.sync.fetch_resource_events = false; @@ -2172,10 +2181,56 @@ async fn handle_sync_cmd( no_events: args.no_events, robot_mode, dry_run, + issue_iids, + mr_iids, + project: args.project, + preflight_only: args.preflight_only, }; - // For dry run, skip recording and just show the preview - if dry_run { + // Validation: preflight_only requires surgical mode + if options.preflight_only && !options.is_surgical() { + return Err("--preflight-only requires 
--issue or --mr".into()); + } + + // Validation: full + surgical are incompatible + if options.full && options.is_surgical() { + return Err("--full and --issue/--mr are incompatible".into()); + } + + // Validation: surgical mode requires a project (via -p or config defaultProject) + if options.is_surgical() + && config + .effective_project(options.project.as_deref()) + .is_none() + { + return Err("--issue/--mr requires -p/--project (or set defaultProject in config)".into()); + } + + // Validation: hard cap on total surgical targets + let total_targets = options.issue_iids.len() + options.mr_iids.len(); + if total_targets > SyncOptions::MAX_SURGICAL_TARGETS { + return Err(format!( + "Too many surgical targets ({total_targets}); maximum is {}", + SyncOptions::MAX_SURGICAL_TARGETS + ) + .into()); + } + + // Surgical + dry-run → treat as preflight-only + let mut options = options; + if dry_run && options.is_surgical() { + options.preflight_only = true; + } + + // Resolve effective project for surgical mode: when -p is not passed but + // defaultProject is set in config, populate options.project so the surgical + // orchestrator receives the resolved project path. + if options.is_surgical() && options.project.is_none() { + options.project = config.default_project.clone(); + } + + // For non-surgical dry run, skip recording and just show the preview + if dry_run && !options.is_surgical() { let signal = ShutdownSignal::new(); run_sync(&config, options, None, &signal).await?; return Ok(()); @@ -2199,6 +2254,34 @@ async fn handle_sync_cmd( None }; + // Surgical mode: run_sync_surgical manages its own recorder, signal, and recording. + // Skip the normal recorder setup and let the dispatch handle everything. + if options.is_surgical() { + let signal = ShutdownSignal::new(); + let signal_for_handler = signal.clone(); + tokio::spawn(async move { + let _ = tokio::signal::ctrl_c().await; + eprintln!("\nInterrupted, finishing current batch... 
(Ctrl+C again to force quit)"); + signal_for_handler.cancel(); + let _ = tokio::signal::ctrl_c().await; + std::process::exit(130); + }); + + let start = std::time::Instant::now(); + match run_sync(&config, options, None, &signal).await { + Ok(result) => { + let elapsed = start.elapsed(); + if robot_mode { + print_sync_json(&result, elapsed.as_millis() as u64, Some(metrics)); + } else { + print_sync(&result, elapsed, Some(metrics), args.timings); + } + return Ok(()); + } + Err(e) => return Err(e.into()), + } + } + let db_path = get_db_path(config.storage.db_path.as_deref()); let recorder_conn = create_connection(&db_path)?; let run_id = uuid::Uuid::new_v4().simple().to_string(); @@ -2287,6 +2370,29 @@ fn handle_cron( } else { print_cron_install(&result); } + // Warn if no stored token — cron runs in a minimal shell with no env vars + if let Ok(config) = Config::load(config_override) + && config + .gitlab + .token + .as_ref() + .is_none_or(|t| t.trim().is_empty()) + { + if robot_mode { + eprintln!( + "{{\"warning\":\"No stored token found. Cron sync requires a stored token. Run: lore token set\"}}" + ); + } else { + eprintln!(); + eprintln!( + " {} No stored token found. 
Cron sync requires a stored token.", + lore::cli::render::Theme::warning() + .render(lore::cli::render::Icons::warning()), + ); + eprintln!(" Run: lore token set"); + eprintln!(); + } + } } CronAction::Uninstall => { let result = run_cron_uninstall()?; @@ -2312,6 +2418,74 @@ fn handle_cron( Ok(()) } +async fn handle_token( + config_override: Option<&str>, + args: TokenArgs, + robot_mode: bool, +) -> Result<(), Box<dyn std::error::Error>> { + let start = std::time::Instant::now(); + + match args.action { + TokenAction::Set { token } => { + let result = run_token_set(config_override, token).await?; + let elapsed_ms = start.elapsed().as_millis() as u64; + if robot_mode { + let output = serde_json::json!({ + "ok": true, + "data": { + "action": "set", + "username": result.username, + "config_path": result.config_path, + }, + "meta": { "elapsed_ms": elapsed_ms }, + }); + println!("{}", serde_json::to_string(&output)?); + } else { + println!( + " {} Token stored and validated (authenticated as @{})", + lore::cli::render::Theme::success().render(lore::cli::render::Icons::success()), + result.username + ); + println!( + " {} {}", + lore::cli::render::Theme::dim().render("config:"), + result.config_path + ); + println!(); + } + } + TokenAction::Show { unmask } => { + let result = run_token_show(config_override, unmask)?; + let elapsed_ms = start.elapsed().as_millis() as u64; + if robot_mode { + let output = serde_json::json!({ + "ok": true, + "data": { + "token": result.token, + "source": result.source, + }, + "meta": { "elapsed_ms": elapsed_ms }, + }); + println!("{}", serde_json::to_string(&output)?); + } else { + println!( + " {} {}", + lore::cli::render::Theme::dim().render("token:"), + result.token + ); + println!( + " {} {}", + lore::cli::render::Theme::dim().render("source:"), + result.source + ); + println!(); + } + } + } + + Ok(()) +} + #[derive(Serialize)] struct HealthOutput { ok: bool, @@ -2513,13 +2687,31 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> 
Result<(), Box<dyn std::e } }, "sync": { - "description": "Full sync pipeline: ingest -> generate-docs -> embed", - "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run"], + "description": "Full sync pipeline: ingest -> generate-docs -> embed. Supports surgical per-IID mode.", + "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run", "--issue <IID>", "--mr <IID>", "-p/--project <path>", "--preflight-only"], "example": "lore --robot sync", + "surgical_mode": { + "description": "Sync specific issues or MRs by IID. Runs a scoped pipeline: preflight -> TOCTOU check -> ingest -> dependents -> docs -> embed.", + "flags": ["--issue <IID> (repeatable)", "--mr <IID> (repeatable)", "-p/--project <path> (required)", "--preflight-only"], + "examples": [ + "lore --robot sync --issue 7 -p group/project", + "lore --robot sync --issue 7 --issue 42 --mr 10 -p group/project", + "lore --robot sync --issue 7 -p group/project --preflight-only" + ], + "constraints": ["--issue/--mr requires -p/--project (or defaultProject in config)", "--full and --issue/--mr are incompatible", "--preflight-only requires --issue or --mr", "Max 100 total targets"], + "entity_result_outcomes": ["synced", "skipped_stale", "not_found", "preflight_failed", "error"] + }, "response_schema": { - "ok": "bool", - "data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"}, - "meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"} + "normal": { + "ok": "bool", + "data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": 
"int"}, + "meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"} + }, + "surgical": { + "ok": "bool", + "data": {"surgical_mode": "true", "surgical_iids": "{issues:[int], merge_requests:[int]}", "entity_results": "[{entity_type:string, iid:int, outcome:string, error?:string, toctou_reason?:string}]", "preflight_only?": "bool", "issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "discussions_fetched": "int"}, + "meta": {"elapsed_ms": "int"} + } } }, "issues": { @@ -2821,6 +3013,11 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e "lore --robot who --active --since 7d", "lore --robot who --overlap src/path/", "lore --robot who --path README.md" + ], + "surgical_sync": [ + "lore --robot sync --issue 7 -p group/project", + "lore --robot sync --issue 7 --mr 10 -p group/project", + "lore --robot sync --issue 7 -p group/project --preflight-only" ] });