diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 4b4be71..14322cc 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -189,7 +189,7 @@ {"id":"bd-32mc","title":"OBSERV: Implement log retention cleanup at startup","description":"## Background\nLog files accumulate at ~1-10 MB/day. Without cleanup, they grow unbounded. Retention runs BEFORE subscriber init so deleted file handles aren't held open by the appender.\n\n## Approach\nAdd a cleanup function, called from main.rs before the subscriber is initialized (before current line 44):\n\n```rust\n/// Delete log files older than retention_days.\n/// Matches files named lore.YYYY-MM-DD.log in the log directory.\npub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> std::io::Result {\n if retention_days == 0 {\n return Ok(0); // 0 means file logging disabled, don't delete\n }\n let cutoff = SystemTime::now() - Duration::from_secs(u64::from(retention_days) * 86400);\n let mut deleted = 0;\n\n for entry in std::fs::read_dir(log_dir)? {\n let entry = entry?;\n let name = entry.file_name();\n let name_str = name.to_string_lossy();\n\n // Only match lore.YYYY-MM-DD.log pattern\n if !name_str.starts_with(\"lore.\") || !name_str.ends_with(\".log\") {\n continue;\n }\n\n if let Ok(metadata) = entry.metadata() {\n if let Ok(modified) = metadata.modified() {\n if modified < cutoff {\n std::fs::remove_file(entry.path())?;\n deleted += 1;\n }\n }\n }\n }\n Ok(deleted)\n}\n```\n\nPlace this function in src/core/paths.rs (next to get_log_dir) or a new src/core/log_retention.rs. Prefer paths.rs since it's small and related.\n\nCall from main.rs:\n```rust\nlet log_dir = get_log_dir(config.logging.log_dir.as_deref());\nlet _ = cleanup_old_logs(&log_dir, config.logging.retention_days);\n// THEN init subscriber\n```\n\nNote: Config must be loaded before cleanup runs. Current main.rs parses Cli at line 60, but config loading happens inside command handlers. 
This means we need to either:\n A) Load config early in main() before subscriber init (preferred)\n B) Defer cleanup to after config load\n\nSince the subscriber must also know log_dir, approach A is natural: load config -> cleanup -> init subscriber -> dispatch command.\n\n## Acceptance Criteria\n- [ ] Files matching lore.*.log older than retention_days are deleted\n- [ ] Files matching lore.*.log within retention_days are preserved\n- [ ] Non-matching files (e.g., other.txt) are never deleted\n- [ ] retention_days=0 skips cleanup entirely (no files deleted)\n- [ ] Errors on individual files don't prevent cleanup of remaining files\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/paths.rs (add cleanup_old_logs function)\n- src/main.rs (call cleanup before subscriber init)\n\n## TDD Loop\nRED:\n - test_log_retention_cleanup: create tempdir with lore.2026-01-01.log through lore.2026-02-04.log, run with retention_days=7, assert old deleted, recent preserved\n - test_log_retention_ignores_non_log_files: create other.txt alongside old log files, assert other.txt untouched\n - test_log_retention_zero_days: retention_days=0, assert nothing deleted\nGREEN: Implement cleanup_old_logs\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- SystemTime::now() precision varies by OS; use file modified time, not name parsing (simpler and more reliable)\n- read_dir on non-existent directory: get_log_dir creates it first, so this shouldn't happen. But handle gracefully.\n- Permissions error on individual file: log a warning, continue with remaining files (don't propagate)\n- Race condition: another process creates a file during cleanup. 
Not a concern -- we only delete old files.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.627901Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:15:04.452086Z","closed_at":"2026-02-04T17:15:04.452039Z","close_reason":"Implemented cleanup_old_logs() with date-pattern matching and retention_days config, runs at startup before subscriber init","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-32mc","depends_on_id":"bd-17n","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-32mc","depends_on_id":"bd-1k4","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-32mc","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-32q","title":"Implement timeline seed phase: FTS5 keyword search to entity IDs","description":"## Background\n\nThe seed phase is steps 1-2 of the timeline pipeline (spec Section 3.2): SEED + HYDRATE. 
It converts a keyword query into entity IDs via FTS5 search and collects evidence note candidates.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 1-2.\n\n## Codebase Context\n\n- FTS5 index exists: documents_fts table (migration 008)\n- documents table: id, source_type ('issue'|'merge_request'|'discussion'), source_id, project_id, created_at, content\n- discussions table: id, issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, id (note_id)\n- Safe FTS query builder: src/search/fts.rs has to_fts_query(raw, FtsQueryMode::Safe) for sanitizing user input\n- projects table: path_with_namespace\n- issues/merge_requests: iid, project_id\n\n## Approach\n\nCreate `src/core/timeline_seed.rs`:\n\n```rust\nuse crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};\nuse rusqlite::Connection;\n\npub struct SeedResult {\n pub seed_entities: Vec,\n pub evidence_notes: Vec, // NoteEvidence events\n}\n\npub fn seed_timeline(\n conn: &Connection,\n query: &str,\n project_id: Option,\n since_ms: Option,\n max_seeds: usize, // default 50\n) -> Result { ... 
}\n```\n\n### SQL for SEED + HYDRATE (entity discovery):\n```sql\nSELECT DISTINCT d.source_type, d.source_id, d.project_id,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT iid FROM issues WHERE id = d.source_id)\n WHEN 'merge_request' THEN (SELECT iid FROM merge_requests WHERE id = d.source_id)\n WHEN 'discussion' THEN NULL -- discussions map to parent entity below\n END AS iid,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT p.path_with_namespace FROM projects p JOIN issues i ON i.project_id = p.id WHERE i.id = d.source_id)\n WHEN 'merge_request' THEN (SELECT p.path_with_namespace FROM projects p JOIN merge_requests m ON m.project_id = p.id WHERE m.id = d.source_id)\n WHEN 'discussion' THEN NULL\n END AS project_path\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nWHERE documents_fts MATCH ?1\n AND (?2 IS NULL OR d.project_id = ?2)\nORDER BY rank\nLIMIT ?3\n```\n\nFor 'discussion' source_type: resolve to parent entity via discussions.issue_id or discussions.merge_request_id.\n\n### SQL for evidence notes (top 10 FTS5-matched notes):\n```sql\nSELECT n.id as note_id, n.body, n.created_at, n.author_username,\n disc.id as discussion_id,\n CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as parent_type,\n COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nJOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\nJOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0\nWHERE documents_fts MATCH ?1\nORDER BY rank\nLIMIT 10\n```\n\nEvidence notes become TimelineEvent with:\n- event_type: NoteEvidence { note_id, snippet (first 200 chars), discussion_id }\n- Use to_fts_query(query, FtsQueryMode::Safe) to sanitize user input before MATCH\n\nRegister in `src/core/mod.rs`: `pub mod timeline_seed;`\n\n## Acceptance Criteria\n\n- [ ] seed_timeline() returns entities from FTS5 search\n- [ ] Entities deduplicated (same entity from 
multiple docs appears once)\n- [ ] Discussion documents resolved to parent entity (issue or MR)\n- [ ] Evidence notes capped at 10\n- [ ] Evidence note snippets truncated to 200 chars (safe UTF-8 boundary)\n- [ ] Uses to_fts_query(query, FtsQueryMode::Safe) for input sanitization\n- [ ] --since filter works\n- [ ] -p filter works\n- [ ] Empty result for zero-match queries (not error)\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_seed.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_seed;`)\n\n## TDD Loop\n\nRED:\n- `test_seed_deduplicates_entities`\n- `test_seed_resolves_discussion_to_parent`\n- `test_seed_empty_query_returns_empty`\n- `test_seed_evidence_capped_at_10`\n- `test_seed_evidence_snippet_truncated`\n- `test_seed_respects_since_filter`\n\nTests need in-memory DB with migrations 001-014 + documents/FTS test data.\n\nGREEN: Implement FTS5 queries and deduplication.\n\nVERIFY: `cargo test --lib -- timeline_seed`\n\n## Edge Cases\n\n- FTS5 MATCH invalid syntax: to_fts_query(query, FtsQueryMode::Safe) sanitizes\n- Discussion orphans: LEFT JOIN handles deleted notes\n- UTF-8 truncation: use char_indices() to find safe 200-char boundary\n- Discussion source resolving to both issue_id and merge_request_id: prefer issue_id (shouldn't happen but be defensive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.615908Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:47:07.966488Z","closed_at":"2026-02-05T21:47:07.966437Z","close_reason":"Completed: Created src/core/timeline_seed.rs with seed_timeline() function. FTS5 search to entity IDs with discussion-to-parent resolution, entity deduplication, evidence note extraction (capped, snippet-truncated). 12 tests pass. 
All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-32q","depends_on_id":"bd-20e","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-32q","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-335","title":"Implement Ollama API client","description":"## Background\nThe Ollama API client provides the HTTP interface to the local Ollama embedding server. It handles health checks (is Ollama running? does the model exist?), batch embedding requests (up to 32 texts per call), and error translation to LoreError variants. This is the lowest-level embedding component — the pipeline (bd-am7) builds on top of it.\n\n## Approach\nCreate \\`src/embedding/ollama.rs\\` per PRD Section 4.2. **Uses async reqwest (not blocking).**\n\n```rust\nuse reqwest::Client; // NOTE: async Client, not reqwest::blocking\nuse serde::{Deserialize, Serialize};\nuse crate::core::error::{LoreError, Result};\n\npub struct OllamaConfig {\n pub base_url: String, // default \\\"http://localhost:11434\\\"\n pub model: String, // default \\\"nomic-embed-text\\\"\n pub timeout_secs: u64, // default 60\n}\n\nimpl Default for OllamaConfig { /* PRD defaults */ }\n\npub struct OllamaClient {\n client: Client, // async reqwest::Client\n config: OllamaConfig,\n}\n\n#[derive(Serialize)]\nstruct EmbedRequest { model: String, input: Vec }\n\n#[derive(Deserialize)]\nstruct EmbedResponse { model: String, embeddings: Vec> }\n\n#[derive(Deserialize)]\nstruct TagsResponse { models: Vec }\n\n#[derive(Deserialize)]\nstruct ModelInfo { name: String }\n\nimpl OllamaClient {\n pub fn new(config: OllamaConfig) -> Self;\n\n /// Async health check: GET /api/tags\n /// Model matched via starts_with (\\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\")\n pub async fn health_check(&self) -> Result<()>;\n\n /// Async batch embedding: POST 
/api/embed\n /// Input: Vec of texts, Response: Vec> of 768-dim embeddings\n pub async fn embed_batch(&self, texts: Vec) -> Result>>;\n}\n\n/// Quick health check without full client (async).\npub async fn check_ollama_health(base_url: &str) -> bool;\n```\n\n**Error mapping (per PRD):**\n- Connection refused/timeout -> LoreError::OllamaUnavailable { base_url, source: Some(e) }\n- Model not in /api/tags -> LoreError::OllamaModelNotFound { model }\n- Non-200 from /api/embed -> LoreError::EmbeddingFailed { document_id: 0, reason: format!(\\\"HTTP {}: {}\\\", status, body) }\n\n**Key PRD detail:** Model matching uses \\`starts_with\\` (not exact match) so \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\".\n\n## Acceptance Criteria\n- [ ] Uses async reqwest::Client (not blocking)\n- [ ] health_check() is async, detects server availability and model presence\n- [ ] Model matched via starts_with (handles \\\":latest\\\" suffix)\n- [ ] embed_batch() is async, sends POST /api/embed\n- [ ] Batch size up to 32 texts\n- [ ] Returns Vec> with 768 dimensions each\n- [ ] OllamaUnavailable error includes base_url and source error\n- [ ] OllamaModelNotFound error includes model name\n- [ ] Non-200 response mapped to EmbeddingFailed with status + body\n- [ ] Timeout: 60 seconds default (configurable via OllamaConfig)\n- [ ] \\`cargo build\\` succeeds\n\n## Files\n- \\`src/embedding/ollama.rs\\` — new file\n- \\`src/embedding/mod.rs\\` — add \\`pub mod ollama;\\` and re-exports\n\n## TDD Loop\nRED: Tests (unit tests with mock, integration needs Ollama):\n- \\`test_config_defaults\\` — verify default base_url, model, timeout\n- \\`test_health_check_model_starts_with\\` — \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\"\n- \\`test_embed_batch_parse\\` — mock response parsed correctly\n- \\`test_connection_error_maps_to_ollama_unavailable\\`\nGREEN: Implement OllamaClient\nVERIFY: \\`cargo test ollama\\`\n\n## Edge Cases\n- Ollama returns model name with 
version tag (\\\"nomic-embed-text:latest\\\"): starts_with handles this\n- Empty texts array: send empty batch, Ollama returns empty embeddings\n- Ollama returns wrong number of embeddings (2 texts, 1 embedding): caller (pipeline) validates\n- Non-JSON response: reqwest deserialization error -> wrap appropriately","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:34.025099Z","created_by":"tayloreernisse","updated_at":"2026-01-30T16:58:17.546852Z","closed_at":"2026-01-30T16:58:17.546794Z","close_reason":"Completed: OllamaClient with async health_check (starts_with model matching), embed_batch, error mapping to LoreError variants, check_ollama_health helper, 4 tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-335","depends_on_id":"bd-ljf","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} -{"id":"bd-343o","title":"Fetch and store GitLab linked issues (Related to)","description":"## Background\n\nGitLab's \"Linked items\" provides bidirectional issue linking distinct from \"closes\" and \"mentioned\" references. This data is only available via the issue links API (GET /projects/:id/issues/:iid/links). The goal is to fetch these links during sync and store them as entity_references so they appear in `lore show issue` and are queryable.\n\n**Why:** Currently `lore show issue` displays closing MRs (via `get_closing_mrs()` in show.rs:~line 1544) but has NO related issues section. This bead adds that capability.\n\n## Codebase Context\n\n- **entity_references table** (migration 011): reference_type CHECK: 'closes' | 'mentioned' | 'related'; source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n- **pending_dependent_fetches** (migration 011): job_type CHECK: 'resource_events' | 'mr_closes_issues' | 'mr_diffs'. 
No later migrations modified this table.\n- **CRITICAL:** Adding 'issue_links' to job_type CHECK requires recreating pending_dependent_fetches table (SQLite can't ALTER CHECK constraints). Migration **027** must copy data, drop, recreate with expanded CHECK, and reinsert.\n- **Orchestrator** (src/ingestion/orchestrator.rs, 1745 lines): Three drain functions exist — drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), drain_mr_diffs() (line 1514). Follow the same claim/complete/fail pattern from dependent_queue.rs.\n- **dependent_queue.rs**: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- **show.rs** (1544 lines): Has get_closing_mrs() for closing MR display. NO related_issues section exists yet.\n- **GitLab API**: GET /projects/:id/issues/:iid/links returns link_type: \"relates_to\", \"blocks\", \"is_blocked_by\"\n- **Migration count**: 26 migrations exist (001-026). Next migration = **027**.\n\n## Approach\n\n### Phase 1: API Client (src/gitlab/client.rs)\n```rust\npub async fn fetch_issue_links(\n &self,\n project_id: i64,\n issue_iid: i64,\n) -> Result> {\n // GET /projects/:id/issues/:iid/links\n // Use fetch_all_pages() + coalesce_not_found()\n}\n```\n\n### Phase 2: Types (src/gitlab/types.rs)\n```rust\n#[derive(Debug, Deserialize)]\npub struct GitLabIssueLink {\n pub id: i64,\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: String,\n pub link_type: String, // \"relates_to\", \"blocks\", \"is_blocked_by\"\n pub link_created_at: Option,\n}\n```\n\n### Phase 3: Migration 027 (migrations/027_issue_links_job_type.sql)\nRecreate pending_dependent_fetches with expanded CHECK:\n```sql\nCREATE TABLE pending_dependent_fetches_new (\n id INTEGER PRIMARY KEY,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')),\n entity_iid INTEGER NOT NULL,\n entity_local_id INTEGER NOT NULL,\n job_type TEXT 
NOT NULL CHECK (job_type IN (\n 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links'\n )),\n payload_json TEXT,\n enqueued_at INTEGER NOT NULL,\n attempts INTEGER NOT NULL DEFAULT 0,\n last_error TEXT,\n next_retry_at INTEGER,\n locked_at INTEGER,\n UNIQUE(project_id, entity_type, entity_iid, job_type)\n);\nINSERT INTO pending_dependent_fetches_new SELECT * FROM pending_dependent_fetches;\nDROP TABLE pending_dependent_fetches;\nALTER TABLE pending_dependent_fetches_new RENAME TO pending_dependent_fetches;\n-- Recreate indexes from migration 011 (idx_pdf_job_type, idx_pdf_next_retry)\n```\n\nRegister in MIGRATIONS array in src/core/db.rs (entry 27).\n\n### Phase 4: Ingestion (src/ingestion/issue_links.rs NEW)\n```rust\npub async fn fetch_and_store_issue_links(\n conn: &Connection,\n client: &GitLabClient,\n project_id: i64,\n issue_local_id: i64,\n issue_iid: i64,\n) -> Result {\n // 1. Fetch links from API\n // 2. Resolve target issue to local DB id (SELECT id FROM issues WHERE project_id=? AND iid=?)\n // 3. Insert into entity_references: reference_type='related', source_method='api'\n // 4. Create bidirectional refs: A->B and B->A\n // 5. Skip self-links\n // 6. 
Cross-project: store with target_entity_id=NULL (unresolved)\n}\n```\n\n### Phase 5: Queue Integration (src/ingestion/orchestrator.rs)\n- Enqueue 'issue_links' job after issue ingestion (near the existing resource_events enqueue)\n- Add drain_issue_links() following drain_mr_closes_issues() pattern (lines 1254-1512)\n- Config gate: add `sync.fetchIssueLinks` (default true) to config, like existing `sync.fetchResourceEvents`\n\n### Phase 6: Display (src/cli/commands/show.rs)\nIn `lore show issue 123`, add \"Related Issues\" section after closing MRs.\nPattern: query entity_references WHERE source_entity_type='issue' AND source_entity_id= AND reference_type='related'.\n\n## Acceptance Criteria\n\n- [ ] API client fetches issue links with pagination (fetch_all_pages + coalesce_not_found)\n- [ ] Stored as entity_reference: reference_type='related', source_method='api'\n- [ ] Bidirectional: A links B creates both A->B and B->A references\n- [ ] link_type captured (relates_to, blocks, is_blocked_by) — stored as 'related' for now\n- [ ] Cross-project links stored as unresolved (target_entity_id NULL)\n- [ ] Self-links skipped\n- [ ] Migration **027** recreates pending_dependent_fetches with 'issue_links' in CHECK\n- [ ] Migration registered in MIGRATIONS array in src/core/db.rs\n- [ ] `lore show issue 123` shows related issues section\n- [ ] `lore --robot show issue 123` includes related_issues in JSON\n- [ ] Config gate: sync.fetchIssueLinks (default true, camelCase serde rename)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/gitlab/client.rs (add fetch_issue_links)\n- MODIFY: src/gitlab/types.rs (add GitLabIssueLink)\n- CREATE: src/ingestion/issue_links.rs\n- MODIFY: src/ingestion/mod.rs (add pub mod issue_links)\n- MODIFY: src/ingestion/orchestrator.rs (enqueue + drain_issue_links)\n- CREATE: migrations/027_issue_links_job_type.sql\n- MODIFY: 
src/core/db.rs (add migration 027 to MIGRATIONS array)\n- MODIFY: src/core/config.rs (add sync.fetchIssueLinks)\n- MODIFY: src/cli/commands/show.rs (display related issues)\n\n## TDD Anchor\n\nRED:\n- test_issue_link_deserialization (types.rs: deserialize GitLabIssueLink from JSON)\n- test_store_issue_links_creates_bidirectional_references (in-memory DB, insert 2 issues, store link, verify 2 rows in entity_references)\n- test_self_link_skipped (same issue_iid both sides, verify 0 rows)\n- test_cross_project_link_unresolved (target not in DB, verify target_entity_id IS NULL)\n\nGREEN: Implement API client, ingestion, migration, display.\n\nVERIFY: cargo test --lib -- issue_links\n\n## Edge Cases\n\n- Cross-project links: target not in local DB -> unresolved reference (target_entity_id NULL)\n- Self-links: skip entirely\n- UNIQUE constraint on entity_references prevents duplicate refs on re-sync\n- \"blocks\"/\"is_blocked_by\" semantics not modeled in entity_references yet — store as 'related'\n- Table recreation migration: safe because pending_dependent_fetches is transient queue data that gets re-enqueued on next sync\n- Recreated table must restore indexes: idx_pdf_job_type, idx_pdf_next_retry (check migration 011 for exact definitions)\n\n## Dependency Context\n\n- **entity_references** (migration 011): provides the target table. 
reference_type='related' already in CHECK.\n- **dependent_queue.rs**: provides enqueue_job/claim_jobs/complete_job/fail_job lifecycle used by drain_issue_links()\n- **orchestrator drain pattern**: drain_mr_closes_issues() (line 1254) is the closest template — fetch API data, insert entity_references, complete job","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-05T15:14:25.202900Z","created_by":"tayloreernisse","updated_at":"2026-02-17T16:50:44.934373Z","compaction_level":0,"original_size":0,"labels":["ISSUE"]} +{"id":"bd-343o","title":"Fetch and store GitLab linked issues (Related to)","description":"## Background\n\nGitLab's \"Linked items\" provides bidirectional issue linking distinct from \"closes\" and \"mentioned\" references. This data is only available via the issue links API (GET /projects/:id/issues/:iid/links). The goal is to fetch these links during sync and store them as entity_references so they appear in `lore show issue` and are queryable.\n\n**Why:** Currently `lore show issue` displays closing MRs (via `get_closing_mrs()` in show.rs:~line 1544) but has NO related issues section. This bead adds that capability.\n\n## Codebase Context\n\n- **entity_references table** (migration 011): reference_type CHECK: 'closes' | 'mentioned' | 'related'; source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n- **pending_dependent_fetches** (migration 011): job_type CHECK: 'resource_events' | 'mr_closes_issues' | 'mr_diffs'. No later migrations modified this table.\n- **CRITICAL:** Adding 'issue_links' to job_type CHECK requires recreating pending_dependent_fetches table (SQLite can't ALTER CHECK constraints). Migration **027** must copy data, drop, recreate with expanded CHECK, and reinsert.\n- **Orchestrator** (src/ingestion/orchestrator.rs, 1745 lines): Three drain functions exist — drain_resource_events() (line 932), drain_mr_closes_issues() (line 1254), drain_mr_diffs() (line 1514). 
Follow the same claim/complete/fail pattern from dependent_queue.rs.\n- **dependent_queue.rs**: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- **show.rs** (1544 lines): Has get_closing_mrs() for closing MR display. NO related_issues section exists yet.\n- **GitLab API**: GET /projects/:id/issues/:iid/links returns link_type: \"relates_to\", \"blocks\", \"is_blocked_by\"\n- **Migration count**: 26 migrations exist (001-026). Next migration = **027**.\n\n## Approach\n\n### Phase 1: API Client (src/gitlab/client.rs)\n```rust\npub async fn fetch_issue_links(\n &self,\n project_id: i64,\n issue_iid: i64,\n) -> Result> {\n // GET /projects/:id/issues/:iid/links\n // Use fetch_all_pages() + coalesce_not_found()\n}\n```\n\n### Phase 2: Types (src/gitlab/types.rs)\n```rust\n#[derive(Debug, Deserialize)]\npub struct GitLabIssueLink {\n pub id: i64,\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: String,\n pub link_type: String, // \"relates_to\", \"blocks\", \"is_blocked_by\"\n pub link_created_at: Option,\n}\n```\n\n### Phase 3: Migration 027 (migrations/027_issue_links_job_type.sql)\nRecreate pending_dependent_fetches with expanded CHECK:\n```sql\nCREATE TABLE pending_dependent_fetches_new (\n id INTEGER PRIMARY KEY,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')),\n entity_iid INTEGER NOT NULL,\n entity_local_id INTEGER NOT NULL,\n job_type TEXT NOT NULL CHECK (job_type IN (\n 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links'\n )),\n payload_json TEXT,\n enqueued_at INTEGER NOT NULL,\n attempts INTEGER NOT NULL DEFAULT 0,\n last_error TEXT,\n next_retry_at INTEGER,\n locked_at INTEGER,\n UNIQUE(project_id, entity_type, entity_iid, job_type)\n);\nINSERT INTO pending_dependent_fetches_new SELECT * FROM pending_dependent_fetches;\nDROP TABLE pending_dependent_fetches;\nALTER TABLE 
pending_dependent_fetches_new RENAME TO pending_dependent_fetches;\n-- Recreate indexes from migration 011 (idx_pdf_job_type, idx_pdf_next_retry)\n```\n\nRegister in MIGRATIONS array in src/core/db.rs (entry 27).\n\n### Phase 4: Ingestion (src/ingestion/issue_links.rs NEW)\n```rust\npub async fn fetch_and_store_issue_links(\n conn: &Connection,\n client: &GitLabClient,\n project_id: i64,\n issue_local_id: i64,\n issue_iid: i64,\n) -> Result {\n // 1. Fetch links from API\n // 2. Resolve target issue to local DB id (SELECT id FROM issues WHERE project_id=? AND iid=?)\n // 3. Insert into entity_references: reference_type='related', source_method='api'\n // 4. Create bidirectional refs: A->B and B->A\n // 5. Skip self-links\n // 6. Cross-project: store with target_entity_id=NULL (unresolved)\n}\n```\n\n### Phase 5: Queue Integration (src/ingestion/orchestrator.rs)\n- Enqueue 'issue_links' job after issue ingestion (near the existing resource_events enqueue)\n- Add drain_issue_links() following drain_mr_closes_issues() pattern (lines 1254-1512)\n- Config gate: add `sync.fetchIssueLinks` (default true) to config, like existing `sync.fetchResourceEvents`\n\n### Phase 6: Display (src/cli/commands/show.rs)\nIn `lore show issue 123`, add \"Related Issues\" section after closing MRs.\nPattern: query entity_references WHERE source_entity_type='issue' AND source_entity_id= AND reference_type='related'.\n\n## Acceptance Criteria\n\n- [ ] API client fetches issue links with pagination (fetch_all_pages + coalesce_not_found)\n- [ ] Stored as entity_reference: reference_type='related', source_method='api'\n- [ ] Bidirectional: A links B creates both A->B and B->A references\n- [ ] link_type captured (relates_to, blocks, is_blocked_by) — stored as 'related' for now\n- [ ] Cross-project links stored as unresolved (target_entity_id NULL)\n- [ ] Self-links skipped\n- [ ] Migration **027** recreates pending_dependent_fetches with 'issue_links' in CHECK\n- [ ] Migration registered in 
MIGRATIONS array in src/core/db.rs\n- [ ] `lore show issue 123` shows related issues section\n- [ ] `lore --robot show issue 123` includes related_issues in JSON\n- [ ] Config gate: sync.fetchIssueLinks (default true, camelCase serde rename)\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n- [ ] `cargo fmt --check` passes\n\n## Files\n\n- MODIFY: src/gitlab/client.rs (add fetch_issue_links)\n- MODIFY: src/gitlab/types.rs (add GitLabIssueLink)\n- CREATE: src/ingestion/issue_links.rs\n- MODIFY: src/ingestion/mod.rs (add pub mod issue_links)\n- MODIFY: src/ingestion/orchestrator.rs (enqueue + drain_issue_links)\n- CREATE: migrations/027_issue_links_job_type.sql\n- MODIFY: src/core/db.rs (add migration 027 to MIGRATIONS array)\n- MODIFY: src/core/config.rs (add sync.fetchIssueLinks)\n- MODIFY: src/cli/commands/show.rs (display related issues)\n\n## TDD Anchor\n\nRED:\n- test_issue_link_deserialization (types.rs: deserialize GitLabIssueLink from JSON)\n- test_store_issue_links_creates_bidirectional_references (in-memory DB, insert 2 issues, store link, verify 2 rows in entity_references)\n- test_self_link_skipped (same issue_iid both sides, verify 0 rows)\n- test_cross_project_link_unresolved (target not in DB, verify target_entity_id IS NULL)\n\nGREEN: Implement API client, ingestion, migration, display.\n\nVERIFY: cargo test --lib -- issue_links\n\n## Edge Cases\n\n- Cross-project links: target not in local DB -> unresolved reference (target_entity_id NULL)\n- Self-links: skip entirely\n- UNIQUE constraint on entity_references prevents duplicate refs on re-sync\n- \"blocks\"/\"is_blocked_by\" semantics not modeled in entity_references yet — store as 'related'\n- Table recreation migration: safe because pending_dependent_fetches is transient queue data that gets re-enqueued on next sync\n- Recreated table must restore indexes: idx_pdf_job_type, idx_pdf_next_retry (check migration 011 for exact definitions)\n\n## 
Dependency Context\n\n- **entity_references** (migration 011): provides the target table. reference_type='related' already in CHECK.\n- **dependent_queue.rs**: provides enqueue_job/claim_jobs/complete_job/fail_job lifecycle used by drain_issue_links()\n- **orchestrator drain pattern**: drain_mr_closes_issues() (line 1254) is the closest template — fetch API data, insert entity_references, complete job","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-02-05T15:14:25.202900Z","created_by":"tayloreernisse","updated_at":"2026-02-19T14:21:46.770384Z","closed_at":"2026-02-19T14:21:46.770305Z","close_reason":"Already fully implemented by another agent: API client, types, migration 029, ingestion module with store_issue_links/watermark, orchestrator enqueue+drain, show.rs display, config gate. All 6 tests pass.","compaction_level":0,"original_size":0,"labels":["ISSUE"]} {"id":"bd-34ek","title":"OBSERV: Implement MetricsLayer custom tracing subscriber layer","description":"## Background\nMetricsLayer is a custom tracing subscriber layer that records span timing and structured fields, then materializes them into Vec. 
This avoids threading a mutable collector through every function signature -- spans are the single source of truth.\n\n## Approach\nAdd to src/core/metrics.rs (same file as StageTiming):\n\n```rust\nuse std::collections::HashMap;\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\nuse tracing::span::{Attributes, Id, Record};\nuse tracing::Subscriber;\nuse tracing_subscriber::layer::{Context, Layer};\nuse tracing_subscriber::registry::LookupSpan;\n\n#[derive(Debug)]\nstruct SpanData {\n name: String,\n parent_id: Option,\n start: Instant,\n fields: HashMap,\n}\n\n#[derive(Debug, Clone)]\npub struct MetricsLayer {\n spans: Arc>>,\n completed: Arc>>,\n}\n\nimpl MetricsLayer {\n pub fn new() -> Self {\n Self {\n spans: Arc::new(Mutex::new(HashMap::new())),\n completed: Arc::new(Mutex::new(Vec::new())),\n }\n }\n\n /// Extract timing tree for a completed run.\n /// Call this after the root span closes.\n pub fn extract_timings(&self) -> Vec {\n let completed = self.completed.lock().unwrap();\n // Build tree: find root entries (no parent), attach children\n // ... 
tree construction logic\n }\n}\n\nimpl Layer for MetricsLayer\nwhere\n S: Subscriber + for<'a> LookupSpan<'a>,\n{\n fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) {\n let parent_id = ctx.span(id).and_then(|s| s.parent().map(|p| p.id()));\n let mut fields = HashMap::new();\n // Visit attrs to capture initial field values\n let mut visitor = FieldVisitor(&mut fields);\n attrs.record(&mut visitor);\n\n self.spans.lock().unwrap().insert(id.into_u64(), SpanData {\n name: attrs.metadata().name().to_string(),\n parent_id,\n start: Instant::now(),\n fields,\n });\n }\n\n fn on_record(&self, id: &Id, values: &Record<'_>, _ctx: Context<'_, S>) {\n // Capture recorded fields (items_processed, items_skipped, errors)\n if let Some(data) = self.spans.lock().unwrap().get_mut(&id.into_u64()) {\n let mut visitor = FieldVisitor(&mut data.fields);\n values.record(&mut visitor);\n }\n }\n\n fn on_close(&self, id: Id, _ctx: Context<'_, S>) {\n if let Some(data) = self.spans.lock().unwrap().remove(&id.into_u64()) {\n let elapsed = data.start.elapsed();\n let timing = StageTiming {\n name: data.name,\n project: data.fields.get(\"project\").and_then(|v| v.as_str()).map(String::from),\n elapsed_ms: elapsed.as_millis() as u64,\n items_processed: data.fields.get(\"items_processed\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n items_skipped: data.fields.get(\"items_skipped\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n errors: data.fields.get(\"errors\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n sub_stages: vec![], // Will be populated during extract_timings tree construction\n };\n self.completed.lock().unwrap().push((id.into_u64(), timing));\n }\n }\n}\n```\n\nNeed a FieldVisitor struct implementing tracing::field::Visit to capture field values.\n\nRegister in subscriber stack (src/main.rs), alongside stderr and file layers:\n```rust\nlet metrics_layer = MetricsLayer::new();\nlet metrics_handle = metrics_layer.clone(); // Clone Arc for later 
extraction\n\nregistry()\n .with(stderr_layer.with_filter(stderr_filter))\n .with(file_layer.with_filter(file_filter))\n .with(metrics_layer) // No filter -- captures all spans\n .init();\n```\n\nPass metrics_handle to command handlers so they can call extract_timings() after the pipeline completes.\n\n## Acceptance Criteria\n- [ ] MetricsLayer captures span enter/close timing\n- [ ] on_record captures items_processed, items_skipped, errors fields\n- [ ] extract_timings() returns correctly nested Vec tree\n- [ ] Parallel spans (multiple projects) both appear as sub_stages of parent\n- [ ] Thread-safe: Arc> allows concurrent span operations\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (add MetricsLayer, FieldVisitor, tree construction)\n- src/main.rs (register MetricsLayer in subscriber stack)\n\n## TDD Loop\nRED:\n - test_metrics_layer_single_span: enter/exit one span, extract, assert one StageTiming\n - test_metrics_layer_nested_spans: parent + child, assert child in parent.sub_stages\n - test_metrics_layer_parallel_spans: two sibling spans, assert both in parent.sub_stages\n - test_metrics_layer_field_recording: record items_processed=42, assert captured\nGREEN: Implement MetricsLayer with on_new_span, on_record, on_close, extract_timings\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- Span ID reuse: tracing may reuse span IDs after close. Using remove on close prevents stale data.\n- Lock contention: Mutex per operation. For high-span-count scenarios, consider parking_lot::Mutex. But lore's span count is low (<100 per run), so std::sync::Mutex is fine.\n- extract_timings tree construction: iterate completed Vec, build parent->children map, then recursively construct StageTiming tree. Root entries have parent_id matching the root span or None.\n- MetricsLayer has no filter: it sees ALL spans. 
To avoid noise from dependency spans, check if span name starts with known stage names, or rely on the \"stage\" field being present.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.960669Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:25:25.523811Z","closed_at":"2026-02-04T17:25:25.523730Z","close_reason":"Implemented MetricsLayer custom tracing subscriber layer with span timing capture, rate-limit/retry event detection, tree extraction, and 12 unit tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-34ek","depends_on_id":"bd-1o4h","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-34ek","depends_on_id":"bd-24j1","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-34ek","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-34o","title":"Implement MR transformer","description":"## Background\nTransforms GitLab MR API responses into normalized schema for database storage. 
Handles deprecated field fallbacks and extracts metadata (labels, assignees, reviewers).\n\n## Approach\nCreate new transformer module following existing issue transformer pattern:\n- `NormalizedMergeRequest` - Database-ready struct\n- `MergeRequestWithMetadata` - MR + extracted labels/assignees/reviewers\n- `transform_merge_request()` - Main transformation function\n- `extract_labels()` - Label extraction helper\n\n## Files\n- `src/gitlab/transformers/merge_request.rs` - New transformer module\n- `src/gitlab/transformers/mod.rs` - Export new module\n- `tests/mr_transformer_tests.rs` - Unit tests\n\n## Acceptance Criteria\n- [ ] `NormalizedMergeRequest` struct exists with all DB columns\n- [ ] `MergeRequestWithMetadata` contains MR + label_names + assignee_usernames + reviewer_usernames\n- [ ] `transform_merge_request()` returns `Result`\n- [ ] `draft` computed as `gitlab_mr.draft || gitlab_mr.work_in_progress`\n- [ ] `detailed_merge_status` prefers `detailed_merge_status` over `merge_status_legacy`\n- [ ] `merge_user_username` prefers `merge_user` over `merged_by`\n- [ ] `head_sha` extracted from `sha` field\n- [ ] `references_short` and `references_full` extracted from `references` Option\n- [ ] Timestamps parsed with `iso_to_ms()`, errors returned (not zeroed)\n- [ ] `last_seen_at` set to `now_ms()`\n- [ ] `cargo test mr_transformer` passes\n\n## TDD Loop\nRED: `cargo test mr_transformer` -> module not found\nGREEN: Add transformer with all fields\nVERIFY: `cargo test mr_transformer`\n\n## Struct Definitions\n```rust\n#[derive(Debug, Clone)]\npub struct NormalizedMergeRequest {\n pub gitlab_id: i64,\n pub project_id: i64,\n pub iid: i64,\n pub title: String,\n pub description: Option,\n pub state: String,\n pub draft: bool,\n pub author_username: String,\n pub source_branch: String,\n pub target_branch: String,\n pub head_sha: Option,\n pub references_short: Option,\n pub references_full: Option,\n pub detailed_merge_status: Option,\n pub merge_user_username: 
Option,\n pub created_at: i64,\n pub updated_at: i64,\n pub merged_at: Option,\n pub closed_at: Option,\n pub last_seen_at: i64,\n pub web_url: String,\n}\n\n#[derive(Debug, Clone)]\npub struct MergeRequestWithMetadata {\n pub merge_request: NormalizedMergeRequest,\n pub label_names: Vec,\n pub assignee_usernames: Vec,\n pub reviewer_usernames: Vec,\n}\n```\n\n## Function Signature\n```rust\npub fn transform_merge_request(\n gitlab_mr: &GitLabMergeRequest,\n local_project_id: i64,\n) -> Result\n```\n\n## Key Logic\n```rust\n// Draft: prefer draft, fallback to work_in_progress\nlet is_draft = gitlab_mr.draft || gitlab_mr.work_in_progress;\n\n// Merge status: prefer detailed_merge_status\nlet detailed_merge_status = gitlab_mr.detailed_merge_status\n .clone()\n .or_else(|| gitlab_mr.merge_status_legacy.clone());\n\n// Merge user: prefer merge_user\nlet merge_user_username = gitlab_mr.merge_user\n .as_ref()\n .map(|u| u.username.clone())\n .or_else(|| gitlab_mr.merged_by.as_ref().map(|u| u.username.clone()));\n\n// References extraction\nlet (references_short, references_full) = gitlab_mr.references\n .as_ref()\n .map(|r| (Some(r.short.clone()), Some(r.full.clone())))\n .unwrap_or((None, None));\n\n// Head SHA\nlet head_sha = gitlab_mr.sha.clone();\n```\n\n## Edge Cases\n- Invalid timestamps should return `Err`, not zero values\n- Empty labels/assignees/reviewers should return empty Vecs, not None\n- `state` must pass through as-is (including 
\"locked\")","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.849049Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:11:48.501301Z","closed_at":"2026-01-27T00:11:48.501241Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-34o","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"},{"issue_id":"bd-34o","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-02-12T19:34:39Z","created_by":"import"}]} {"id":"bd-34rr","title":"WHO: Migration 017 — composite indexes for query paths","description":"## Background\n\nWith 280K notes, the path/timestamp queries for lore who will degrade without composite indexes. Existing indexes cover note_type and position_new_path separately (migration 006) but not as composites aligned to the who query patterns. This is a non-breaking, additive-only migration.\n\n## Approach\n\nAdd as entry 17 (index 16) in the MIGRATIONS array in src/core/db.rs. 
LATEST_SCHEMA_VERSION auto-updates via MIGRATIONS.len() as i32.\n\n### Exact SQL for the migration entry:\n\n```sql\n-- Migration 017: Composite indexes for who query paths\n\n-- Expert/Overlap: DiffNote path prefix + timestamp filter.\n-- Leading with position_new_path (not note_type) because the partial index\n-- predicate already handles the constant filter.\nCREATE INDEX IF NOT EXISTS idx_notes_diffnote_path_created\n ON notes(position_new_path, created_at, project_id)\n WHERE note_type = 'DiffNote' AND is_system = 0;\n\n-- Active/Workload: discussion participation lookups.\nCREATE INDEX IF NOT EXISTS idx_notes_discussion_author\n ON notes(discussion_id, author_username)\n WHERE is_system = 0;\n\n-- Active (project-scoped): unresolved discussions by recency.\nCREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent\n ON discussions(project_id, last_note_at)\n WHERE resolvable = 1 AND resolved = 0;\n\n-- Active (global): unresolved discussions by recency (no project scope).\n-- Without this, (project_id, last_note_at) can't satisfy ORDER BY last_note_at DESC\n-- efficiently when project_id is unconstrained.\nCREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global\n ON discussions(last_note_at)\n WHERE resolvable = 1 AND resolved = 0;\n\n-- Workload: issue assignees by username.\nCREATE INDEX IF NOT EXISTS idx_issue_assignees_username\n ON issue_assignees(username, issue_id);\n```\n\n### Not added (already adequate):\n- merge_requests(author_username) — idx_mrs_author (migration 006)\n- mr_reviewers(username) — idx_mr_reviewers_username (migration 006)\n- notes(discussion_id) — idx_notes_discussion (migration 002)\n\n## Files\n\n- `src/core/db.rs` — append to MIGRATIONS array as entry index 16\n\n## TDD Loop\n\nRED: `cargo test -- test_migration` (existing migration tests should still pass)\nGREEN: Add the migration SQL string to the array\nVERIFY: `cargo test && cargo check --all-targets`\n\n## Acceptance Criteria\n\n- [ ] MIGRATIONS array has 17 
entries (index 0-16)\n- [ ] LATEST_SCHEMA_VERSION is 17\n- [ ] cargo test passes (in-memory DB runs all migrations including 017)\n- [ ] No existing index names conflict\n\n## Edge Cases\n\n- The SQL uses CREATE INDEX IF NOT EXISTS — safe for idempotent reruns\n- Partial indexes (WHERE clause) keep index size small: ~33K of 280K notes for DiffNote index","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T02:39:49.397860Z","created_by":"tayloreernisse","updated_at":"2026-02-08T04:10:29.593561Z","closed_at":"2026-02-08T04:10:29.593519Z","close_reason":"Implemented by agent team: migration 017, CLI skeleton, all 5 query modes, human+robot output, 20 tests. All quality gates pass.","compaction_level":0,"original_size":0} @@ -266,7 +266,7 @@ {"id":"bd-91j1","title":"Comprehensive robot-docs as agent bootstrap","description":"## Background\nAgents reach for glab because they already know it from training data. lore robot-docs exists but is not comprehensive enough to serve as a zero-training bootstrap. 
An agent encountering lore for the first time should be able to use any command correctly after reading robot-docs output alone.\n\n## Current State (Verified 2026-02-12)\n- `handle_robot_docs()` at src/main.rs:2069\n- Called at no-args in robot mode (main.rs:165) and via Commands::RobotDocs { brief } (main.rs:229)\n- Current output top-level keys: name, version, description, activation, commands, aliases, exit_codes, clap_error_codes, error_format, workflows\n- Missing: response_schema per command, example_output per command, quick_start section, glab equivalence table\n- --brief flag exists but returns shorter version of same structure\n- main.rs is 2579 lines total\n\n## Current robot-docs Output Structure\n```json\n{\n \"name\": \"lore\",\n \"version\": \"0.6.1\",\n \"description\": \"...\",\n \"activation\": { \"flags\": [\"--robot\", \"-J\"], \"env\": \"LORE_ROBOT=1\", \"auto_detect\": \"non-TTY\" },\n \"commands\": [{ \"name\": \"...\", \"description\": \"...\", \"flags\": [...], \"example\": \"...\" }],\n \"aliases\": { ... },\n \"exit_codes\": { ... },\n \"clap_error_codes\": { ... },\n \"error_format\": { ... },\n \"workflows\": { ... }\n}\n```\n\n## Approach\n\n### 1. 
Add quick_start section\nTop-level key with glab-to-lore translation and lore-exclusive feature summary:\n```json\n\"quick_start\": {\n \"glab_equivalents\": [\n { \"glab\": \"glab issue list\", \"lore\": \"lore -J issues -n 50\", \"note\": \"Richer: includes labels, status, closing MRs\" },\n { \"glab\": \"glab issue view 123\", \"lore\": \"lore -J issues 123\", \"note\": \"Includes discussions, work-item status\" },\n { \"glab\": \"glab mr list\", \"lore\": \"lore -J mrs\", \"note\": \"Includes draft status, reviewers\" },\n { \"glab\": \"glab mr view 456\", \"lore\": \"lore -J mrs 456\", \"note\": \"Includes discussions, file changes\" },\n { \"glab\": \"glab api '/projects/:id/issues'\", \"lore\": \"lore -J issues -p project\", \"note\": \"Fuzzy project matching\" }\n ],\n \"lore_exclusive\": [\n \"search: FTS5 + vector hybrid search across all entities\",\n \"who: Expert/workload/reviews analysis per file path or person\",\n \"timeline: Chronological event reconstruction across entities\",\n \"stats: Database statistics with document/note/discussion counts\",\n \"count: Entity counts with state breakdowns\"\n ]\n}\n```\n\n### 2. Add response_schema per command\nFor each command in the commands array, add a `response_schema` field showing the JSON shape:\n```json\n{\n \"name\": \"issues\",\n \"response_schema\": {\n \"ok\": \"boolean\",\n \"data\": { \"type\": \"array|object\", \"fields\": [\"iid\", \"title\", \"state\", \"...\"] },\n \"meta\": { \"elapsed_ms\": \"integer\" }\n }\n}\n```\nCommands with multiple output shapes (list vs detail) need both documented.\n\n### 3. Add example_output per command\nRealistic truncated JSON for each command. Keep each example under 500 bytes.\n\n### 4. Token budget enforcement\n- --brief mode: ONLY quick_start + command names + invocation syntax. Target <4000 tokens (~16000 bytes).\n- Full mode: everything. 
Target <12000 tokens (~48000 bytes).\n- Measure with: `cargo run --release -- --robot robot-docs --brief | wc -c`\n\n## TDD Loop\nRED: Tests in src/main.rs or new src/cli/commands/robot_docs.rs:\n- test_robot_docs_has_quick_start: parse output JSON, assert quick_start.glab_equivalents array has >= 5 entries\n- test_robot_docs_brief_size: --brief output < 16000 bytes\n- test_robot_docs_full_size: full output < 48000 bytes\n- test_robot_docs_has_response_schemas: every command entry has response_schema key\n- test_robot_docs_commands_complete: assert all registered commands appear (issues, mrs, search, who, timeline, count, stats, sync, embed, doctor, health, ingest, generate-docs, show)\n\nGREEN: Add quick_start, response_schema, example_output to robot-docs output\n\nVERIFY:\n```bash\ncargo test robot_docs && cargo clippy --all-targets -- -D warnings\ncargo run --release -- --robot robot-docs | jq '.quick_start.glab_equivalents | length'\n# Should return >= 5\ncargo run --release -- --robot robot-docs --brief | wc -c\n# Should be < 16000\n```\n\n## Acceptance Criteria\n- [ ] robot-docs JSON has quick_start.glab_equivalents array with >= 5 entries\n- [ ] robot-docs JSON has quick_start.lore_exclusive array\n- [ ] Every command entry has response_schema showing the JSON shape\n- [ ] Every command entry has example_output with realistic truncated data\n- [ ] --brief output is under 16000 bytes (~4000 tokens)\n- [ ] Full output is under 48000 bytes (~12000 tokens)\n- [ ] An agent reading ONLY robot-docs can correctly invoke any lore command\n- [ ] cargo test passes with new robot_docs tests\n\n## Edge Cases\n- Commands with multiple output shapes (e.g., issues list vs issues detail via iid) need both schemas documented\n- --fields flag changes output shape -- document the effect in the response_schema\n- robot-docs output must be stable across versions (agents may cache it)\n- Version field should match Cargo.toml version\n\n## Files to Modify\n- src/main.rs fn 
handle_robot_docs() (~line 2069) — add quick_start section, response_schema, example_output\n- Consider extracting to src/cli/commands/robot_docs.rs if the function exceeds 200 lines","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-12T15:44:40.495479Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:49:01.043915Z","closed_at":"2026-02-12T16:49:01.043832Z","close_reason":"Robot-docs enhanced with quick_start (glab equivalents, lore exclusives, read/write split) and example_output for issues/mrs/search/who","compaction_level":0,"original_size":0,"labels":["cli","cli-imp","robot-mode"],"dependencies":[{"issue_id":"bd-91j1","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-9av","title":"[CP1] gi sync-status enhancement","description":"Enhance sync-status from CP0 stub to show issue cursors.\n\n## Changes to src/cli/commands/sync_status.rs\n\nUpdate the existing stub to show:\n- Last run timestamp and duration\n- Cursor positions per project (issues resource_type)\n- Entity counts (issues, discussions, notes)\n\n## Output Format\nLast sync: 2026-01-25 10:30:00 (succeeded, 45s)\n\nCursors:\n group/project-one\n issues: 2026-01-25T10:25:00Z (gitlab_id: 12345678)\n\nCounts:\n Issues: 1,234\n Discussions: 5,678\n Notes: 23,456 (4,567 system)\n\nFiles: src/cli/commands/sync_status.rs\nDone when: Shows cursor positions and counts after ingestion","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:27.246825Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.968507Z","closed_at":"2026-01-25T17:02:01.968507Z","deleted_at":"2026-01-25T17:02:01.968503Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-9dd","title":"Implement 'lore trace' command with human and robot output","description":"## Background\n\nThe trace command is Gate 
5's capstone CLI. It answers 'Why was this code introduced?' by building file -> MR -> issue -> discussion chains.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 5.3.\n\n## Codebase Context\n\n- CLI pattern: same as file-history (Commands enum, handler in main.rs)\n- trace.rs (bd-2n4): run_trace() returns TraceResult with chains\n- Path parsing: support 'src/foo.rs:45' syntax (line number for future Tier 2)\n- merge_requests.merged_at exists (migration 006) — use COALESCE(merged_at, updated_at) for ordering\n\n## Approach\n\n### 1. TraceArgs (`src/cli/mod.rs`):\n```rust\n#[derive(Parser)]\npub struct TraceArgs {\n pub path: String, // supports :line suffix\n #[arg(short = 'p', long)] pub project: Option,\n #[arg(long)] pub discussions: bool,\n #[arg(long = \"no-follow-renames\")] pub no_follow_renames: bool,\n #[arg(short = 'n', long = \"limit\", default_value = \"20\")] pub limit: usize,\n}\n```\n\n### 2. Path parsing:\n```rust\nfn parse_trace_path(input: &str) -> (String, Option) {\n if let Some((path, line)) = input.rsplit_once(':') {\n if let Ok(n) = line.parse::() { return (path.to_string(), Some(n)); }\n }\n (input.to_string(), None)\n}\n```\nIf line present: warn 'Line-level tracing requires Tier 2. Showing file-level results.'\n\n### 3. Human output shows chains with MR -> issue -> discussion context\n\n### 4. 
Robot JSON:\n```json\n{\"ok\": true, \"data\": {\"path\": \"...\", \"resolved_paths\": [...], \"trace_chains\": [...]}, \"meta\": {\"tier\": \"api_only\", \"line_requested\": null}}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore trace src/foo.rs` with human output\n- [ ] `lore --robot trace src/foo.rs` with JSON\n- [ ] :line suffix parses and emits Tier 2 warning\n- [ ] -p, --discussions, --no-follow-renames, -n all work\n- [ ] Rename-aware via resolve_rename_chain\n- [ ] meta.tier = 'api_only'\n- [ ] Added to VALID_COMMANDS and robot-docs\n- [ ] `cargo check --all-targets` passes\n\n## Files\n\n- `src/cli/mod.rs` (TraceArgs + Commands::Trace)\n- `src/cli/commands/trace.rs` (NEW)\n- `src/cli/commands/mod.rs` (re-export)\n- `src/main.rs` (handler + VALID_COMMANDS + robot-docs)\n\n## TDD Loop\n\nRED:\n- `test_parse_trace_path_simple` - \"src/foo.rs\" -> (path, None)\n- `test_parse_trace_path_with_line` - \"src/foo.rs:42\" -> (path, Some(42))\n- `test_parse_trace_path_windows` - \"C:/foo.rs\" -> (path, None) — don't misparse drive letter\n\nGREEN: Implement CLI wiring and handlers.\n\nVERIFY: `cargo check --all-targets`\n\n## Edge Cases\n\n- Windows paths: don't misparse C: as line number\n- No MR data: friendly message with suggestion to sync\n- Very deep rename chain: bounded by resolve_rename_chain","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:32.788530Z","created_by":"tayloreernisse","updated_at":"2026-02-19T13:46:36.973805Z","closed_at":"2026-02-19T13:46:36.973541Z","close_reason":"Implementation complete: trace CLI command with human + robot output, all tests passing","compaction_level":0,"original_size":0,"labels":["cli","gate-5","phase-b"],"dependencies":[{"issue_id":"bd-9dd","depends_on_id":"bd-1ht","type":"parent-child","created_at":"2026-02-18T17:42:00Z","created_by":"import"},{"issue_id":"bd-9dd","depends_on_id":"bd-2n4","type":"blocks","created_at":"2026-02-18T17:42:00Z","created_by":"import"}]} 
-{"id":"bd-9lbr","title":"lore explain: auto-generate issue/MR narrative","description":"## Background\nGiven an issue or MR, auto-generate a structured narrative of what happened: who was involved, what decisions were made, what changed, and what is unresolved. Template-based v1 (no LLM dependency), deterministic and reproducible.\n\n## Current Infrastructure (Verified 2026-02-12)\n- show.rs: IssueDetail (line 69) and MrDetail (line 14) — entity detail with discussions\n- timeline.rs: 5-stage pipeline SHIPPED — chronological event reconstruction\n- notes table: 282K rows with body, author, created_at, is_system, discussion_id\n- discussions table: links notes to parent entity (noteable_type, noteable_id), has resolved flag\n- resource_state_events table: state changes with created_at, user_username (src/core/events_db.rs)\n- resource_label_events table: label add/remove with created_at, user_username\n- entity_references table (src/core/references.rs): cross-references between entities (closing MRs, related issues). Column names: `source_entity_type`, `source_entity_id`, `target_entity_type`, `target_entity_id`, `target_project_path`, `target_entity_iid`, `reference_type`, `source_method`\n\n## Approach\nNew command: `lore explain issues N` / `lore explain mrs N`\n\n### Data Assembly (reuse existing internals as library calls)\n1. Entity detail: reuse show.rs query logic for IssueDetail/MrDetail\n2. Timeline events: reuse timeline pipeline with entity-scoped seed\n3. Discussion notes:\n```sql\nSELECT n.id, n.body, n.author_username, n.created_at\nFROM notes n\nJOIN discussions d ON n.discussion_id = d.id\nWHERE d.noteable_type = ? AND d.noteable_id = ?\n AND n.is_system = 0\nORDER BY n.created_at\n```\n4. 
Cross-references:\n```sql\nSELECT target_entity_type, target_entity_id, target_project_path,\n target_entity_iid, reference_type, source_method\nFROM entity_references\nWHERE (source_entity_type = ?1 AND source_entity_id = ?2)\nUNION ALL\nSELECT source_entity_type, source_entity_id, NULL,\n NULL, reference_type, source_method\nFROM entity_references\nWHERE (target_entity_type = ?1 AND target_entity_id = ?2)\n```\n\n### Key Decisions Heuristic\nNotes from assignees/author that follow state or label changes within 1 hour:\n```rust\nstruct StateOrLabelEvent {\n created_at: i64, // ms epoch\n user: String,\n description: String, // e.g. \"state: opened -> closed\" or \"label: +bug\"\n}\n\nfn extract_key_decisions(\n state_events: &[ResourceStateEvent],\n label_events: &[ResourceLabelEvent],\n notes: &[Note],\n) -> Vec {\n // Merge both event types into a unified chronological list\n let mut events: Vec = Vec::new();\n for e in state_events {\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"state: {} -> {}\", e.from_state.as_deref().unwrap_or(\"?\"), e.to_state),\n });\n }\n for e in label_events {\n let action = if e.action == \"add\" { \"+\" } else { \"-\" };\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"label: {}{}\", action, e.label_name.as_deref().unwrap_or(\"?\")),\n });\n }\n events.sort_by_key(|e| e.created_at);\n\n let mut decisions = Vec::new();\n let one_hour_ms: i64 = 60 * 60 * 1000;\n\n for event in &events {\n // Find notes by same actor within 60 min after the event\n for note in notes {\n if note.author_username == event.user\n && note.created_at >= event.created_at\n && note.created_at <= event.created_at + one_hour_ms\n {\n decisions.push(KeyDecision {\n timestamp: event.created_at,\n actor: event.user.clone(),\n action: event.description.clone(),\n context_note: truncate(¬e.body, 500),\n });\n break; // one 
note per event\n }\n }\n }\n decisions.truncate(10); // Cap at 10 key decisions\n decisions\n}\n```\n\n### Narrative Sections\n1. **Header**: title, author, opened date, state, assignees, labels, status_name\n2. **Description excerpt**: first 500 chars of description (or full if shorter)\n3. **Key decisions**: notes correlated with state/label changes (heuristic above)\n4. **Activity summary**: counts of state changes, label changes, notes, time range\n5. **Open threads**: discussions WHERE resolved = false\n6. **Related entities**: closing MRs (with state), related issues from entity_references\n7. **Timeline excerpt**: first 20 events from timeline pipeline\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"entity\": {\n \"type\": \"issue\", \"iid\": 3864, \"title\": \"...\", \"state\": \"opened\",\n \"author\": \"teernisse\", \"assignees\": [\"teernisse\"],\n \"labels\": [\"customer:BNSF\"], \"created_at\": \"...\", \"updated_at\": \"...\",\n \"url\": \"...\", \"status_name\": \"In progress\"\n },\n \"description_excerpt\": \"First 500 chars of description...\",\n \"key_decisions\": [{\n \"timestamp\": \"2026-01-15T...\",\n \"actor\": \"teernisse\",\n \"action\": \"state: opened -> in_progress\",\n \"context_note\": \"Starting work on the BNSF throw time integration...\"\n }],\n \"activity\": {\n \"state_changes\": 3, \"label_changes\": 5, \"notes\": 42,\n \"first_event\": \"2026-01-10T...\", \"last_event\": \"2026-02-12T...\"\n },\n \"open_threads\": [{\n \"discussion_id\": \"abc123\",\n \"started_by\": \"cseiber\",\n \"started_at\": \"2026-02-01T...\",\n \"note_count\": 5,\n \"last_note_at\": \"2026-02-10T...\"\n }],\n \"related\": {\n \"closing_mrs\": [{ \"iid\": 200, \"title\": \"...\", \"state\": \"merged\" }],\n \"related_issues\": [{ \"iid\": 3800, \"title\": \"Rail Break Card\", \"relation\": \"related\" }]\n },\n \"timeline_excerpt\": [{ \"timestamp\": \"...\", \"event_type\": \"...\", \"actor\": \"...\", \"summary\": \"...\" 
}]\n },\n \"meta\": { \"elapsed_ms\": 350 }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nExplain {\n /// Entity type: \"issues\" or \"mrs\"\n entity_type: String,\n /// Entity IID\n iid: i64,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/explain.rs:\n- test_explain_issue_basic: insert issue + notes + state events, run explain, assert all sections present (entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt)\n- test_explain_key_decision_heuristic: insert state change event + note by same author within 30 min, assert note appears in key_decisions\n- test_explain_key_decision_ignores_unrelated_notes: insert note by different author, assert it does NOT appear in key_decisions\n- test_explain_open_threads: insert 2 discussions (1 resolved, 1 unresolved), assert only unresolved in open_threads\n- test_explain_no_notes: issue with zero notes produces header + description + empty sections\n- test_explain_mr: insert MR with merged_at, assert entity includes type=\"merge_request\"\n- test_explain_activity_counts: insert 3 state events + 2 label events + 10 notes, assert counts match\n\nGREEN: Implement explain command with section assembly\n\nVERIFY:\n```bash\ncargo test explain:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J explain issues 3864 | jq '.data | keys'\n# Should include: entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt\n```\n\n## Acceptance Criteria\n- [ ] lore explain issues N produces structured output for any synced issue\n- [ ] lore explain mrs N produces structured output for any synced MR\n- [ ] Robot mode returns all 7 sections\n- [ ] Human mode renders readable narrative with headers and indentation\n- [ ] Key decisions heuristic: captures notes within 60 min of state/label changes by same actor\n- [ ] Works fully offline 
(no API calls, no LLM)\n- [ ] Performance: <500ms for issue with 50 notes\n- [ ] Command registered in main.rs and robot-docs\n- [ ] key_decisions capped at 10, timeline_excerpt capped at 20 events\n\n## Edge Cases\n- Issue with empty description: description_excerpt = \"(no description)\"\n- Issue with 500+ notes: timeline_excerpt capped at 20, key_decisions capped at 10\n- Issue not found in local DB: exit code 17 with suggestion to sync\n- Ambiguous project: exit code 18 with suggestion to use -p flag\n- MR with no review activity: activity section shows zeros\n- Cross-project references: show as unresolved with project path hint\n- Notes that are pure code blocks: include in key_decisions if correlated with events (they may contain implementation decisions)\n- ResourceStateEvent/ResourceLabelEvent field names: check src/core/events_db.rs for exact struct definitions before implementing\n\n## Dependency Context\n- **bd-2g50 (data gaps)**: BLOCKER. Provides `closed_at` field on IssueDetail for the header section. 
Without it, explain can still show state=\"closed\" but won't have the exact close timestamp.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/explain.rs\n- src/cli/commands/mod.rs (add pub mod explain; re-export)\n- src/main.rs (register Explain subcommand in Commands enum, add handle_explain fn)\n- Reuse: show.rs queries, timeline pipeline, notes/discussions/resource_events queries from src/core/events_db.rs","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:46:41.386454Z","created_by":"tayloreernisse","updated_at":"2026-02-12T16:31:34.538422Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence"],"dependencies":[{"issue_id":"bd-9lbr","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-9lbr","depends_on_id":"bd-2g50","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} +{"id":"bd-9lbr","title":"lore explain: auto-generate issue/MR narrative","description":"## Background\nGiven an issue or MR, auto-generate a structured narrative of what happened: who was involved, what decisions were made, what changed, and what is unresolved. Template-based v1 (no LLM dependency), deterministic and reproducible.\n\n## Current Infrastructure (Verified 2026-02-12)\n- show.rs: IssueDetail (line 69) and MrDetail (line 14) — entity detail with discussions\n- timeline.rs: 5-stage pipeline SHIPPED — chronological event reconstruction\n- notes table: 282K rows with body, author, created_at, is_system, discussion_id\n- discussions table: links notes to parent entity (noteable_type, noteable_id), has resolved flag\n- resource_state_events table: state changes with created_at, user_username (src/core/events_db.rs)\n- resource_label_events table: label add/remove with created_at, user_username\n- entity_references table (src/core/references.rs): cross-references between entities (closing MRs, related issues). 
Column names: `source_entity_type`, `source_entity_id`, `target_entity_type`, `target_entity_id`, `target_project_path`, `target_entity_iid`, `reference_type`, `source_method`\n\n## Approach\nNew command: `lore explain issues N` / `lore explain mrs N`\n\n### Data Assembly (reuse existing internals as library calls)\n1. Entity detail: reuse show.rs query logic for IssueDetail/MrDetail\n2. Timeline events: reuse timeline pipeline with entity-scoped seed\n3. Discussion notes:\n```sql\nSELECT n.id, n.body, n.author_username, n.created_at\nFROM notes n\nJOIN discussions d ON n.discussion_id = d.id\nWHERE d.noteable_type = ? AND d.noteable_id = ?\n AND n.is_system = 0\nORDER BY n.created_at\n```\n4. Cross-references:\n```sql\nSELECT target_entity_type, target_entity_id, target_project_path,\n target_entity_iid, reference_type, source_method\nFROM entity_references\nWHERE (source_entity_type = ?1 AND source_entity_id = ?2)\nUNION ALL\nSELECT source_entity_type, source_entity_id, NULL,\n NULL, reference_type, source_method\nFROM entity_references\nWHERE (target_entity_type = ?1 AND target_entity_id = ?2)\n```\n\n### Key Decisions Heuristic\nNotes from assignees/author that follow state or label changes within 1 hour:\n```rust\nstruct StateOrLabelEvent {\n created_at: i64, // ms epoch\n user: String,\n description: String, // e.g. 
\"state: opened -> closed\" or \"label: +bug\"\n}\n\nfn extract_key_decisions(\n state_events: &[ResourceStateEvent],\n label_events: &[ResourceLabelEvent],\n notes: &[Note],\n) -> Vec {\n // Merge both event types into a unified chronological list\n let mut events: Vec = Vec::new();\n for e in state_events {\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"state: {} -> {}\", e.from_state.as_deref().unwrap_or(\"?\"), e.to_state),\n });\n }\n for e in label_events {\n let action = if e.action == \"add\" { \"+\" } else { \"-\" };\n events.push(StateOrLabelEvent {\n created_at: e.created_at,\n user: e.user_username.clone(),\n description: format!(\"label: {}{}\", action, e.label_name.as_deref().unwrap_or(\"?\")),\n });\n }\n events.sort_by_key(|e| e.created_at);\n\n let mut decisions = Vec::new();\n let one_hour_ms: i64 = 60 * 60 * 1000;\n\n for event in &events {\n // Find notes by same actor within 60 min after the event\n for note in notes {\n if note.author_username == event.user\n && note.created_at >= event.created_at\n && note.created_at <= event.created_at + one_hour_ms\n {\n decisions.push(KeyDecision {\n timestamp: event.created_at,\n actor: event.user.clone(),\n action: event.description.clone(),\n context_note: truncate(¬e.body, 500),\n });\n break; // one note per event\n }\n }\n }\n decisions.truncate(10); // Cap at 10 key decisions\n decisions\n}\n```\n\n### Narrative Sections\n1. **Header**: title, author, opened date, state, assignees, labels, status_name\n2. **Description excerpt**: first 500 chars of description (or full if shorter)\n3. **Key decisions**: notes correlated with state/label changes (heuristic above)\n4. **Activity summary**: counts of state changes, label changes, notes, time range\n5. **Open threads**: discussions WHERE resolved = false\n6. **Related entities**: closing MRs (with state), related issues from entity_references\n7. 
**Timeline excerpt**: first 20 events from timeline pipeline\n\n## Robot Mode Output Schema\n```json\n{\n \"ok\": true,\n \"data\": {\n \"entity\": {\n \"type\": \"issue\", \"iid\": 3864, \"title\": \"...\", \"state\": \"opened\",\n \"author\": \"teernisse\", \"assignees\": [\"teernisse\"],\n \"labels\": [\"customer:BNSF\"], \"created_at\": \"...\", \"updated_at\": \"...\",\n \"url\": \"...\", \"status_name\": \"In progress\"\n },\n \"description_excerpt\": \"First 500 chars of description...\",\n \"key_decisions\": [{\n \"timestamp\": \"2026-01-15T...\",\n \"actor\": \"teernisse\",\n \"action\": \"state: opened -> in_progress\",\n \"context_note\": \"Starting work on the BNSF throw time integration...\"\n }],\n \"activity\": {\n \"state_changes\": 3, \"label_changes\": 5, \"notes\": 42,\n \"first_event\": \"2026-01-10T...\", \"last_event\": \"2026-02-12T...\"\n },\n \"open_threads\": [{\n \"discussion_id\": \"abc123\",\n \"started_by\": \"cseiber\",\n \"started_at\": \"2026-02-01T...\",\n \"note_count\": 5,\n \"last_note_at\": \"2026-02-10T...\"\n }],\n \"related\": {\n \"closing_mrs\": [{ \"iid\": 200, \"title\": \"...\", \"state\": \"merged\" }],\n \"related_issues\": [{ \"iid\": 3800, \"title\": \"Rail Break Card\", \"relation\": \"related\" }]\n },\n \"timeline_excerpt\": [{ \"timestamp\": \"...\", \"event_type\": \"...\", \"actor\": \"...\", \"summary\": \"...\" }]\n },\n \"meta\": { \"elapsed_ms\": 350 }\n}\n```\n\n## Clap Registration\n```rust\n// In src/main.rs Commands enum, add:\nExplain {\n /// Entity type: \"issues\" or \"mrs\"\n entity_type: String,\n /// Entity IID\n iid: i64,\n /// Scope to project (fuzzy match)\n #[arg(short, long)]\n project: Option,\n},\n```\n\n## TDD Loop\nRED: Tests in src/cli/commands/explain.rs:\n- test_explain_issue_basic: insert issue + notes + state events, run explain, assert all sections present (entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt)\n- 
test_explain_key_decision_heuristic: insert state change event + note by same author within 30 min, assert note appears in key_decisions\n- test_explain_key_decision_ignores_unrelated_notes: insert note by different author, assert it does NOT appear in key_decisions\n- test_explain_open_threads: insert 2 discussions (1 resolved, 1 unresolved), assert only unresolved in open_threads\n- test_explain_no_notes: issue with zero notes produces header + description + empty sections\n- test_explain_mr: insert MR with merged_at, assert entity includes type=\"merge_request\"\n- test_explain_activity_counts: insert 3 state events + 2 label events + 10 notes, assert counts match\n\nGREEN: Implement explain command with section assembly\n\nVERIFY:\n```bash\ncargo test explain:: && cargo clippy --all-targets -- -D warnings\ncargo run --release -- -J explain issues 3864 | jq '.data | keys'\n# Should include: entity, description_excerpt, key_decisions, activity, open_threads, related, timeline_excerpt\n```\n\n## Acceptance Criteria\n- [ ] lore explain issues N produces structured output for any synced issue\n- [ ] lore explain mrs N produces structured output for any synced MR\n- [ ] Robot mode returns all 7 sections\n- [ ] Human mode renders readable narrative with headers and indentation\n- [ ] Key decisions heuristic: captures notes within 60 min of state/label changes by same actor\n- [ ] Works fully offline (no API calls, no LLM)\n- [ ] Performance: <500ms for issue with 50 notes\n- [ ] Command registered in main.rs and robot-docs\n- [ ] key_decisions capped at 10, timeline_excerpt capped at 20 events\n\n## Edge Cases\n- Issue with empty description: description_excerpt = \"(no description)\"\n- Issue with 500+ notes: timeline_excerpt capped at 20, key_decisions capped at 10\n- Issue not found in local DB: exit code 17 with suggestion to sync\n- Ambiguous project: exit code 18 with suggestion to use -p flag\n- MR with no review activity: activity section shows zeros\n- 
Cross-project references: show as unresolved with project path hint\n- Notes that are pure code blocks: include in key_decisions if correlated with events (they may contain implementation decisions)\n- ResourceStateEvent/ResourceLabelEvent field names: check src/core/events_db.rs for exact struct definitions before implementing\n\n## Dependency Context\n- **bd-2g50 (data gaps)**: BLOCKER. Provides `closed_at` field on IssueDetail for the header section. Without it, explain can still show state=\"closed\" but won't have the exact close timestamp.\n\n## Files to Create/Modify\n- NEW: src/cli/commands/explain.rs\n- src/cli/commands/mod.rs (add pub mod explain; re-export)\n- src/main.rs (register Explain subcommand in Commands enum, add handle_explain fn)\n- Reuse: show.rs queries, timeline pipeline, notes/discussions/resource_events queries from src/core/events_db.rs","status":"in_progress","priority":2,"issue_type":"feature","created_at":"2026-02-12T15:46:41.386454Z","created_by":"tayloreernisse","updated_at":"2026-02-19T14:22:13.501482Z","compaction_level":0,"original_size":0,"labels":["cli-imp","intelligence"],"dependencies":[{"issue_id":"bd-9lbr","depends_on_id":"bd-13lp","type":"parent-child","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-9lbr","depends_on_id":"bd-2g50","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} {"id":"bd-9wl5","title":"NOTE-2G: Parent metadata change propagation to note documents","description":"## Background\nNote documents inherit labels and title from parent issue/MR. When parent metadata changes, note documents become stale. The existing pipeline already marks discussion documents dirty on parent changes — note documents need the same treatment.\n\n## Approach\nFind where ingestion detects parent entity changes and marks discussion documents dirty. 
The dirty marking for discussions happens in:\n- src/ingestion/discussions.rs line 127: mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)\n- src/ingestion/mr_discussions.rs line 162 and 362: mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)\n\nThese fire when a discussion is upserted (which happens when parent entity is re-ingested). For note documents, we need to additionally mark all non-system notes of that discussion as dirty:\n\nAfter each mark_dirty_tx for Discussion, add:\n // Mark child note documents dirty (they inherit parent metadata)\n let note_ids: Vec = tx.prepare(\"SELECT id FROM notes WHERE discussion_id = ? AND is_system = 0\")?\n .query_map([local_discussion_id], |r| r.get(0))?\n .collect::, _>>()?;\n for note_id in note_ids {\n dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, note_id)?;\n }\n\nAlternative (more efficient, set-based):\n INSERT INTO dirty_sources (source_type, source_id, queued_at)\n SELECT 'note', n.id, ?1\n FROM notes n\n WHERE n.discussion_id = ?2 AND n.is_system = 0\n ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0\n\nUse the set-based approach for better performance with large discussions.\n\n## Files\n- MODIFY: src/ingestion/discussions.rs (add note dirty marking after line 127)\n- MODIFY: src/ingestion/mr_discussions.rs (add note dirty marking after lines 162 and 362)\n\n## TDD Anchor\nRED: test_parent_title_change_marks_notes_dirty — change issue title, re-ingest discussions, assert note documents appear in dirty_sources.\nGREEN: Add set-based INSERT INTO dirty_sources after discussion dirty marking.\nVERIFY: cargo test parent_title_change_marks_notes -- --nocapture\nTests: test_parent_label_change_marks_notes_dirty (modify issue labels, re-ingest, check dirty queue)\n\n## Acceptance Criteria\n- [ ] Discussion upsert for issue marks child non-system note documents dirty\n- [ ] Discussion upsert for MR marks child non-system note documents 
dirty (both call sites)\n- [ ] Only non-system notes marked dirty (is_system = 0 filter)\n- [ ] Set-based SQL (not per-note loop) for performance\n- [ ] Both tests pass\n\n## Dependency Context\n- Depends on NOTE-2D (bd-2ezb): dirty tracking infrastructure for notes must exist (dirty_sources accepts source_type='note', regenerator handles it)\n\n## Edge Cases\n- Discussion with 0 non-system notes: set-based INSERT is a no-op\n- Discussion with 100+ notes: set-based approach handles efficiently in one SQL statement\n- Concurrent discussion ingestion: ON CONFLICT DO UPDATE handles race safely","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-12T17:02:40.292874Z","created_by":"tayloreernisse","updated_at":"2026-02-12T18:13:15.717576Z","closed_at":"2026-02-12T18:13:15.717528Z","close_reason":"Implemented by agent swarm","compaction_level":0,"original_size":0,"labels":["per-note","search"]} {"id":"bd-a6yb","title":"Implement responsive breakpoints for all TUI screens","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-19T04:52:55.561576Z","created_by":"tayloreernisse","updated_at":"2026-02-19T05:10:12.531731Z","closed_at":"2026-02-19T05:10:12.531557Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-a6yb","depends_on_id":"bd-3t6r","type":"blocks","created_at":"2026-02-19T04:53:02.566163Z","created_by":"tayloreernisse"}]} {"id":"bd-am7","title":"Implement embedding pipeline with chunking","description":"## Background\nThe embedding pipeline takes documents, chunks them (paragraph-boundary splitting with overlap), sends chunks to Ollama for embedding via async HTTP, and stores vectors in sqlite-vec + metadata. It uses keyset pagination, concurrent HTTP requests via FuturesUnordered, per-batch transactions, and dimension validation.\n\n## Approach\nCreate \\`src/embedding/pipeline.rs\\` per PRD Section 4.4. 
**The pipeline is async.**\n\n**Constants (per PRD):**\n```rust\nconst BATCH_SIZE: usize = 32; // texts per Ollama API call\nconst DB_PAGE_SIZE: usize = 500; // keyset pagination page size\nconst EXPECTED_DIMS: usize = 768; // nomic-embed-text dimensions\nconst CHUNK_MAX_CHARS: usize = 32_000; // max chars per chunk\nconst CHUNK_OVERLAP_CHARS: usize = 500; // overlap between chunks\n```\n\n**Core async function:**\n```rust\npub async fn embed_documents(\n conn: &Connection,\n client: &OllamaClient,\n selection: EmbedSelection,\n concurrency: usize, // max in-flight HTTP requests\n progress_callback: Option>,\n) -> Result\n```\n\n**EmbedSelection:** Pending | RetryFailed\n**EmbedResult:** { embedded, failed, skipped }\n\n**Algorithm (per PRD):**\n1. count_pending_documents(conn, selection) for progress total\n2. Keyset pagination loop: find_pending_documents(conn, DB_PAGE_SIZE, last_id, selection)\n3. For each page:\n a. Begin transaction\n b. For each doc: clear_document_embeddings(&tx, doc.id), split_into_chunks(&doc.content)\n c. Build ChunkWork items with doc_hash + chunk_hash\n d. Commit clearing transaction\n4. Batch ChunkWork texts into Ollama calls (BATCH_SIZE=32)\n5. Use **FuturesUnordered** for concurrent HTTP, cap at \\`concurrency\\`\n6. collect_writes() in per-batch transactions: validate dims (768), store LE bytes, write metadata\n7. On error: record_embedding_error per chunk (not abort)\n8. 
Advance keyset cursor\n\n**ChunkWork struct:**\n```rust\nstruct ChunkWork {\n doc_id: i64,\n chunk_index: usize,\n doc_hash: String, // SHA-256 of FULL document (staleness detection)\n chunk_hash: String, // SHA-256 of THIS chunk (provenance)\n text: String,\n}\n```\n\n**Splitting:** split_into_chunks(content) -> Vec<(usize, String)>\n- Documents <= CHUNK_MAX_CHARS: single chunk (index 0)\n- Longer: split at paragraph boundaries (\\\\n\\\\n), fallback to sentence/word, with CHUNK_OVERLAP_CHARS overlap\n\n**Storage:** embeddings as raw LE bytes, rowid = encode_rowid(doc_id, chunk_idx)\n**Staleness detection:** uses document_hash (not chunk_hash) because it's document-level\n\nAlso create \\`src/embedding/change_detector.rs\\` (referenced in PRD module structure):\n```rust\npub fn detect_embedding_changes(conn: &Connection) -> Result>;\n```\n\n## Acceptance Criteria\n- [ ] Pipeline is async (uses FuturesUnordered for concurrent HTTP)\n- [ ] concurrency parameter caps in-flight HTTP requests\n- [ ] progress_callback reports (processed, total)\n- [ ] New documents embedded, changed re-embedded, unchanged skipped\n- [ ] clear_document_embeddings before re-embedding (range delete vec0 + metadata)\n- [ ] Chunking at paragraph boundaries with 500-char overlap\n- [ ] Short documents (<32k chars) produce exactly 1 chunk\n- [ ] Embeddings stored as raw LE bytes in vec0\n- [ ] Rowids encoded via encode_rowid(doc_id, chunk_index)\n- [ ] Dimension validation: 768 floats per embedding (mismatch -> record error, not store)\n- [ ] Per-batch transactions for writes\n- [ ] Errors recorded in embedding_metadata per chunk (last_error, attempt_count)\n- [ ] Keyset pagination (d.id > last_id, not OFFSET)\n- [ ] Pending detection uses document_hash (not chunk_hash)\n- [ ] \\`cargo build\\` succeeds\n\n## Files\n- \\`src/embedding/pipeline.rs\\` — new file (async)\n- \\`src/embedding/change_detector.rs\\` — new file\n- \\`src/embedding/mod.rs\\` — add \\`pub mod pipeline; pub mod 
change_detector;\\` + re-exports\n\n## TDD Loop\nRED: Unit tests for chunking:\n- \\`test_short_document_single_chunk\\` — <32k produces [(0, full_content)]\n- \\`test_long_document_multiple_chunks\\` — >32k splits at paragraph boundaries\n- \\`test_chunk_overlap\\` — adjacent chunks share 500-char overlap\n- \\`test_no_paragraph_boundary\\` — falls back to char boundary\nIntegration tests need Ollama or mock.\nGREEN: Implement split_into_chunks, embed_documents (async)\nVERIFY: \\`cargo test pipeline\\`\n\n## Edge Cases\n- Empty document content_text: skip (don't embed)\n- No paragraph boundaries: split at CHUNK_MAX_CHARS with overlap\n- Ollama error for one batch: record error per chunk, continue with next batch\n- Dimension mismatch (model returns 512 instead of 768): record error, don't store corrupt data\n- Document deleted between pagination and embedding: skip gracefully","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:34.093701Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:58:58.908585Z","closed_at":"2026-01-30T17:58:58.908525Z","close_reason":"Implemented embedding pipeline: chunking at paragraph boundaries with 500-char overlap, change detector (keyset pagination, hash-based staleness), async embed via Ollama with batch processing, dimension validation, per-chunk error recording, LE byte vector storage. 7 chunking tests pass. 
289 total tests.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-am7","depends_on_id":"bd-1y8","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-am7","depends_on_id":"bd-2ac","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"},{"issue_id":"bd-am7","depends_on_id":"bd-335","type":"blocks","created_at":"2026-02-12T19:34:59Z","created_by":"import"}]} diff --git a/.beads/last-touched b/.beads/last-touched index 417fea9..220e7cb 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-2fc +bd-9lbr diff --git a/crates/lore-tui/src/lib.rs b/crates/lore-tui/src/lib.rs index 8dcf5eb..1b44e2d 100644 --- a/crates/lore-tui/src/lib.rs +++ b/crates/lore-tui/src/lib.rs @@ -5,7 +5,7 @@ //! Built on FrankenTUI (Elm architecture): Model, update, view. //! The `lore` CLI spawns `lore-tui` via PATH lookup at runtime. -use anyhow::Result; +use anyhow::{Context, Result}; // Phase 0 modules. pub mod clock; // Clock trait: SystemClock + FakeClock (bd-2lg6) @@ -71,9 +71,40 @@ pub struct LaunchOptions { /// 2. **Data readiness** — check whether the database has any entity data. /// If empty, start on the Bootstrap screen; otherwise start on Dashboard. pub fn launch_tui(options: LaunchOptions) -> Result<()> { - let _options = options; - // Phase 1 will wire this to LoreApp + App::fullscreen().run() - eprintln!("lore-tui: browse mode not yet implemented (Phase 1)"); + // 1. Resolve database path. + let db_path = lore::core::paths::get_db_path(None); + if !db_path.exists() { + anyhow::bail!( + "No lore database found at {}.\n\ + Run 'lore init' to create a config, then 'lore sync' to fetch data.", + db_path.display() + ); + } + + // 2. Open DB and run schema preflight. + let db = db::DbManager::open(&db_path) + .with_context(|| format!("opening database at {}", db_path.display()))?; + db.with_reader(|conn| schema_preflight(conn))?; + + // 3. 
Check data readiness — bootstrap screen if empty. + let start_on_bootstrap = db.with_reader(|conn| { + let readiness = action::check_data_readiness(conn)?; + Ok(!readiness.has_any_data()) + })?; + + // 4. Build the app model. + let mut app = app::LoreApp::new(); + app.db = Some(db); + if start_on_bootstrap { + app.navigation.reset_to(message::Screen::Bootstrap); + } + + // 5. Enter the FrankenTUI event loop. + ftui::App::fullscreen(app) + .with_mouse() + .run() + .context("running TUI event loop")?; + Ok(()) } diff --git a/migrations/029_issue_links_job_type.sql b/migrations/029_issue_links_job_type.sql new file mode 100644 index 0000000..75db249 --- /dev/null +++ b/migrations/029_issue_links_job_type.sql @@ -0,0 +1,43 @@ +-- Migration 029: Expand pending_dependent_fetches CHECK to include 'issue_links' job type. +-- Also adds issue_links_synced_for_updated_at watermark to issues table. +-- SQLite cannot ALTER CHECK constraints, so we recreate the table. + +-- Step 1: Recreate pending_dependent_fetches with expanded CHECK +CREATE TABLE pending_dependent_fetches_new ( + id INTEGER PRIMARY KEY, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')), + entity_iid INTEGER NOT NULL, + entity_local_id INTEGER NOT NULL, + job_type TEXT NOT NULL CHECK (job_type IN ( + 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links' + )), + payload_json TEXT, + enqueued_at INTEGER NOT NULL, + locked_at INTEGER, + attempts INTEGER NOT NULL DEFAULT 0, + next_retry_at INTEGER, + last_error TEXT +); + +INSERT INTO pending_dependent_fetches_new + SELECT * FROM pending_dependent_fetches; + +DROP TABLE pending_dependent_fetches; + +ALTER TABLE pending_dependent_fetches_new RENAME TO pending_dependent_fetches; + +-- Recreate indexes from migration 011 +CREATE UNIQUE INDEX uq_pending_fetches + ON pending_dependent_fetches(project_id, entity_type, entity_iid, job_type); +CREATE INDEX 
idx_pending_fetches_claimable + ON pending_dependent_fetches(job_type, locked_at) WHERE locked_at IS NULL; +CREATE INDEX idx_pending_fetches_retryable + ON pending_dependent_fetches(next_retry_at) WHERE locked_at IS NULL AND next_retry_at IS NOT NULL; + +-- Step 2: Add watermark column for issue link sync tracking +ALTER TABLE issues ADD COLUMN issue_links_synced_for_updated_at INTEGER; + +-- Update schema version +INSERT INTO schema_version (version, applied_at, description) +VALUES (29, strftime('%s', 'now') * 1000, 'Expand dependent fetch queue for issue links'); diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs index 76311be..df9c482 100644 --- a/src/cli/autocorrect.rs +++ b/src/cli/autocorrect.rs @@ -125,6 +125,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--no-events", "--no-file-changes", "--no-status", + "--no-issue-links", "--dry-run", "--no-dry-run", "--timings", diff --git a/src/cli/commands/explain.rs b/src/cli/commands/explain.rs new file mode 100644 index 0000000..a665a8b --- /dev/null +++ b/src/cli/commands/explain.rs @@ -0,0 +1,1177 @@ +//! `lore explain` — auto-generate issue/MR narrative. +//! +//! Assembles data from multiple tables (issues, notes, discussions, +//! resource_state_events, resource_label_events, entity_references) +//! into a coherent story. Template-based (no LLM), deterministic. + +use rusqlite::Connection; +use serde::Serialize; + +use crate::core::config::Config; +use crate::core::db::{create_connection, get_db_path}; +use crate::core::error::Result; + +use super::show::{ClosingMrRef, RelatedIssueRef}; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/// Full explanation response for an entity. 
+#[derive(Debug, Serialize)] +pub struct ExplainResponse { + pub entity: EntitySummary, + pub description_excerpt: String, + pub key_decisions: Vec<KeyDecision>, + pub activity: ActivitySummary, + pub open_threads: Vec<OpenThread>, + pub related: RelatedEntities, + pub timeline_excerpt: Vec<TimelineEvent>, +} + +#[derive(Debug, Serialize)] +pub struct EntitySummary { + #[serde(rename = "type")] + pub entity_type: String, + pub iid: i64, + pub title: String, + pub state: String, + pub author: String, + pub assignees: Vec<String>, + pub labels: Vec<String>, + pub created_at: i64, + pub updated_at: i64, + pub url: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub status_name: Option<String>, +} + +#[derive(Debug, Clone, Serialize)] +pub struct KeyDecision { + pub timestamp: i64, + pub actor: String, + pub action: String, + pub context_note: String, +} + +#[derive(Debug, Serialize)] +pub struct ActivitySummary { + pub state_changes: usize, + pub label_changes: usize, + pub notes: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub first_event: Option<i64>, + #[serde(skip_serializing_if = "Option::is_none")] + pub last_event: Option<i64>, +} + +#[derive(Debug, Serialize)] +pub struct OpenThread { + pub discussion_id: String, + pub started_by: String, + pub started_at: i64, + pub note_count: usize, + pub last_note_at: i64, +} + +#[derive(Debug, Serialize)] +pub struct RelatedEntities { + pub closing_mrs: Vec<ClosingMrRef>, + pub related_issues: Vec<RelatedIssueRef>, +} + +#[derive(Debug, Serialize)] +pub struct TimelineEvent { + pub timestamp: i64, + pub event_type: String, + pub actor: String, + pub summary: String, +} + +// --------------------------------------------------------------------------- +// Internal event types for the key decisions heuristic +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct StateOrLabelEvent { + created_at: i64, + user: String, + description: String, +} + +#[derive(Debug)] +struct NoteRow { + author_username: String, + body: String, + created_at: i64, +} + 
+// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const MAX_KEY_DECISIONS: usize = 10; +const MAX_TIMELINE_EVENTS: usize = 20; +const DECISION_WINDOW_MS: i64 = 60 * 60 * 1000; // 1 hour +const DESCRIPTION_EXCERPT_LEN: usize = 500; + +// --------------------------------------------------------------------------- +// Public entry point +// --------------------------------------------------------------------------- + +/// Run the explain command for an issue or MR. +pub fn run_explain( + config: &Config, + entity_type: &str, + iid: i64, + project_filter: Option<&str>, +) -> Result<ExplainResponse> { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + let effective_project = config.effective_project(project_filter); + + match entity_type { + "issues" | "issue" => explain_issue(&conn, iid, effective_project), + "mrs" | "mr" | "merge_requests" | "merge_request" => { + explain_mr(&conn, iid, effective_project) + } + _ => Err(crate::core::error::LoreError::Other(format!( + "Unknown entity type '{entity_type}'. Use 'issues' or 'mrs'." 
+ ))), + } +} + +// --------------------------------------------------------------------------- +// Issue explain +// --------------------------------------------------------------------------- + +fn explain_issue( + conn: &Connection, + iid: i64, + project_filter: Option<&str>, +) -> Result<ExplainResponse> { + let issue = find_issue_row(conn, iid, project_filter)?; + + let entity = EntitySummary { + entity_type: "issue".to_string(), + iid: issue.iid, + title: issue.title.clone(), + state: issue.state.clone(), + author: issue.author_username.clone(), + assignees: get_issue_assignees(conn, issue.id)?, + labels: get_issue_labels(conn, issue.id)?, + created_at: issue.created_at, + updated_at: issue.updated_at, + url: issue.web_url.clone(), + status_name: issue.status_name.clone(), + }; + + let description_excerpt = truncate_description(issue.description.as_deref()); + + let state_events = query_state_events(conn, Some(issue.id), None)?; + let label_events = query_label_events(conn, Some(issue.id), None)?; + let notes = query_non_system_notes(conn, "Issue", issue.id)?; + + let key_decisions = extract_key_decisions(&state_events, &label_events, &notes); + + let activity = build_activity_summary(&state_events, &label_events, &notes); + + let open_threads = query_open_threads(conn, "Issue", issue.id)?; + + let closing_mrs = query_closing_mrs(conn, issue.id)?; + let related_issues = query_related_issues(conn, issue.id)?; + + let timeline_excerpt = build_timeline_excerpt(&state_events, &label_events, &notes); + + Ok(ExplainResponse { + entity, + description_excerpt, + key_decisions, + activity, + open_threads, + related: RelatedEntities { + closing_mrs, + related_issues, + }, + timeline_excerpt, + }) +} + +// --------------------------------------------------------------------------- +// MR explain +// --------------------------------------------------------------------------- + +fn explain_mr( + conn: &Connection, + iid: i64, + project_filter: Option<&str>, +) -> Result<ExplainResponse> { + let mr = 
find_mr_row(conn, iid, project_filter)?; + + let entity = EntitySummary { + entity_type: "merge_request".to_string(), + iid: mr.iid, + title: mr.title.clone(), + state: mr.state.clone(), + author: mr.author_username.clone(), + assignees: get_mr_assignees(conn, mr.id)?, + labels: get_mr_labels(conn, mr.id)?, + created_at: mr.created_at, + updated_at: mr.updated_at, + url: mr.web_url.clone(), + status_name: None, + }; + + let description_excerpt = truncate_description(mr.description.as_deref()); + + let state_events = query_state_events(conn, None, Some(mr.id))?; + let label_events = query_label_events(conn, None, Some(mr.id))?; + let notes = query_non_system_notes(conn, "MergeRequest", mr.id)?; + + let key_decisions = extract_key_decisions(&state_events, &label_events, &notes); + + let activity = build_activity_summary(&state_events, &label_events, &notes); + + let open_threads = query_open_threads(conn, "MergeRequest", mr.id)?; + + // MRs don't have closing_mrs or related_issues in the same sense, + // but we can find what issues this MR closes. 
+ let closing_mrs = Vec::new(); + let related_issues = query_issues_closed_by_mr(conn, mr.id)?; + + let timeline_excerpt = build_timeline_excerpt(&state_events, &label_events, &notes); + + Ok(ExplainResponse { + entity, + description_excerpt, + key_decisions, + activity, + open_threads, + related: RelatedEntities { + closing_mrs, + related_issues, + }, + timeline_excerpt, + }) +} + +// --------------------------------------------------------------------------- +// Key decisions heuristic +// --------------------------------------------------------------------------- + +fn extract_key_decisions( + state_events: &[StateOrLabelEvent], + label_events: &[StateOrLabelEvent], + notes: &[NoteRow], +) -> Vec<KeyDecision> { + let mut events: Vec<&StateOrLabelEvent> = state_events.iter().chain(label_events).collect(); + events.sort_by_key(|e| e.created_at); + + let mut decisions = Vec::new(); + + for event in &events { + // Find a note by the same actor within 60 min after the event + for note in notes { + if note.author_username == event.user + && note.created_at >= event.created_at + && note.created_at <= event.created_at + DECISION_WINDOW_MS + { + decisions.push(KeyDecision { + timestamp: event.created_at, + actor: event.user.clone(), + action: event.description.clone(), + context_note: truncate_str(&note.body, DESCRIPTION_EXCERPT_LEN), + }); + break; // one note per event + } + } + } + + decisions.truncate(MAX_KEY_DECISIONS); + decisions +} + +// --------------------------------------------------------------------------- +// Activity summary +// --------------------------------------------------------------------------- + +fn build_activity_summary( + state_events: &[StateOrLabelEvent], + label_events: &[StateOrLabelEvent], + notes: &[NoteRow], +) -> ActivitySummary { + let mut all_timestamps: Vec<i64> = Vec::new(); + for e in state_events { + all_timestamps.push(e.created_at); + } + for e in label_events { + all_timestamps.push(e.created_at); + } + for n in notes { + 
all_timestamps.push(n.created_at); + } + + let first_event = all_timestamps.iter().copied().min(); + let last_event = all_timestamps.iter().copied().max(); + + ActivitySummary { + state_changes: state_events.len(), + label_changes: label_events.len(), + notes: notes.len(), + first_event, + last_event, + } +} + +// --------------------------------------------------------------------------- +// Timeline excerpt +// --------------------------------------------------------------------------- + +fn build_timeline_excerpt( + state_events: &[StateOrLabelEvent], + label_events: &[StateOrLabelEvent], + notes: &[NoteRow], +) -> Vec<TimelineEvent> { + let mut events: Vec<TimelineEvent> = Vec::new(); + + for e in state_events { + events.push(TimelineEvent { + timestamp: e.created_at, + event_type: "state_change".to_string(), + actor: e.user.clone(), + summary: e.description.clone(), + }); + } + + for e in label_events { + events.push(TimelineEvent { + timestamp: e.created_at, + event_type: "label_change".to_string(), + actor: e.user.clone(), + summary: e.description.clone(), + }); + } + + for n in notes { + events.push(TimelineEvent { + timestamp: n.created_at, + event_type: "note".to_string(), + actor: n.author_username.clone(), + summary: truncate_str(&n.body, 200), + }); + } + + events.sort_by_key(|e| e.timestamp); + events.truncate(MAX_TIMELINE_EVENTS); + events +} + +// --------------------------------------------------------------------------- +// Database queries +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct IssueRow { + id: i64, + iid: i64, + title: String, + description: Option<String>, + state: String, + author_username: String, + created_at: i64, + updated_at: i64, + web_url: Option<String>, + status_name: Option<String>, +} + +fn find_issue_row( + conn: &Connection, + iid: i64, + project_filter: Option<&str>, +) -> Result<IssueRow> { + let (sql, params): (&str, Vec<Box<dyn rusqlite::ToSql>>) = match project_filter { + Some(project) => { + let project_id = resolve_project(conn, project)?; + ( 
+ "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username, + i.created_at, i.updated_at, i.web_url, + ws.status_name + FROM issues i + LEFT JOIN work_item_status ws ON ws.issue_id = i.id + WHERE i.iid = ?1 AND i.project_id = ?2", + vec![ + Box::new(iid) as Box, + Box::new(project_id), + ], + ) + } + None => ( + "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username, + i.created_at, i.updated_at, i.web_url, + ws.status_name + FROM issues i + LEFT JOIN work_item_status ws ON ws.issue_id = i.id + WHERE i.iid = ?1", + vec![Box::new(iid) as Box], + ), + }; + + let params_ref: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + conn.query_row(sql, params_ref.as_slice(), |row| { + Ok(IssueRow { + id: row.get(0)?, + iid: row.get(1)?, + title: row.get(2)?, + description: row.get(3)?, + state: row.get(4)?, + author_username: row.get(5)?, + created_at: row.get(6)?, + updated_at: row.get(7)?, + web_url: row.get(8)?, + status_name: row.get(9)?, + }) + }) + .map_err(|e| match e { + rusqlite::Error::QueryReturnedNoRows => { + crate::core::error::LoreError::NotFound(format!("Issue #{iid} not found in local database. 
Run 'lore sync' first.")) + } + other => crate::core::error::LoreError::Database(other), + }) +} + +#[derive(Debug)] +struct MrRow { + id: i64, + iid: i64, + title: String, + description: Option, + state: String, + author_username: String, + created_at: i64, + updated_at: i64, + web_url: Option, +} + +fn find_mr_row( + conn: &Connection, + iid: i64, + project_filter: Option<&str>, +) -> Result { + let (sql, params): (&str, Vec>) = match project_filter { + Some(project) => { + let project_id = resolve_project(conn, project)?; + ( + "SELECT id, iid, title, description, state, author_username, + created_at, updated_at, web_url + FROM merge_requests + WHERE iid = ?1 AND project_id = ?2", + vec![ + Box::new(iid) as Box, + Box::new(project_id), + ], + ) + } + None => ( + "SELECT id, iid, title, description, state, author_username, + created_at, updated_at, web_url + FROM merge_requests + WHERE iid = ?1", + vec![Box::new(iid) as Box], + ), + }; + + let params_ref: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + conn.query_row(sql, params_ref.as_slice(), |row| { + Ok(MrRow { + id: row.get(0)?, + iid: row.get(1)?, + title: row.get(2)?, + description: row.get(3)?, + state: row.get(4)?, + author_username: row.get(5)?, + created_at: row.get(6)?, + updated_at: row.get(7)?, + web_url: row.get(8)?, + }) + }) + .map_err(|e| match e { + rusqlite::Error::QueryReturnedNoRows => { + crate::core::error::LoreError::NotFound(format!("MR !{iid} not found in local database. 
Run 'lore sync' first.")) + } + other => crate::core::error::LoreError::Database(other), + }) +} + +fn resolve_project(conn: &Connection, project: &str) -> Result { + // Try exact match first, then suffix, then substring + let id: std::result::Result = conn.query_row( + "SELECT id FROM projects WHERE path_with_namespace = ?1 + UNION ALL + SELECT id FROM projects WHERE path_with_namespace LIKE '%/' || ?1 + UNION ALL + SELECT id FROM projects WHERE path_with_namespace LIKE '%' || ?1 || '%' + LIMIT 1", + [project], + |row| row.get(0), + ); + + id.map_err(|_| { + crate::core::error::LoreError::NotFound(format!( + "Project matching '{project}' not found." + )) + }) +} + +fn get_issue_labels(conn: &Connection, issue_id: i64) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT label FROM issue_labels WHERE issue_id = ? ORDER BY label", + )?; + let labels: Vec = stmt + .query_map([issue_id], |row| row.get(0))? + .collect::, _>>()?; + Ok(labels) +} + +fn get_issue_assignees(conn: &Connection, issue_id: i64) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT username FROM issue_assignees WHERE issue_id = ? ORDER BY username", + )?; + let assignees: Vec = stmt + .query_map([issue_id], |row| row.get(0))? + .collect::, _>>()?; + Ok(assignees) +} + +fn get_mr_labels(conn: &Connection, mr_id: i64) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT label FROM mr_labels WHERE merge_request_id = ? ORDER BY label", + )?; + let labels: Vec = stmt + .query_map([mr_id], |row| row.get(0))? + .collect::, _>>()?; + Ok(labels) +} + +fn get_mr_assignees(conn: &Connection, mr_id: i64) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT username FROM mr_assignees WHERE merge_request_id = ? ORDER BY username", + )?; + let assignees: Vec = stmt + .query_map([mr_id], |row| row.get(0))? 
+ .collect::, _>>()?; + Ok(assignees) +} + +fn query_state_events( + conn: &Connection, + issue_id: Option, + mr_id: Option, +) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT state, actor_username, created_at + FROM resource_state_events + WHERE (issue_id = ?1 OR ?1 IS NULL) + AND (merge_request_id = ?2 OR ?2 IS NULL) + ORDER BY created_at", + )?; + + let events: Vec = stmt + .query_map(rusqlite::params![issue_id, mr_id], |row| { + let state: String = row.get(0)?; + let actor: Option = row.get(1)?; + let created_at: i64 = row.get(2)?; + Ok(StateOrLabelEvent { + created_at, + user: actor.unwrap_or_default(), + description: format!("state -> {state}"), + }) + })? + .collect::, _>>()?; + + Ok(events) +} + +fn query_label_events( + conn: &Connection, + issue_id: Option, + mr_id: Option, +) -> Result> { + let mut stmt = conn.prepare_cached( + "SELECT action, label_name, actor_username, created_at + FROM resource_label_events + WHERE (issue_id = ?1 OR ?1 IS NULL) + AND (merge_request_id = ?2 OR ?2 IS NULL) + ORDER BY created_at", + )?; + + let events: Vec = stmt + .query_map(rusqlite::params![issue_id, mr_id], |row| { + let action: String = row.get(0)?; + let label_name: Option = row.get(1)?; + let actor: Option = row.get(2)?; + let created_at: i64 = row.get(3)?; + let prefix = if action == "add" { "+" } else { "-" }; + let label = label_name.unwrap_or_default(); + Ok(StateOrLabelEvent { + created_at, + user: actor.unwrap_or_default(), + description: format!("label: {prefix}{label}"), + }) + })? 
+ .collect::, _>>()?; + + Ok(events) +} + +fn query_non_system_notes( + conn: &Connection, + noteable_type: &str, + entity_id: i64, +) -> Result> { + let entity_col = match noteable_type { + "Issue" => "issue_id", + "MergeRequest" => "merge_request_id", + _ => return Ok(Vec::new()), + }; + + let sql = format!( + "SELECT n.author_username, n.body, n.created_at + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + WHERE d.noteable_type = ?1 AND d.{entity_col} = ?2 + AND n.is_system = 0 + ORDER BY n.created_at" + ); + + let mut stmt = conn.prepare(&sql)?; + let notes: Vec = stmt + .query_map(rusqlite::params![noteable_type, entity_id], |row| { + Ok(NoteRow { + author_username: row.get(0)?, + body: row.get(1)?, + created_at: row.get(2)?, + }) + })? + .collect::, _>>()?; + + Ok(notes) +} + +fn query_open_threads( + conn: &Connection, + noteable_type: &str, + entity_id: i64, +) -> Result> { + let entity_col = match noteable_type { + "Issue" => "issue_id", + "MergeRequest" => "merge_request_id", + _ => return Ok(Vec::new()), + }; + + let sql = format!( + "SELECT d.gitlab_id, + MIN(n.author_username) AS started_by, + MIN(n.created_at) AS started_at, + COUNT(n.id) AS note_count, + MAX(n.created_at) AS last_note_at + FROM discussions d + JOIN notes n ON n.discussion_id = d.id + WHERE d.noteable_type = ?1 AND d.{entity_col} = ?2 + AND d.resolved = 0 + AND n.is_system = 0 + GROUP BY d.id + HAVING COUNT(n.id) > 0 + ORDER BY started_at" + ); + + let mut stmt = conn.prepare(&sql)?; + let threads: Vec = stmt + .query_map(rusqlite::params![noteable_type, entity_id], |row| { + Ok(OpenThread { + discussion_id: row.get::<_, String>(0)?, + started_by: row.get(1)?, + started_at: row.get(2)?, + note_count: row.get::<_, i64>(3)? as usize, + last_note_at: row.get(4)?, + }) + })? 
+ .collect::, _>>()?; + + Ok(threads) +} + +fn query_closing_mrs(conn: &Connection, issue_id: i64) -> Result> { + let mut stmt = conn.prepare( + "SELECT mr.iid, mr.title, mr.state, mr.web_url + FROM entity_references er + JOIN merge_requests mr ON mr.id = er.source_entity_id + WHERE er.target_entity_type = 'issue' + AND er.target_entity_id = ? + AND er.source_entity_type = 'merge_request' + AND er.reference_type = 'closes' + ORDER BY mr.iid", + )?; + + let mrs: Vec = stmt + .query_map([issue_id], |row| { + Ok(ClosingMrRef { + iid: row.get(0)?, + title: row.get(1)?, + state: row.get(2)?, + web_url: row.get(3)?, + }) + })? + .collect::, _>>()?; + + Ok(mrs) +} + +fn query_related_issues(conn: &Connection, issue_id: i64) -> Result> { + let mut stmt = conn.prepare( + "SELECT DISTINCT i.iid, i.title, i.state, i.web_url, NULL AS project_path + FROM entity_references er + JOIN issues i ON i.id = er.target_entity_id + WHERE er.source_entity_type = 'issue' + AND er.source_entity_id = ?1 + AND er.target_entity_type = 'issue' + AND er.reference_type = 'related' + AND er.target_entity_id IS NOT NULL + UNION + SELECT DISTINCT i.iid, i.title, i.state, i.web_url, NULL AS project_path + FROM entity_references er + JOIN issues i ON i.id = er.source_entity_id + WHERE er.target_entity_type = 'issue' + AND er.target_entity_id = ?1 + AND er.source_entity_type = 'issue' + AND er.reference_type = 'related' + ORDER BY iid", + )?; + + let related: Vec = stmt + .query_map([issue_id], |row| { + Ok(RelatedIssueRef { + iid: row.get(0)?, + title: row.get(1)?, + state: row.get(2)?, + web_url: row.get(3)?, + project_path: row.get(4)?, + }) + })? + .collect::, _>>()?; + + Ok(related) +} + +fn query_issues_closed_by_mr(conn: &Connection, mr_id: i64) -> Result> { + let mut stmt = conn.prepare( + "SELECT i.iid, i.title, i.state, i.web_url + FROM entity_references er + JOIN issues i ON i.id = er.target_entity_id + WHERE er.source_entity_type = 'merge_request' + AND er.source_entity_id = ? 
+ AND er.target_entity_type = 'issue' + AND er.reference_type = 'closes' + ORDER BY i.iid", + )?; + + let issues: Vec = stmt + .query_map([mr_id], |row| { + Ok(RelatedIssueRef { + iid: row.get(0)?, + title: row.get(1)?, + state: row.get(2)?, + web_url: row.get(3)?, + project_path: None, + }) + })? + .collect::, _>>()?; + + Ok(issues) +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn truncate_description(desc: Option<&str>) -> String { + match desc { + Some(d) if !d.is_empty() => truncate_str(d, DESCRIPTION_EXCERPT_LEN), + _ => "(no description)".to_string(), + } +} + +fn truncate_str(s: &str, max_len: usize) -> String { + if s.len() <= max_len { + s.to_string() + } else { + // Find a char boundary near max_len + let truncated = &s[..s.floor_char_boundary(max_len)]; + format!("{truncated}...") + } +} + +// --------------------------------------------------------------------------- +// Output formatting +// --------------------------------------------------------------------------- + +pub fn print_explain_json(response: &ExplainResponse, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": response, + "meta": { "elapsed_ms": elapsed_ms } + }); + println!("{}", serde_json::to_string(&output).unwrap_or_default()); +} + +pub fn print_explain_human(response: &ExplainResponse) { + use crate::core::time::format_ms_relative; + + // Header + let entity = &response.entity; + let type_label = if entity.entity_type == "issue" { + format!("Issue #{}", entity.iid) + } else { + format!("MR !{}", entity.iid) + }; + + println!("{type_label}: {}", entity.title); + println!( + "State: {} | Author: {} | Created: {}", + entity.state, + entity.author, + format_ms_relative(entity.created_at), + ); + + if !entity.assignees.is_empty() { + println!("Assignees: {}", entity.assignees.join(", ")); + } + if !entity.labels.is_empty() { + 
println!("Labels: {}", entity.labels.join(", ")); + } + if let Some(status) = &entity.status_name { + println!("Status: {status}"); + } + println!(); + + // Description excerpt + println!("--- Description ---"); + println!("{}", response.description_excerpt); + println!(); + + // Key decisions + if !response.key_decisions.is_empty() { + println!("--- Key Decisions ({}) ---", response.key_decisions.len()); + for d in &response.key_decisions { + println!( + " {} | {} | {}", + format_ms_relative(d.timestamp), + d.actor, + d.action, + ); + // Show first line of the context note + let first_line = d.context_note.lines().next().unwrap_or(""); + if !first_line.is_empty() { + println!(" > {first_line}"); + } + } + println!(); + } + + // Activity + let a = &response.activity; + println!( + "--- Activity: {} state changes, {} label changes, {} notes ---", + a.state_changes, a.label_changes, a.notes, + ); + if let (Some(first), Some(last)) = (a.first_event, a.last_event) { + println!( + " Span: {} to {}", + format_ms_relative(first), + format_ms_relative(last), + ); + } + println!(); + + // Open threads + if !response.open_threads.is_empty() { + println!("--- Open Threads ({}) ---", response.open_threads.len()); + for t in &response.open_threads { + println!( + " {} started by {} ({} notes, last {})", + t.discussion_id, + t.started_by, + t.note_count, + format_ms_relative(t.last_note_at), + ); + } + println!(); + } + + // Related entities + let r = &response.related; + if !r.closing_mrs.is_empty() { + println!("--- Closing MRs ---"); + for mr in &r.closing_mrs { + println!(" !{} {} [{}]", mr.iid, mr.title, mr.state); + } + println!(); + } + if !r.related_issues.is_empty() { + println!("--- Related Issues ---"); + for issue in &r.related_issues { + println!(" #{} {} [{}]", issue.iid, issue.title, issue.state); + } + println!(); + } + + // Timeline excerpt + if !response.timeline_excerpt.is_empty() { + println!( + "--- Timeline (first {}) ---", + response.timeline_excerpt.len() 
+ ); + for e in &response.timeline_excerpt { + println!( + " {} | {} | {} | {}", + format_ms_relative(e.timestamp), + e.event_type, + e.actor, + e.summary.lines().next().unwrap_or(""), + ); + } + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::db::{create_connection, run_migrations}; + + fn setup_test_db() -> (tempfile::NamedTempFile, Connection) { + let tmp = tempfile::NamedTempFile::new().unwrap(); + let conn = create_connection(tmp.path()).unwrap(); + run_migrations(&conn).unwrap(); + + // Insert a project + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) + VALUES (1, 42, 'group/repo', 'https://gitlab.example.com/group/repo')", + [], + ) + .unwrap(); + + (tmp, conn) + } + + fn insert_issue(conn: &Connection, id: i64, iid: i64, title: &str) { + conn.execute( + "INSERT INTO issues (id, project_id, gitlab_id, iid, title, state, author_username, created_at, updated_at) + VALUES (?1, 1, ?1, ?2, ?3, 'opened', 'alice', 1000000, 2000000)", + rusqlite::params![id, iid, title], + ) + .unwrap(); + } + + fn insert_mr(conn: &Connection, id: i64, iid: i64, title: &str) { + conn.execute( + "INSERT INTO merge_requests (id, project_id, gitlab_id, iid, title, state, author_username, source_branch, target_branch, created_at, updated_at) + VALUES (?1, 1, ?1, ?2, ?3, 'merged', 'bob', 'feat', 'main', 1000000, 2000000)", + rusqlite::params![id, iid, title], + ) + .unwrap(); + } + + fn insert_discussion(conn: &Connection, id: i64, gitlab_id: &str, noteable_type: &str, entity_id: i64, resolved: bool) { + let (issue_id, mr_id) = match noteable_type { + "Issue" => (Some(entity_id), None), + "MergeRequest" => (None, Some(entity_id)), + _ => panic!("bad noteable_type"), + }; + conn.execute( + "INSERT INTO discussions (id, gitlab_id, project_id, 
noteable_type, issue_id, merge_request_id, resolved, individual_note) + VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, 0)", + rusqlite::params![id, gitlab_id, noteable_type, issue_id, mr_id, resolved], + ) + .unwrap(); + } + + fn insert_note(conn: &Connection, id: i64, discussion_id: i64, author: &str, body: &str, created_at: i64, is_system: bool) { + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, author_username, body, created_at, updated_at, is_system, noteable_type) + VALUES (?1, ?1, ?2, ?3, ?4, ?5, ?5, ?6, 'Issue')", + rusqlite::params![id, discussion_id, author, body, created_at, is_system], + ) + .unwrap(); + } + + fn insert_state_event(conn: &Connection, id: i64, issue_id: Option, mr_id: Option, state: &str, actor: &str, created_at: i64) { + conn.execute( + "INSERT INTO resource_state_events (gitlab_id, project_id, issue_id, merge_request_id, state, actor_username, created_at) + VALUES (?1, 1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![id, issue_id, mr_id, state, actor, created_at], + ) + .unwrap(); + } + + fn insert_label_event(conn: &Connection, id: i64, issue_id: Option, mr_id: Option, action: &str, label: &str, actor: &str, created_at: i64) { + conn.execute( + "INSERT INTO resource_label_events (gitlab_id, project_id, issue_id, merge_request_id, action, label_name, actor_username, created_at) + VALUES (?1, 1, ?2, ?3, ?4, ?5, ?6, ?7)", + rusqlite::params![id, issue_id, mr_id, action, label, actor, created_at], + ) + .unwrap(); + } + + // ----------------------------------------------------------------------- + // Tests + // ----------------------------------------------------------------------- + + #[test] + fn test_explain_issue_basic() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Fix auth flow"); + + // Add a discussion with a note + insert_discussion(&conn, 1, "disc-1", "Issue", 1, false); + insert_note(&conn, 1, 1, "alice", "Working on this now", 1500000, false); + + let response = explain_issue(&conn, 42, 
Some("group/repo")).unwrap(); + + assert_eq!(response.entity.entity_type, "issue"); + assert_eq!(response.entity.iid, 42); + assert_eq!(response.entity.title, "Fix auth flow"); + assert_eq!(response.entity.state, "opened"); + assert_eq!(response.entity.author, "alice"); + assert_eq!(response.description_excerpt, "(no description)"); + assert_eq!(response.activity.notes, 1); + assert_eq!(response.open_threads.len(), 1); + assert!(!response.timeline_excerpt.is_empty()); + } + + #[test] + fn test_explain_key_decision_heuristic() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Bug fix"); + + // State change by alice at t=1000000 + insert_state_event(&conn, 1, Some(1), None, "closed", "alice", 1_000_000); + + // Note by alice at t=1000000 + 30min (within 60min window) + insert_discussion(&conn, 1, "disc-1", "Issue", 1, true); + insert_note(&conn, 1, 1, "alice", "Fixed by reverting the config change", 1_000_000 + 30 * 60 * 1000, false); + + let response = explain_issue(&conn, 42, Some("group/repo")).unwrap(); + + assert_eq!(response.key_decisions.len(), 1); + let decision = &response.key_decisions[0]; + assert_eq!(decision.actor, "alice"); + assert!(decision.action.contains("closed")); + assert!(decision.context_note.contains("reverting")); + } + + #[test] + fn test_explain_key_decision_ignores_unrelated_notes() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Bug fix"); + + // State change by alice at t=1000000 + insert_state_event(&conn, 1, Some(1), None, "closed", "alice", 1_000_000); + + // Note by BOB (different actor) within window + insert_discussion(&conn, 1, "disc-1", "Issue", 1, true); + insert_note(&conn, 1, 1, "bob", "Why was this closed?", 1_000_000 + 10 * 60 * 1000, false); + + let response = explain_issue(&conn, 42, Some("group/repo")).unwrap(); + + // Bob's note should NOT be correlated since he didn't make the state change + assert_eq!(response.key_decisions.len(), 0); + } + + #[test] + fn 
test_explain_open_threads() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Feature request"); + + // Resolved discussion + insert_discussion(&conn, 1, "disc-resolved", "Issue", 1, true); + insert_note(&conn, 1, 1, "alice", "This is resolved", 1500000, false); + + // Unresolved discussion + insert_discussion(&conn, 2, "disc-open", "Issue", 1, false); + insert_note(&conn, 2, 2, "bob", "What about edge cases?", 1600000, false); + insert_note(&conn, 3, 2, "alice", "Good point, investigating", 1700000, false); + + let response = explain_issue(&conn, 42, Some("group/repo")).unwrap(); + + // Only the unresolved thread should appear + assert_eq!(response.open_threads.len(), 1); + assert_eq!(response.open_threads[0].discussion_id, "disc-open"); + assert_eq!(response.open_threads[0].started_by, "alice"); // MIN(author) + assert_eq!(response.open_threads[0].note_count, 2); + } + + #[test] + fn test_explain_no_notes() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Empty issue"); + + let response = explain_issue(&conn, 42, Some("group/repo")).unwrap(); + + assert_eq!(response.entity.iid, 42); + assert_eq!(response.description_excerpt, "(no description)"); + assert!(response.key_decisions.is_empty()); + assert_eq!(response.activity.notes, 0); + assert!(response.open_threads.is_empty()); + assert!(response.timeline_excerpt.is_empty()); + } + + #[test] + fn test_explain_mr_basic() { + let (_tmp, conn) = setup_test_db(); + insert_mr(&conn, 1, 99, "Add authentication"); + + let response = explain_mr(&conn, 99, Some("group/repo")).unwrap(); + + assert_eq!(response.entity.entity_type, "merge_request"); + assert_eq!(response.entity.iid, 99); + assert_eq!(response.entity.title, "Add authentication"); + assert_eq!(response.entity.state, "merged"); + } + + #[test] + fn test_explain_activity_counts() { + let (_tmp, conn) = setup_test_db(); + insert_issue(&conn, 1, 42, "Complex issue"); + + // 3 state events + insert_state_event(&conn, 1, 
Some(1), None, "closed", "alice", 1_000_000); + insert_state_event(&conn, 2, Some(1), None, "opened", "bob", 2_000_000); + insert_state_event(&conn, 3, Some(1), None, "closed", "alice", 3_000_000); + + // 2 label events + insert_label_event(&conn, 1, Some(1), None, "add", "bug", "alice", 1_100_000); + insert_label_event(&conn, 2, Some(1), None, "remove", "bug", "bob", 2_100_000); + + // 4 notes (across 2 discussions) + insert_discussion(&conn, 1, "disc-1", "Issue", 1, true); + insert_note(&conn, 1, 1, "alice", "Note 1", 1_200_000, false); + insert_note(&conn, 2, 1, "bob", "Note 2", 1_300_000, false); + insert_discussion(&conn, 2, "disc-2", "Issue", 1, false); + insert_note(&conn, 3, 2, "alice", "Note 3", 2_200_000, false); + insert_note(&conn, 4, 2, "bob", "Note 4", 2_300_000, false); + + let response = explain_issue(&conn, 42, Some("group/repo")).unwrap(); + + assert_eq!(response.activity.state_changes, 3); + assert_eq!(response.activity.label_changes, 2); + assert_eq!(response.activity.notes, 4); + assert!(response.activity.first_event.is_some()); + assert!(response.activity.last_event.is_some()); + } +} diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 70e9c45..b4b3a07 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -590,6 +590,9 @@ async fn run_ingest_inner( } } ProgressEvent::StatusEnrichmentSkipped => {} + ProgressEvent::IssueLinksFetchStarted { .. } + | ProgressEvent::IssueLinkFetched { .. } + | ProgressEvent::IssueLinksFetchComplete { .. 
} => {} }) }; diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 4b44a20..946c6dd 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -3,6 +3,7 @@ pub mod count; pub mod doctor; pub mod drift; pub mod embed; +pub mod explain; pub mod file_history; pub mod generate_docs; pub mod ingest; @@ -29,6 +30,7 @@ pub use count::{ pub use doctor::{DoctorChecks, print_doctor_results, run_doctor}; pub use drift::{DriftResponse, print_drift_human, print_drift_json, run_drift}; pub use embed::{print_embed, print_embed_json, run_embed}; +pub use explain::{ExplainResponse, print_explain_human, print_explain_json, run_explain}; pub use file_history::{print_file_history, print_file_history_json, run_file_history}; pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs}; pub use ingest::{ diff --git a/src/cli/commands/show.rs b/src/cli/commands/show.rs index 41de18d..7592f96 100644 --- a/src/cli/commands/show.rs +++ b/src/cli/commands/show.rs @@ -65,6 +65,16 @@ pub struct ClosingMrRef { pub web_url: Option, } +#[derive(Debug, Clone, Serialize)] +pub struct RelatedIssueRef { + pub iid: i64, + pub title: String, + pub state: String, + pub web_url: Option, + /// For unresolved cross-project refs + pub project_path: Option, +} + #[derive(Debug, Serialize)] pub struct IssueDetail { pub id: i64, @@ -87,6 +97,7 @@ pub struct IssueDetail { pub user_notes_count: i64, pub merge_requests_count: usize, pub closing_merge_requests: Vec, + pub related_issues: Vec, pub discussions: Vec, pub status_name: Option, pub status_category: Option, @@ -125,6 +136,8 @@ pub fn run_show_issue( let closing_mrs = get_closing_mrs(&conn, issue.id)?; + let related_issues = get_related_issues(&conn, issue.id)?; + let discussions = get_issue_discussions(&conn, issue.id)?; let references_full = format!("{}#{}", issue.project_path, issue.iid); @@ -151,6 +164,7 @@ pub fn run_show_issue( user_notes_count: issue.user_notes_count, merge_requests_count, 
closing_merge_requests: closing_mrs, + related_issues, discussions, status_name: issue.status_name, status_category: issue.status_category, @@ -321,6 +335,54 @@ fn get_closing_mrs(conn: &Connection, issue_id: i64) -> Result Ok(mrs) } +fn get_related_issues(conn: &Connection, issue_id: i64) -> Result> { + // Resolved local references: source or target side + let mut stmt = conn.prepare( + "SELECT DISTINCT i.iid, i.title, i.state, i.web_url, NULL AS project_path + FROM entity_references er + JOIN issues i ON i.id = er.target_entity_id + WHERE er.source_entity_type = 'issue' + AND er.source_entity_id = ?1 + AND er.target_entity_type = 'issue' + AND er.reference_type = 'related' + AND er.target_entity_id IS NOT NULL + UNION + SELECT DISTINCT i.iid, i.title, i.state, i.web_url, NULL AS project_path + FROM entity_references er + JOIN issues i ON i.id = er.source_entity_id + WHERE er.target_entity_type = 'issue' + AND er.target_entity_id = ?1 + AND er.source_entity_type = 'issue' + AND er.reference_type = 'related' + UNION + SELECT er.target_entity_iid AS iid, NULL AS title, NULL AS state, NULL AS web_url, + er.target_project_path AS project_path + FROM entity_references er + WHERE er.source_entity_type = 'issue' + AND er.source_entity_id = ?1 + AND er.target_entity_type = 'issue' + AND er.reference_type = 'related' + AND er.target_entity_id IS NULL + ORDER BY iid", + )?; + + let related: Vec = stmt + .query_map([issue_id], |row| { + Ok(RelatedIssueRef { + iid: row.get(0)?, + title: row.get::<_, Option>(1)?.unwrap_or_default(), + state: row + .get::<_, Option>(2)? + .unwrap_or_else(|| "unknown".to_string()), + web_url: row.get(3)?, + project_path: row.get(4)?, + }) + })? 
+ .collect::, _>>()?; + + Ok(related) +} + fn get_issue_discussions(conn: &Connection, issue_id: i64) -> Result> { let mut disc_stmt = conn.prepare( "SELECT id, individual_note FROM discussions @@ -729,6 +791,38 @@ pub fn print_show_issue(issue: &IssueDetail) { } } + // Related Issues section + if !issue.related_issues.is_empty() { + println!( + "{}", + render::section_divider(&format!("Related Issues ({})", issue.related_issues.len())) + ); + for rel in &issue.related_issues { + let (icon, style) = match rel.state.as_str() { + "opened" => (Icons::issue_opened(), Theme::success()), + "closed" => (Icons::issue_closed(), Theme::dim()), + _ => (Icons::issue_opened(), Theme::muted()), + }; + if let Some(project_path) = &rel.project_path { + println!( + " {} {}#{} {}", + Theme::muted().render(icon), + project_path, + rel.iid, + Theme::muted().render("(cross-project, unresolved)"), + ); + } else { + println!( + " {} #{} {} {}", + style.render(icon), + rel.iid, + rel.title, + style.render(&rel.state), + ); + } + } + } + // Description section println!("{}", render::section_divider("Description")); if let Some(desc) = &issue.description { diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 5309b00..6b9f7c8 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -804,6 +804,10 @@ pub struct SyncArgs { #[arg(long = "no-status")] pub no_status: bool, + /// Skip issue link fetching (overrides config) + #[arg(long = "no-issue-links")] + pub no_issue_links: bool, + /// Preview what would be synced without making changes #[arg(long, overrides_with = "no_dry_run")] pub dry_run: bool, diff --git a/src/core/config.rs b/src/core/config.rs index eee368f..bde93fd 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -55,6 +55,9 @@ pub struct SyncConfig { #[serde(rename = "fetchWorkItemStatus", default = "default_true")] pub fetch_work_item_status: bool, + + #[serde(rename = "fetchIssueLinks", default = "default_true")] + pub fetch_issue_links: bool, } fn default_true() -> bool { @@ 
-74,6 +77,7 @@ impl Default for SyncConfig { fetch_resource_events: true, fetch_mr_file_changes: true, fetch_work_item_status: true, + fetch_issue_links: true, } } } diff --git a/src/core/db.rs b/src/core/db.rs index 0a2f063..76b6eab 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -97,6 +97,10 @@ const MIGRATIONS: &[(&str, &str)] = &[ "028", include_str!("../../migrations/028_surgical_sync_runs.sql"), ), + ( + "029", + include_str!("../../migrations/029_issue_links_job_type.sql"), + ), ]; pub fn create_connection(db_path: &Path) -> Result { diff --git a/src/gitlab/client.rs b/src/gitlab/client.rs index 4e664ac..c6a55c3 100644 --- a/src/gitlab/client.rs +++ b/src/gitlab/client.rs @@ -627,6 +627,15 @@ impl GitLabClient { self.fetch_all_pages(&path).await } + pub async fn fetch_issue_links( + &self, + gitlab_project_id: i64, + issue_iid: i64, + ) -> Result> { + let path = format!("/api/v4/projects/{gitlab_project_id}/issues/{issue_iid}/links"); + coalesce_not_found(self.fetch_all_pages(&path).await) + } + pub async fn fetch_mr_diffs( &self, gitlab_project_id: i64, diff --git a/src/gitlab/types.rs b/src/gitlab/types.rs index eb37219..f1a08b9 100644 --- a/src/gitlab/types.rs +++ b/src/gitlab/types.rs @@ -263,6 +263,21 @@ pub struct GitLabMergeRequest { pub squash_commit_sha: Option, } +/// Linked issue returned by GitLab's issue links API. 
+/// GET /projects/:id/issues/:iid/links +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct GitLabIssueLink { + pub id: i64, + pub iid: i64, + pub project_id: i64, + pub title: String, + pub state: String, + pub web_url: String, + /// "relates_to", "blocks", or "is_blocked_by" + pub link_type: String, + pub link_created_at: Option, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WorkItemStatus { pub name: String, diff --git a/src/ingestion/issue_links.rs b/src/ingestion/issue_links.rs new file mode 100644 index 0000000..e63daac --- /dev/null +++ b/src/ingestion/issue_links.rs @@ -0,0 +1,397 @@ +use rusqlite::Connection; +use tracing::debug; + +use crate::core::error::Result; +use crate::core::references::{ + EntityReference, insert_entity_reference, resolve_issue_local_id, resolve_project_path, +}; +use crate::gitlab::types::GitLabIssueLink; + +/// Store issue links as bidirectional entity_references. +/// +/// For each linked issue: +/// - Creates A -> B reference (source -> target) +/// - Creates B -> A reference (target -> source) +/// - Skips self-links +/// - Stores unresolved cross-project links (target_entity_id = NULL) +pub fn store_issue_links( + conn: &Connection, + project_id: i64, + source_issue_local_id: i64, + source_issue_iid: i64, + links: &[GitLabIssueLink], +) -> Result { + let mut stored = 0; + + for link in links { + // Skip self-links + if link.iid == source_issue_iid + && link.project_id == resolve_gitlab_project_id(conn, project_id)?.unwrap_or(-1) + { + debug!(source_iid = source_issue_iid, "Skipping self-link"); + continue; + } + + let target_local_id = + if link.project_id == resolve_gitlab_project_id(conn, project_id)?.unwrap_or(-1) { + resolve_issue_local_id(conn, project_id, link.iid)? + } else { + // Cross-project link: try to find in our DB + resolve_issue_by_gitlab_project(conn, link.project_id, link.iid)? 
+ }; + + let (target_id, target_path, target_iid) = if let Some(local_id) = target_local_id { + (Some(local_id), None, None) + } else { + let path = resolve_project_path(conn, link.project_id)?; + let fallback = path.unwrap_or_else(|| format!("gitlab_project:{}", link.project_id)); + (None, Some(fallback), Some(link.iid)) + }; + + // Forward reference: source -> target + let forward = EntityReference { + project_id, + source_entity_type: "issue", + source_entity_id: source_issue_local_id, + target_entity_type: "issue", + target_entity_id: target_id, + target_project_path: target_path.as_deref(), + target_entity_iid: target_iid, + reference_type: "related", + source_method: "api", + }; + + if insert_entity_reference(conn, &forward)? { + stored += 1; + } + + // Reverse reference: target -> source (only if target is resolved locally) + if let Some(target_local) = target_id { + let reverse = EntityReference { + project_id, + source_entity_type: "issue", + source_entity_id: target_local, + target_entity_type: "issue", + target_entity_id: Some(source_issue_local_id), + target_project_path: None, + target_entity_iid: None, + reference_type: "related", + source_method: "api", + }; + + if insert_entity_reference(conn, &reverse)? { + stored += 1; + } + } + } + + Ok(stored) +} + +/// Resolve the gitlab_project_id for a local project_id. +fn resolve_gitlab_project_id(conn: &Connection, project_id: i64) -> Result> { + use rusqlite::OptionalExtension; + + let result = conn + .query_row( + "SELECT gitlab_project_id FROM projects WHERE id = ?1", + [project_id], + |row| row.get(0), + ) + .optional()?; + + Ok(result) +} + +/// Resolve an issue local ID by gitlab_project_id and iid (cross-project). 
+fn resolve_issue_by_gitlab_project( + conn: &Connection, + gitlab_project_id: i64, + issue_iid: i64, +) -> Result> { + use rusqlite::OptionalExtension; + + let result = conn + .query_row( + "SELECT i.id FROM issues i + JOIN projects p ON p.id = i.project_id + WHERE p.gitlab_project_id = ?1 AND i.iid = ?2", + rusqlite::params![gitlab_project_id, issue_iid], + |row| row.get(0), + ) + .optional()?; + + Ok(result) +} + +/// Update the issue_links watermark after successful sync. +pub fn update_issue_links_watermark(conn: &Connection, issue_local_id: i64) -> Result<()> { + conn.execute( + "UPDATE issues SET issue_links_synced_for_updated_at = updated_at WHERE id = ?", + [issue_local_id], + )?; + Ok(()) +} + +/// Update the issue_links watermark within a transaction. +pub fn update_issue_links_watermark_tx( + tx: &rusqlite::Transaction<'_>, + issue_local_id: i64, +) -> Result<()> { + tx.execute( + "UPDATE issues SET issue_links_synced_for_updated_at = updated_at WHERE id = ?", + [issue_local_id], + )?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::db::{create_connection, run_migrations}; + use std::path::Path; + + fn setup_test_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + + // Insert a project + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) + VALUES (1, 100, 'group/project', 'https://gitlab.example.com/group/project')", + [], + ) + .unwrap(); + + // Insert two issues + conn.execute( + "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username, created_at, updated_at, last_seen_at) + VALUES (10, 1001, 1, 1, 'Issue One', 'opened', 'alice', 1000, 2000, 3000)", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 1002, 2, 1, 'Issue Two', 'opened', 'bob', 1000, 2000, 3000)", + [], 
+ ) + .unwrap(); + + conn + } + + #[test] + fn test_store_issue_links_creates_bidirectional_references() { + let conn = setup_test_db(); + + let links = vec![GitLabIssueLink { + id: 999, + iid: 2, + project_id: 100, // same project + title: "Issue Two".to_string(), + state: "opened".to_string(), + web_url: "https://gitlab.example.com/group/project/-/issues/2".to_string(), + link_type: "relates_to".to_string(), + link_created_at: None, + }]; + + let stored = store_issue_links(&conn, 1, 10, 1, &links).unwrap(); + assert_eq!(stored, 2, "Should create 2 references (forward + reverse)"); + + // Verify forward reference: issue 10 (iid 1) -> issue 20 (iid 2) + let forward_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM entity_references + WHERE source_entity_type = 'issue' AND source_entity_id = 10 + AND target_entity_type = 'issue' AND target_entity_id = 20 + AND reference_type = 'related' AND source_method = 'api'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(forward_count, 1); + + // Verify reverse reference: issue 20 (iid 2) -> issue 10 (iid 1) + let reverse_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM entity_references + WHERE source_entity_type = 'issue' AND source_entity_id = 20 + AND target_entity_type = 'issue' AND target_entity_id = 10 + AND reference_type = 'related' AND source_method = 'api'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(reverse_count, 1); + } + + #[test] + fn test_self_link_skipped() { + let conn = setup_test_db(); + + let links = vec![GitLabIssueLink { + id: 999, + iid: 1, // same iid as source + project_id: 100, + title: "Issue One".to_string(), + state: "opened".to_string(), + web_url: "https://gitlab.example.com/group/project/-/issues/1".to_string(), + link_type: "relates_to".to_string(), + link_created_at: None, + }]; + + let stored = store_issue_links(&conn, 1, 10, 1, &links).unwrap(); + assert_eq!(stored, 0, "Self-link should be skipped"); + + let count: i64 = conn + .query_row( + "SELECT 
COUNT(*) FROM entity_references WHERE project_id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(count, 0); + } + + #[test] + fn test_cross_project_link_unresolved() { + let conn = setup_test_db(); + + // Link to an issue in a different project (not in our DB) + let links = vec![GitLabIssueLink { + id: 999, + iid: 42, + project_id: 200, // different project, not in DB + title: "External Issue".to_string(), + state: "opened".to_string(), + web_url: "https://gitlab.example.com/other/project/-/issues/42".to_string(), + link_type: "relates_to".to_string(), + link_created_at: None, + }]; + + let stored = store_issue_links(&conn, 1, 10, 1, &links).unwrap(); + assert_eq!( + stored, 1, + "Should create 1 forward reference (no reverse for unresolved)" + ); + + // Verify unresolved reference + let (target_id, target_path, target_iid): (Option, Option, Option) = conn + .query_row( + "SELECT target_entity_id, target_project_path, target_entity_iid + FROM entity_references + WHERE source_entity_type = 'issue' AND source_entity_id = 10", + [], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), + ) + .unwrap(); + + assert!(target_id.is_none(), "Target should be unresolved"); + assert_eq!( + target_path.as_deref(), + Some("gitlab_project:200"), + "Should store gitlab_project fallback" + ); + assert_eq!(target_iid, Some(42)); + } + + #[test] + fn test_duplicate_links_idempotent() { + let conn = setup_test_db(); + + let links = vec![GitLabIssueLink { + id: 999, + iid: 2, + project_id: 100, + title: "Issue Two".to_string(), + state: "opened".to_string(), + web_url: "https://gitlab.example.com/group/project/-/issues/2".to_string(), + link_type: "relates_to".to_string(), + link_created_at: None, + }]; + + // Store twice + let stored1 = store_issue_links(&conn, 1, 10, 1, &links).unwrap(); + let stored2 = store_issue_links(&conn, 1, 10, 1, &links).unwrap(); + + assert_eq!(stored1, 2); + assert_eq!( + stored2, 0, + "Second insert should be idempotent (INSERT OR IGNORE)" + 
); + + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM entity_references WHERE project_id = 1 AND reference_type = 'related'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(count, 2, "Should still have exactly 2 references"); + } + + #[test] + fn test_issue_link_deserialization() { + let json = r#"[ + { + "id": 123, + "iid": 42, + "project_id": 100, + "title": "Linked Issue", + "state": "opened", + "web_url": "https://gitlab.example.com/group/project/-/issues/42", + "link_type": "relates_to", + "link_created_at": "2026-01-15T10:30:00.000Z" + }, + { + "id": 456, + "iid": 99, + "project_id": 200, + "title": "Blocking Issue", + "state": "closed", + "web_url": "https://gitlab.example.com/other/project/-/issues/99", + "link_type": "blocks", + "link_created_at": null + } + ]"#; + + let links: Vec = serde_json::from_str(json).unwrap(); + assert_eq!(links.len(), 2); + assert_eq!(links[0].iid, 42); + assert_eq!(links[0].link_type, "relates_to"); + assert_eq!( + links[0].link_created_at.as_deref(), + Some("2026-01-15T10:30:00.000Z") + ); + assert_eq!(links[1].link_type, "blocks"); + assert!(links[1].link_created_at.is_none()); + } + + #[test] + fn test_update_issue_links_watermark() { + let conn = setup_test_db(); + + // Initially NULL + let wm: Option = conn + .query_row( + "SELECT issue_links_synced_for_updated_at FROM issues WHERE id = 10", + [], + |row| row.get(0), + ) + .unwrap(); + assert!(wm.is_none()); + + // Update watermark + update_issue_links_watermark(&conn, 10).unwrap(); + + // Should now equal updated_at (2000) + let wm: Option = conn + .query_row( + "SELECT issue_links_synced_for_updated_at FROM issues WHERE id = 10", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(wm, Some(2000)); + } +} diff --git a/src/ingestion/mod.rs b/src/ingestion/mod.rs index 8d5f3cb..c45e702 100644 --- a/src/ingestion/mod.rs +++ b/src/ingestion/mod.rs @@ -1,6 +1,7 @@ pub mod dirty_tracker; pub mod discussion_queue; pub mod discussions; +pub mod 
issue_links; pub mod issues; pub mod merge_requests; pub mod mr_diffs; diff --git a/src/ingestion/orchestrator.rs b/src/ingestion/orchestrator.rs index 113c5d7..305de25 100644 --- a/src/ingestion/orchestrator.rs +++ b/src/ingestion/orchestrator.rs @@ -45,6 +45,9 @@ pub enum ProgressEvent { MrDiffsFetchStarted { total: usize }, MrDiffFetched { current: usize, total: usize }, MrDiffsFetchComplete { fetched: usize, failed: usize }, + IssueLinksFetchStarted { total: usize }, + IssueLinkFetched { current: usize, total: usize }, + IssueLinksFetchComplete { fetched: usize, failed: usize }, StatusEnrichmentStarted { total: usize }, StatusEnrichmentPageFetched { items_so_far: usize }, StatusEnrichmentWriting { total: usize }, @@ -64,6 +67,8 @@ pub struct IngestProjectResult { pub issues_skipped_discussion_sync: usize, pub resource_events_fetched: usize, pub resource_events_failed: usize, + pub issue_links_fetched: usize, + pub issue_links_failed: usize, pub statuses_enriched: usize, pub statuses_cleared: usize, pub statuses_seen: usize, @@ -357,6 +362,27 @@ pub async fn ingest_project_issues_with_progress( } } + // ── Issue Links ────────────────────────────────────────────────── + if config.sync.fetch_issue_links && !signal.is_cancelled() { + let enqueued = enqueue_issue_links(conn, project_id)?; + if enqueued > 0 { + debug!(enqueued, "Enqueued issue_links jobs"); + } + + let drain_result = drain_issue_links( + conn, + client, + config, + project_id, + gitlab_project_id, + &progress, + signal, + ) + .await?; + result.issue_links_fetched = drain_result.fetched; + result.issue_links_failed = drain_result.failed; + } + debug!( summary = crate::ingestion::nonzero_summary(&[ ("fetched", result.issues_fetched), @@ -368,6 +394,8 @@ pub async fn ingest_project_issues_with_progress( ("skipped", result.issues_skipped_discussion_sync), ("events", result.resource_events_fetched), ("event errors", result.resource_events_failed), + ("links", result.issue_links_fetched), + ("link 
errors", result.issue_links_failed), ]), "Project complete" ); @@ -1441,6 +1469,233 @@ pub(crate) fn store_closes_issues_refs( Ok(()) } +// ─── Issue Links ──────────────────────────────────────────────────────────── + +fn enqueue_issue_links(conn: &Connection, project_id: i64) -> Result { + // Remove stale jobs for issues that haven't changed since their last issue_links sync + conn.execute( + "DELETE FROM pending_dependent_fetches \ + WHERE project_id = ?1 AND entity_type = 'issue' AND job_type = 'issue_links' \ + AND entity_local_id IN ( \ + SELECT id FROM issues \ + WHERE project_id = ?1 \ + AND updated_at <= COALESCE(issue_links_synced_for_updated_at, 0) \ + )", + [project_id], + )?; + + let mut stmt = conn.prepare_cached( + "SELECT id, iid FROM issues \ + WHERE project_id = ?1 \ + AND updated_at > COALESCE(issue_links_synced_for_updated_at, 0)", + )?; + let entities: Vec<(i64, i64)> = stmt + .query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))? + .collect::, _>>()?; + + let mut enqueued = 0; + for (local_id, iid) in &entities { + if enqueue_job( + conn, + project_id, + "issue", + *iid, + *local_id, + "issue_links", + None, + )? 
{ + enqueued += 1; + } + } + + Ok(enqueued) +} + +struct PrefetchedIssueLinks { + job_id: i64, + entity_iid: i64, + entity_local_id: i64, + result: std::result::Result< + Vec, + crate::core::error::LoreError, + >, +} + +async fn prefetch_issue_links( + client: &GitLabClient, + gitlab_project_id: i64, + job_id: i64, + entity_iid: i64, + entity_local_id: i64, +) -> PrefetchedIssueLinks { + let result = client + .fetch_issue_links(gitlab_project_id, entity_iid) + .await; + PrefetchedIssueLinks { + job_id, + entity_iid, + entity_local_id, + result, + } +} + +#[instrument( + skip(conn, client, config, progress, signal), + fields(project_id, gitlab_project_id, items_processed, errors) +)] +async fn drain_issue_links( + conn: &Connection, + client: &GitLabClient, + config: &Config, + project_id: i64, + gitlab_project_id: i64, + progress: &Option, + signal: &ShutdownSignal, +) -> Result { + let mut result = DrainResult::default(); + let batch_size = config.sync.dependent_concurrency as usize; + + let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?; + if reclaimed > 0 { + debug!(reclaimed, "Reclaimed stale issue_links locks"); + } + + let claimable_counts = count_claimable_jobs(conn, project_id)?; + let total_pending = claimable_counts.get("issue_links").copied().unwrap_or(0); + + if total_pending == 0 { + return Ok(result); + } + + let emit = |event: ProgressEvent| { + if let Some(cb) = progress { + cb(event); + } + }; + + emit(ProgressEvent::IssueLinksFetchStarted { + total: total_pending, + }); + + let mut processed = 0; + let mut seen_job_ids = std::collections::HashSet::new(); + + loop { + if signal.is_cancelled() { + debug!("Shutdown requested during issue_links drain"); + break; + } + + let jobs = claim_jobs(conn, "issue_links", project_id, batch_size)?; + if jobs.is_empty() { + break; + } + + // Phase 1: Concurrent HTTP fetches + let futures: Vec<_> = jobs + .iter() + .filter(|j| seen_job_ids.insert(j.id)) + .map(|j| { + prefetch_issue_links( 
+ client, + gitlab_project_id, + j.id, + j.entity_iid, + j.entity_local_id, + ) + }) + .collect(); + + if futures.is_empty() { + warn!("All claimed issue_links jobs were already processed"); + break; + } + + let prefetched = futures::future::join_all(futures).await; + + // Phase 2: Serial DB writes + for p in prefetched { + match p.result { + Ok(links) => { + let tx = conn.unchecked_transaction()?; + let store_result = crate::ingestion::issue_links::store_issue_links( + &tx, + project_id, + p.entity_local_id, + p.entity_iid, + &links, + ); + + match store_result { + Ok(stored) => { + complete_job_tx(&tx, p.job_id)?; + crate::ingestion::issue_links::update_issue_links_watermark_tx( + &tx, + p.entity_local_id, + )?; + tx.commit()?; + result.fetched += 1; + if stored > 0 { + debug!( + entity_iid = p.entity_iid, + stored, "Stored issue link references" + ); + } + } + Err(e) => { + drop(tx); + warn!( + entity_iid = p.entity_iid, + error = %e, + "Failed to store issue link references" + ); + fail_job(conn, p.job_id, &e.to_string())?; + result.failed += 1; + } + } + } + Err(e) => { + let is_not_found = matches!(&e, crate::core::error::LoreError::NotFound(_)); + if is_not_found { + debug!( + entity_iid = p.entity_iid, + "Issue not found for links (probably deleted)" + ); + let tx = conn.unchecked_transaction()?; + complete_job_tx(&tx, p.job_id)?; + tx.commit()?; + result.skipped_not_found += 1; + } else { + warn!( + entity_iid = p.entity_iid, + error = %e, + "HTTP error fetching issue links" + ); + fail_job(conn, p.job_id, &e.to_string())?; + result.failed += 1; + } + } + } + + processed += 1; + emit(ProgressEvent::IssueLinkFetched { + current: processed, + total: total_pending, + }); + } + } + + emit(ProgressEvent::IssueLinksFetchComplete { + fetched: result.fetched, + failed: result.failed, + }); + + tracing::Span::current().record("items_processed", result.fetched); + tracing::Span::current().record("errors", result.failed); + + Ok(result) +} + // ─── MR Diffs (file 
changes) ──────────────────────────────────────────────── fn enqueue_mr_diffs_jobs(conn: &Connection, project_id: i64) -> Result { diff --git a/src/main.rs b/src/main.rs index 3f2e804..1dda1a2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2231,6 +2231,9 @@ async fn handle_sync_cmd( if args.no_status { config.sync.fetch_work_item_status = false; } + if args.no_issue_links { + config.sync.fetch_issue_links = false; + } // Dedup surgical IIDs let mut issue_iids = args.issue; let mut mr_iids = args.mr; @@ -2593,7 +2596,7 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box generate-docs -> embed. Supports surgical per-IID sync with --issue/--mr.", - "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run", "--issue ", "--mr ", "-p/--project ", "--preflight-only"], + "flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--no-issue-links", "--dry-run", "--no-dry-run", "--issue ", "--mr ", "-p/--project ", "--preflight-only"], "example": "lore --robot sync", "notes": { "surgical_sync": "Pass --issue and/or --mr (repeatable) with -p to sync specific entities instead of a full pipeline. Incompatible with --full.",