From 95b7183add541116f3426ddf35ea005f767c1603 Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Sun, 8 Feb 2026 13:35:14 -0500 Subject: [PATCH] feat(who): expand expert + overlap queries with mr_file_changes and mr_reviewers Chain: bd-jec (config flag) -> bd-2yo (fetch MR diffs) -> bd-3qn6 (rewrite who queries) - Add fetch_mr_file_changes config option and --no-file-changes CLI flag - Add GitLab MR diffs API fetch pipeline with watermark-based sync - Create migration 020 for diffs_synced_for_updated_at watermark column - Rewrite query_expert() and query_overlap() to use 4-signal UNION ALL: DiffNote reviewers, DiffNote MR authors, file-change authors, file-change reviewers - Deduplicate across signal types via COUNT(DISTINCT CASE WHEN ... THEN mr_id END) - Add insert_file_change test helper, 8 new who tests, all 397 tests pass - Also includes: list performance migration 019, autocorrect module, README updates Co-Authored-By: Claude Opus 4.6 --- .beads/issues.jsonl | 5 +- .beads/last-touched | 2 +- README.md | 194 +++++-- migrations/019_list_performance.sql | 13 + migrations/020_mr_diffs_watermark.sql | 7 + src/cli/autocorrect.rs | 802 ++++++++++++++++++++++++++ src/cli/commands/ingest.rs | 14 + src/cli/commands/list.rs | 28 +- src/cli/commands/who.rs | 587 ++++++++++++------- src/cli/mod.rs | 20 +- src/core/config.rs | 4 + src/core/db.rs | 8 + src/gitlab/client.rs | 11 +- src/gitlab/mod.rs | 2 +- src/gitlab/types.rs | 12 + src/ingestion/mod.rs | 1 + src/ingestion/mr_diffs.rs | 268 +++++++++ src/ingestion/orchestrator.rs | 261 +++++++++ src/main.rs | 191 +++++- 19 files changed, 2139 insertions(+), 291 deletions(-) create mode 100644 migrations/019_list_performance.sql create mode 100644 migrations/020_mr_diffs_watermark.sql create mode 100644 src/cli/autocorrect.rs create mode 100644 src/ingestion/mr_diffs.rs diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index e7a565d..0684e69 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -96,7 +96,7 @@ 
{"id":"bd-2ug","title":"[CP1] gi ingest --type=issues command","description":"CLI command to orchestrate issue ingestion.\n\n## Module\nsrc/cli/commands/ingest.rs\n\n## Clap Definition\n#[derive(Subcommand)]\npub enum Commands {\n Ingest {\n #[arg(long, value_parser = [\"issues\", \"merge_requests\"])]\n r#type: String,\n \n #[arg(long)]\n project: Option,\n \n #[arg(long)]\n force: bool,\n },\n}\n\n## Implementation\n1. Acquire app lock with heartbeat (respect --force for stale lock)\n2. Create sync_run record (status='running')\n3. For each configured project (or filtered --project):\n - Call orchestrator to ingest issues and discussions\n - Show progress (spinner or progress bar)\n4. Update sync_run (status='succeeded', metrics_json with counts)\n5. Release lock\n\n## Output Format\nIngesting issues...\n\n group/project-one: 1,234 issues fetched, 45 new labels\n\nFetching discussions (312 issues with updates)...\n\n group/project-one: 312 issues → 1,234 discussions, 5,678 notes\n\nTotal: 1,234 issues, 1,234 discussions, 5,678 notes (excluding 1,234 system notes)\nSkipped discussion sync for 922 unchanged issues.\n\n## Error Handling\n- Lock acquisition failure: exit with DatabaseLockError message\n- Network errors: show GitLabNetworkError, exit non-zero\n- Rate limiting: respect backoff, show progress\n\nFiles: src/cli/commands/ingest.rs, src/cli/commands/mod.rs\nTests: tests/integration/sync_runs_tests.rs\nDone when: Full issue + discussion ingestion works end-to-end","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:58.552504Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.875613Z","deleted_at":"2026-01-25T17:02:01.875607Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2um","title":"[CP1] Epic: Issue Ingestion","description":"Ingest all issues, labels, and issue discussions from configured GitLab 
repositories with resumable cursor-based incremental sync. This checkpoint establishes the core data ingestion pattern that will be reused for MRs in Checkpoint 2.\n\n## Success Criteria\n- gi ingest --type=issues fetches all issues (count matches GitLab UI)\n- Labels extracted from issue payloads (name-only)\n- Label linkage reflects current GitLab state (removed labels unlinked on re-sync)\n- Issue discussions fetched per-issue (dependent sync)\n- Cursor-based sync is resumable (re-running fetches 0 new items)\n- Discussion sync skips unchanged issues (per-issue watermark)\n- Sync tracking records all runs (sync_runs table)\n- Single-flight lock prevents concurrent runs\n\n## Internal Gates\n- **Gate A**: Issues only - cursor + upsert + raw payloads + list/count/show working\n- **Gate B**: Labels correct - stale-link removal verified; label count matches GitLab\n- **Gate C**: Dependent discussion sync - watermark prevents redundant refetch; concurrency bounded\n- **Gate D**: Resumability proof - kill mid-run, rerun; bounded redo and no redundant discussion refetch\n\n## Reference\ndocs/prd/checkpoint-1.md","status":"closed","priority":1,"issue_type":"epic","created_at":"2026-01-25T17:02:38.075224Z","created_by":"tayloreernisse","updated_at":"2026-01-25T23:27:15.347364Z","closed_at":"2026-01-25T23:27:15.347317Z","close_reason":"CP1 Issue Ingestion complete: all sub-tasks done, 71 tests pass, CLI commands working","compaction_level":0,"original_size":0} {"id":"bd-2y79","title":"Add work item status via GraphQL enrichment","description":"## Background\n\nGitLab 18.2+ added native work item status (To do, In progress, Done, Won't do, Duplicate) but it is only available via GraphQL, not the REST API. This enriches synced issues with status information by making a supplementary GraphQL call after the REST ingestion.\n\n**Spec reference:** Not in Phase B spec. 
This is an enhancement discovered during Phase B planning.\n\n## Codebase Context — Migration Numbering\n\n- LATEST_SCHEMA_VERSION = 14 (MIGRATIONS array in db.rs includes 001-014)\n- Migration 015 exists on disk (commit SHAs) but is NOT registered in db.rs yet — bd-1oo fixes this\n- bd-1oo creates migration 016 (mr_file_changes)\n- **This bead uses migration 017** for issue status columns\n- bd-343o (linked issues) would use migration 018 — coordinate numbering\n- No GraphQL client exists yet — this would be the first GraphQL usage in the codebase\n\n## Other Codebase Context\n\n- src/core/config.rs SyncConfig has: fetch_resource_events, and will get fetch_mr_file_changes (bd-jec)\n- src/gitlab/client.rs: all API methods use reqwest + fetch_all_pages() + coalesce_not_found()\n- src/cli/commands/show.rs: print_show_issue() displays issue detail with colored sections\n- src/ingestion/orchestrator.rs: ingest_project_issues() returns IngestProjectResult with progress callbacks\n\n## Approach\n\n### Phase 1: GraphQL Client (src/gitlab/graphql.rs NEW)\n\nMinimal GraphQL client — single function, not a full framework:\n```rust\npub async fn graphql_query(\n base_url: &str,\n token: &str,\n query: &str,\n variables: serde_json::Value,\n) -> Result {\n // POST to {base_url}/api/graphql\n // Content-Type: application/json\n // Headers: PRIVATE-TOKEN: {token}\n // Body: {\"query\": \"...\", \"variables\": {...}}\n // Parse response, check for errors array\n}\n```\n\nAdd `pub mod graphql;` to `src/gitlab/mod.rs`.\n\n### Phase 2: Status Types (src/gitlab/types.rs)\n\n```rust\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct WorkItemStatus {\n pub name: String, // \"To do\", \"In progress\", \"Done\", etc.\n pub category: String, // \"todo\", \"in_progress\", \"done\"\n pub color: Option,\n pub icon_name: Option,\n}\n```\n\n### Phase 3: Batch Fetch Query\n\n```graphql\nquery IssueStatuses($projectPath: ID!, $iids: [String!]) {\n project(fullPath: $projectPath) {\n 
issues(iids: $iids) {\n nodes {\n iid\n state\n workItemType { name }\n widgets {\n ... on WorkItemWidgetStatus {\n status { name category color iconName }\n }\n }\n }\n }\n }\n}\n```\n\nBatch in groups of 50 IIDs to avoid query complexity limits.\n\n### Phase 4: Migration 017 (migrations/017_issue_status.sql)\n\n```sql\nALTER TABLE issues ADD COLUMN status_name TEXT;\nALTER TABLE issues ADD COLUMN status_category TEXT;\n\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (17, strftime('%s', 'now') * 1000, 'Issue work item status columns');\n```\n\nRegister in db.rs MIGRATIONS array.\n\n### Phase 5: Enrichment Step (src/ingestion/orchestrator.rs)\n\nAfter REST issue ingestion, call GraphQL to fetch statuses for all synced issues.\n\n### Phase 6: Display (src/cli/commands/show.rs)\n\nAdd status line to print_show_issue():\n```\nStatus: In progress\n```\n\n### Phase 7: Graceful Degradation\n\n- If GraphQL endpoint returns 404 or 403: skip silently (older GitLab)\n- If work item status widget not present: skip\n- Never fail the sync pipeline due to GraphQL errors\n\n## Acceptance Criteria\n\n- [ ] GraphQL client can POST queries and handle errors\n- [ ] Status fetched in batches of 50 IIDs\n- [ ] Migration 017 adds status_name and status_category columns to issues table\n- [ ] `lore show issue 123` shows status in human output (when available)\n- [ ] `lore --robot show issue 123` includes status_name, status_category in JSON\n- [ ] Graceful degradation: older GitLab versions do not cause errors\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- src/gitlab/graphql.rs (NEW — minimal GraphQL client)\n- src/gitlab/mod.rs (add pub mod graphql)\n- src/gitlab/types.rs (add WorkItemStatus struct)\n- migrations/017_issue_status.sql (NEW)\n- src/core/db.rs (add migration 017 to MIGRATIONS array)\n- src/ingestion/orchestrator.rs (call enrich_issue_statuses after issue sync)\n- 
src/cli/commands/show.rs (display status in issue output)\n\n## TDD Loop\n\nRED:\n- test_graphql_query_success — mock server returns valid GraphQL response\n- test_graphql_query_error — mock server returns errors array -> Result::Err\n- test_work_item_status_deserialize — parse GraphQL response into WorkItemStatus\n- test_enrichment_graceful_degradation — 403 response -> Ok(0) not Err\n\nGREEN: Implement GraphQL client, enrichment step, migration.\n\nVERIFY: cargo test --lib -- graphql\n\n## Edge Cases\n\n- GitLab < 18.2: GraphQL endpoint exists but work item status widget missing -> skip\n- GraphQL rate limiting: respect Retry-After header\n- Issue with no status widget: status_name = NULL in DB\n- Token with only read_api scope: GraphQL may require different scopes — test and document\n- Must run after bd-1oo (migration numbering: 015 registered, 016 created, then 017)\n","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-05T18:32:39.287957Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:55:27.192737Z","compaction_level":0,"original_size":0,"labels":["api","phase-b"]} -{"id":"bd-2yo","title":"Fetch MR diffs API and populate mr_file_changes","description":"## Background\n\nThis bead fetches MR diff metadata from the GitLab API and populates the mr_file_changes table created by migration 016. 
It extracts only file-level metadata (paths, change type) and discards actual diff content.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.3 (Ingestion).\n\n## Codebase Context\n\n- pending_dependent_fetches already has `job_type='mr_diffs'` in CHECK constraint (migration 011)\n- dependent_queue.rs has: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- Orchestrator pattern: enqueue after entity ingestion, drain after primary ingestion completes\n- GitLab client uses fetch_all_pages() for pagination\n- Existing drain patterns in orchestrator.rs: drain_resource_events() and drain_mr_closes_issues() — follow same pattern\n- config.sync.fetch_mr_file_changes flag guards enqueue (see bd-jec)\n- mr_file_changes table created by migration 016 (bd-1oo) — NOT 015 (015 is commit SHAs)\n- merge_commit_sha and squash_commit_sha already captured during MR ingestion (src/ingestion/merge_requests.rs lines 184, 205-206, 230-231) — no work needed for those fields\n\n## Approach\n\n### 1. API Client — add to `src/gitlab/client.rs`:\n\n```rust\npub async fn fetch_mr_diffs(\n &self,\n project_id: i64,\n mr_iid: i64,\n) -> Result> {\n let path = format\\!(\"/projects/{project_id}/merge_requests/{mr_iid}/diffs\");\n self.fetch_all_pages(&path, &[(\"per_page\", \"100\")]).await\n .or_else(|e| coalesce_not_found(e, Vec::new()))\n}\n```\n\n### 2. Types — add to `src/gitlab/types.rs`:\n\n```rust\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct GitLabMrDiff {\n pub old_path: String,\n pub new_path: String,\n pub new_file: bool,\n pub renamed_file: bool,\n pub deleted_file: bool,\n // Ignore: diff, a_mode, b_mode, generated_file (not stored)\n}\n```\n\nAdd `GitLabMrDiff` to `src/gitlab/mod.rs` re-exports.\n\n### 3. 
Change Type Derivation (in new file):\n\n```rust\nfn derive_change_type(diff: &GitLabMrDiff) -> &'static str {\n if diff.new_file { \"added\" }\n else if diff.renamed_file { \"renamed\" }\n else if diff.deleted_file { \"deleted\" }\n else { \"modified\" }\n}\n```\n\n### 4. DB Storage — new `src/ingestion/mr_diffs.rs`:\n\n```rust\npub fn upsert_mr_file_changes(\n conn: &Connection,\n mr_local_id: i64,\n project_id: i64,\n diffs: &[GitLabMrDiff],\n) -> Result {\n // DELETE FROM mr_file_changes WHERE merge_request_id = ?\n // INSERT each diff row with derived change_type\n // DELETE+INSERT is simpler than UPSERT for array replacement\n}\n```\n\nAdd `pub mod mr_diffs;` to `src/ingestion/mod.rs`.\n\n### 5. Queue Integration — in orchestrator.rs:\n\n```rust\n// After MR upsert, if config.sync.fetch_mr_file_changes:\nenqueue_job(conn, project_id, \"merge_request\", mr_iid, mr_local_id, \"mr_diffs\")?;\n```\n\nAdd `drain_mr_diffs()` following the drain_mr_closes_issues() pattern. Call it after drain_mr_closes_issues() in the sync pipeline.\n\n## Acceptance Criteria\n\n- [ ] `fetch_mr_diffs()` calls GET /projects/:id/merge_requests/:iid/diffs with pagination\n- [ ] GitLabMrDiff type added to src/gitlab/types.rs and re-exported from src/gitlab/mod.rs\n- [ ] Change type derived: new_file->added, renamed_file->renamed, deleted_file->deleted, else->modified\n- [ ] mr_file_changes rows have correct old_path, new_path, change_type\n- [ ] Old rows deleted before insert (clean replacement per MR)\n- [ ] Jobs only enqueued when config.sync.fetch_mr_file_changes is true\n- [ ] 404/403 API errors handled gracefully (empty result, not failure)\n- [ ] drain_mr_diffs() added to orchestrator.rs sync pipeline\n- [ ] `pub mod mr_diffs;` added to src/ingestion/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/gitlab/client.rs` (add fetch_mr_diffs method)\n- `src/gitlab/types.rs` (add GitLabMrDiff struct)\n- 
`src/gitlab/mod.rs` (re-export GitLabMrDiff)\n- `src/ingestion/mr_diffs.rs` (NEW — upsert_mr_file_changes + derive_change_type)\n- `src/ingestion/mod.rs` (add pub mod mr_diffs)\n- `src/ingestion/orchestrator.rs` (enqueue mr_diffs jobs + drain_mr_diffs)\n\n## TDD Loop\n\nRED:\n- `test_derive_change_type_added` - new_file=true -> \"added\"\n- `test_derive_change_type_renamed` - renamed_file=true -> \"renamed\"\n- `test_derive_change_type_deleted` - deleted_file=true -> \"deleted\"\n- `test_derive_change_type_modified` - all false -> \"modified\"\n- `test_upsert_replaces_existing` - second upsert replaces first\n\nGREEN: Implement API client, type derivation, DB ops, orchestrator wiring.\n\nVERIFY: `cargo test --lib -- mr_diffs`\n\n## Edge Cases\n\n- MR with 500+ files: paginate properly via fetch_all_pages\n- Binary files: handled as modified (renamed_file/new_file/deleted_file all false)\n- File renamed AND modified: renamed_file=true takes precedence\n- Draft MRs: still fetch diffs\n- Deleted MR: 404 -> empty vec via coalesce_not_found()\n- merge_commit_sha/squash_commit_sha: already handled in merge_requests.rs ingestion — NOT part of this bead\n","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:08.939514Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:54:27.131504Z","compaction_level":0,"original_size":0,"labels":["api","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-2yo","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.941359Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-1oo","type":"blocks","created_at":"2026-02-02T21:34:16.555239Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-jec","type":"blocks","created_at":"2026-02-02T21:34:16.656402Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-tir","type":"blocks","created_at":"2026-02-02T21:34:16.605198Z","created_by":"tayloreernisse"}]} 
+{"id":"bd-2yo","title":"Fetch MR diffs API and populate mr_file_changes","description":"## Background\n\nThis bead fetches MR diff metadata from the GitLab API and populates the mr_file_changes table created by migration 016. It extracts only file-level metadata (paths, change type) and discards actual diff content.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.3 (Ingestion).\n\n## Codebase Context\n\n- pending_dependent_fetches already has `job_type='mr_diffs'` in CHECK constraint (migration 011)\n- dependent_queue.rs has: enqueue_job(), claim_jobs(), complete_job(), fail_job() with exponential backoff\n- Orchestrator pattern: enqueue after entity ingestion, drain after primary ingestion completes\n- GitLab client uses fetch_all_pages() for pagination\n- Existing drain patterns in orchestrator.rs: drain_resource_events() and drain_mr_closes_issues() — follow same pattern\n- config.sync.fetch_mr_file_changes flag guards enqueue (see bd-jec)\n- mr_file_changes table created by migration 016 (bd-1oo) — NOT 015 (015 is commit SHAs)\n- merge_commit_sha and squash_commit_sha already captured during MR ingestion (src/ingestion/merge_requests.rs lines 184, 205-206, 230-231) — no work needed for those fields\n\n## Approach\n\n### 1. API Client — add to `src/gitlab/client.rs`:\n\n```rust\npub async fn fetch_mr_diffs(\n &self,\n project_id: i64,\n mr_iid: i64,\n) -> Result> {\n let path = format\\!(\"/projects/{project_id}/merge_requests/{mr_iid}/diffs\");\n self.fetch_all_pages(&path, &[(\"per_page\", \"100\")]).await\n .or_else(|e| coalesce_not_found(e, Vec::new()))\n}\n```\n\n### 2. 
Types — add to `src/gitlab/types.rs`:\n\n```rust\n#[derive(Debug, Clone, Deserialize, Serialize)]\npub struct GitLabMrDiff {\n pub old_path: String,\n pub new_path: String,\n pub new_file: bool,\n pub renamed_file: bool,\n pub deleted_file: bool,\n // Ignore: diff, a_mode, b_mode, generated_file (not stored)\n}\n```\n\nAdd `GitLabMrDiff` to `src/gitlab/mod.rs` re-exports.\n\n### 3. Change Type Derivation (in new file):\n\n```rust\nfn derive_change_type(diff: &GitLabMrDiff) -> &'static str {\n if diff.new_file { \"added\" }\n else if diff.renamed_file { \"renamed\" }\n else if diff.deleted_file { \"deleted\" }\n else { \"modified\" }\n}\n```\n\n### 4. DB Storage — new `src/ingestion/mr_diffs.rs`:\n\n```rust\npub fn upsert_mr_file_changes(\n conn: &Connection,\n mr_local_id: i64,\n project_id: i64,\n diffs: &[GitLabMrDiff],\n) -> Result {\n // DELETE FROM mr_file_changes WHERE merge_request_id = ?\n // INSERT each diff row with derived change_type\n // DELETE+INSERT is simpler than UPSERT for array replacement\n}\n```\n\nAdd `pub mod mr_diffs;` to `src/ingestion/mod.rs`.\n\n### 5. Queue Integration — in orchestrator.rs:\n\n```rust\n// After MR upsert, if config.sync.fetch_mr_file_changes:\nenqueue_job(conn, project_id, \"merge_request\", mr_iid, mr_local_id, \"mr_diffs\")?;\n```\n\nAdd `drain_mr_diffs()` following the drain_mr_closes_issues() pattern. 
Call it after drain_mr_closes_issues() in the sync pipeline.\n\n## Acceptance Criteria\n\n- [ ] `fetch_mr_diffs()` calls GET /projects/:id/merge_requests/:iid/diffs with pagination\n- [ ] GitLabMrDiff type added to src/gitlab/types.rs and re-exported from src/gitlab/mod.rs\n- [ ] Change type derived: new_file->added, renamed_file->renamed, deleted_file->deleted, else->modified\n- [ ] mr_file_changes rows have correct old_path, new_path, change_type\n- [ ] Old rows deleted before insert (clean replacement per MR)\n- [ ] Jobs only enqueued when config.sync.fetch_mr_file_changes is true\n- [ ] 404/403 API errors handled gracefully (empty result, not failure)\n- [ ] drain_mr_diffs() added to orchestrator.rs sync pipeline\n- [ ] `pub mod mr_diffs;` added to src/ingestion/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/gitlab/client.rs` (add fetch_mr_diffs method)\n- `src/gitlab/types.rs` (add GitLabMrDiff struct)\n- `src/gitlab/mod.rs` (re-export GitLabMrDiff)\n- `src/ingestion/mr_diffs.rs` (NEW — upsert_mr_file_changes + derive_change_type)\n- `src/ingestion/mod.rs` (add pub mod mr_diffs)\n- `src/ingestion/orchestrator.rs` (enqueue mr_diffs jobs + drain_mr_diffs)\n\n## TDD Loop\n\nRED:\n- `test_derive_change_type_added` - new_file=true -> \"added\"\n- `test_derive_change_type_renamed` - renamed_file=true -> \"renamed\"\n- `test_derive_change_type_deleted` - deleted_file=true -> \"deleted\"\n- `test_derive_change_type_modified` - all false -> \"modified\"\n- `test_upsert_replaces_existing` - second upsert replaces first\n\nGREEN: Implement API client, type derivation, DB ops, orchestrator wiring.\n\nVERIFY: `cargo test --lib -- mr_diffs`\n\n## Edge Cases\n\n- MR with 500+ files: paginate properly via fetch_all_pages\n- Binary files: handled as modified (renamed_file/new_file/deleted_file all false)\n- File renamed AND modified: renamed_file=true takes precedence\n- Draft MRs: still fetch 
diffs\n- Deleted MR: 404 -> empty vec via coalesce_not_found()\n- merge_commit_sha/squash_commit_sha: already handled in merge_requests.rs ingestion — NOT part of this bead\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:08.939514Z","created_by":"tayloreernisse","updated_at":"2026-02-08T18:27:05.993580Z","closed_at":"2026-02-08T18:27:05.993482Z","close_reason":"Implemented: GitLabMrDiff type, fetch_mr_diffs client method, upsert_mr_file_changes in new mr_diffs.rs module, enqueue_mr_diffs_jobs + drain_mr_diffs in orchestrator, migration 020 for diffs_synced_for_updated_at watermark, progress events, autocorrect registry. All 390 tests pass, clippy clean.","compaction_level":0,"original_size":0,"labels":["api","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-2yo","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.941359Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-1oo","type":"blocks","created_at":"2026-02-02T21:34:16.555239Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-jec","type":"blocks","created_at":"2026-02-02T21:34:16.656402Z","created_by":"tayloreernisse"},{"issue_id":"bd-2yo","depends_on_id":"bd-tir","type":"blocks","created_at":"2026-02-02T21:34:16.605198Z","created_by":"tayloreernisse"}]} {"id":"bd-2yq","title":"[CP1] Issue transformer with label extraction","description":"Transform GitLab issue payloads to normalized database schema.\n\nFunctions to implement:\n- transformIssue(gitlabIssue, localProjectId) → NormalizedIssue\n- extractLabels(gitlabIssue, localProjectId) → Label[]\n\nTransformation rules:\n- Convert ISO timestamps to ms epoch using isoToMs()\n- Set last_seen_at to nowMs()\n- Handle labels vs labels_details (prefer details when available)\n- Handle missing optional fields gracefully\n\nFiles: src/gitlab/transformers/issue.ts\nTests: tests/unit/issue-transformer.test.ts\nDone when: Unit tests pass for payload 
transformation and label extraction","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:09.660448Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.152259Z","deleted_at":"2026-01-25T15:21:35.152254Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2ys","title":"[CP1] Cargo.toml updates - async-stream and futures","description":"## Background\n\nThe GitLab client pagination methods require async streaming capabilities. The `async-stream` crate provides the `stream!` macro for creating async iterators, and `futures` provides `StreamExt` for consuming them with `.next()` and other combinators.\n\n## Approach\n\nAdd these dependencies to Cargo.toml:\n\n```toml\n[dependencies]\nasync-stream = \"0.3\"\nfutures = { version = \"0.3\", default-features = false, features = [\"alloc\"] }\n```\n\nUse minimal features on `futures` to avoid pulling unnecessary code.\n\n## Acceptance Criteria\n\n- [ ] `async-stream = \"0.3\"` is in Cargo.toml [dependencies]\n- [ ] `futures` with `alloc` feature is in Cargo.toml [dependencies]\n- [ ] `cargo check` succeeds after adding dependencies\n\n## Files\n\n- Cargo.toml (edit)\n\n## TDD Loop\n\nRED: Not applicable (dependency addition)\nGREEN: Add lines to Cargo.toml\nVERIFY: `cargo check`\n\n## Edge Cases\n\n- If `futures` is already present, merge features rather than duplicate\n- Use exact version pins for reproducibility","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.104664Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:25:10.274787Z","closed_at":"2026-01-25T22:25:10.274727Z","close_reason":"Added async-stream 0.3 and futures 0.3 (alloc feature) to Cargo.toml, cargo check passes","compaction_level":0,"original_size":0} {"id":"bd-2zl","title":"Epic: Gate 1 - Resource Events Ingestion","description":"## Background\nGate 1 transforms gitlore from a snapshot 
engine into a temporal data store by ingesting structured event data from GitLab Resource Events APIs (state, label, milestone changes). This is the foundation — Gates 2-5 all depend on the event tables and dependent fetch queue that Gate 1 establishes.\n\nCurrently, when an issue is closed or a label changes, gitlore overwrites the current state. The transition is lost. Gate 1 captures these transitions as discrete events with timestamps, actors, and provenance, enabling temporal queries like \"when did this issue become critical?\" and \"who closed this MR?\"\n\n## Architecture\n- **Three new tables:** resource_state_events, resource_label_events, resource_milestone_events (migration 011, already shipped as bd-hu3)\n- **Generic dependent fetch queue:** pending_dependent_fetches table replaces per-type queue tables. Supports job_types: resource_events, mr_closes_issues, mr_diffs. Used by Gates 1, 2, and 4.\n- **Opt-in via config:** sync.fetchResourceEvents (default true). --no-events CLI flag to skip.\n- **Incremental:** Only changed entities enqueued. --full re-enqueues all.\n- **Crash recovery:** locked_at column with 5-minute stale lock reclaim.\n\n## Children (Execution Order)\n1. **bd-hu3** [CLOSED] — Migration 011: event tables + entity_references + dependent fetch queue\n2. **bd-2e8** [CLOSED] — fetchResourceEvents config flag\n3. **bd-2fm** [CLOSED] — GitLab Resource Event serde types\n4. **bd-sqw** [CLOSED] — Resource Events API endpoints in GitLab client\n5. **bd-1uc** [CLOSED] — DB upsert functions for resource events\n6. **bd-tir** [CLOSED] — Generic dependent fetch queue (enqueue + drain)\n7. **bd-1ep** [CLOSED] — Wire resource event fetching into sync pipeline\n8. **bd-3sh** [CLOSED] — lore count events command\n9. 
**bd-1m8** [CLOSED] — lore stats --check for event integrity + queue health\n\n## Gate Completion Criteria\n- [ ] All 9 children closed\n- [ ] `lore sync` fetches resource events for changed entities\n- [ ] `lore sync --no-events` skips event fetching\n- [ ] Event fetch failures queued for retry with exponential backoff\n- [ ] Stale locks auto-reclaimed on next sync run\n- [ ] `lore count events` shows counts by type (state/label/milestone)\n- [ ] `lore stats --check` validates referential integrity + queue health\n- [ ] Robot mode JSON for all new commands\n- [ ] Integration test: full sync cycle with events enabled\n\n## Dependencies\n- None (Gate 1 is the foundation)\n- Downstream: Gate 2 (bd-1se) depends on event tables and dependent fetch queue","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:30:49.136036Z","created_by":"tayloreernisse","updated_at":"2026-02-05T16:06:52.080788Z","closed_at":"2026-02-05T16:06:52.080725Z","close_reason":"Already implemented: migration 011 exists, events_db.rs has upsert functions, client.rs has fetch_*_state_events, orchestrator.rs has drain_resource_events. 
Full Gate 1 functionality is live.","compaction_level":0,"original_size":0,"labels":["epic","gate-1","phase-b"]} @@ -143,6 +143,7 @@ {"id":"bd-3pz","title":"OBSERV Epic: Phase 4 - Sync History Enrichment","description":"Wire up sync_runs INSERT/UPDATE lifecycle (table exists but nothing writes to it), schema migration 014, enhanced sync-status with recent runs and metrics.\n\nDepends on: Phase 3 (needs Vec to store in metrics_json)\nUnblocks: nothing (terminal phase)\n\nFiles: migrations/014_sync_runs_enrichment.sql (new), src/core/sync_run.rs (new), src/cli/commands/sync.rs, src/cli/commands/ingest.rs, src/cli/commands/sync_status.rs\n\nAcceptance criteria (PRD Section 6.4):\n- lore sync creates sync_runs row with status=running, updated to succeeded/failed\n- sync_runs.run_id matches log files and robot JSON\n- metrics_json contains serialized Vec\n- lore sync-status shows last 10 runs with metrics\n- Failed syncs record error and partial metrics\n- Migration 014 applies cleanly","status":"closed","priority":2,"issue_type":"epic","created_at":"2026-02-04T15:53:27.469149Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:43:07.375047Z","closed_at":"2026-02-04T17:43:07.375Z","close_reason":"Phase 4 complete: migration 014, SyncRunRecorder, wiring, sync-status enhancement","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-3pz","depends_on_id":"bd-3er","type":"blocks","created_at":"2026-02-04T15:55:19.153053Z","created_by":"tayloreernisse"}]} {"id":"bd-3q2","title":"Implement search filters module","description":"## Background\nSearch filters are applied post-retrieval to narrow results by source type, author, project, date, labels, and file paths. The filter module must preserve ranking order from the search pipeline (FTS/RRF scores). 
It uses SQLite's JSON1 extension (json_each) to pass ranked document IDs efficiently and maintain their original order.\n\n## Approach\nCreate `src/search/filters.rs` per PRD Section 3.3. The full implementation is specified in the PRD including the SQL query.\n\n**Key types:**\n- `SearchFilters` struct with all filter fields + `has_any_filter()` + `clamp_limit()`\n- `PathFilter` enum: `Prefix(String)` (trailing `/`) or `Exact(String)`\n\n**Core function:**\n```rust\npub fn apply_filters(\n conn: &Connection,\n document_ids: &[i64],\n filters: &SearchFilters,\n) -> Result>\n```\n\n**SQL pattern (JSON1 for ordered ID passing):**\n```sql\nSELECT d.id\nFROM json_each(?) AS j\nJOIN documents d ON d.id = j.value\nWHERE 1=1\n AND d.source_type = ? -- if source_type filter set\n AND d.author_username = ? -- if author filter set\n -- ... dynamic WHERE clauses\nORDER BY j.key -- preserves ranking order\nLIMIT ?\n```\n\n**Filter logic:**\n- Labels: AND logic via `EXISTS (SELECT 1 FROM document_labels dl WHERE dl.document_id = d.id AND dl.label_name = ?)`\n- Path prefix: `LIKE ? 
ESCAPE '\\\\'` with escaped wildcards\n- Path exact: `= ?`\n- Limit: clamped to [1, 100], default 20\n\n## Acceptance Criteria\n- [ ] source_type filter works (issue, merge_request, discussion)\n- [ ] author filter: exact username match\n- [ ] project_id filter: restricts to single project\n- [ ] after filter: created_at >= value\n- [ ] updated_after filter: updated_at >= value\n- [ ] labels filter: AND logic (all specified labels must be present)\n- [ ] path exact filter: matches exact path string\n- [ ] path prefix filter: trailing `/` triggers LIKE with escaped wildcards\n- [ ] Ranking order preserved (ORDER BY j.key from json_each)\n- [ ] Limit clamped: 0 -> 20 (default), 200 -> 100 (max)\n- [ ] Empty document_ids returns empty Vec\n- [ ] Multiple filters compose correctly (all applied via AND)\n- [ ] `cargo test filters` passes\n\n## Files\n- `src/search/filters.rs` — new file\n- `src/search/mod.rs` — add `pub use filters::{SearchFilters, PathFilter, apply_filters};`\n\n## TDD Loop\nRED: Tests in `filters.rs` `#[cfg(test)] mod tests`:\n- `test_no_filters` — all docs returned up to limit\n- `test_source_type_filter` — only issues returned\n- `test_author_filter` — exact match\n- `test_labels_and_logic` — must have ALL specified labels\n- `test_path_exact` — matches exact path\n- `test_path_prefix` — trailing slash matches prefix\n- `test_limit_clamping` — 0 -> 20, 200 -> 100\n- `test_ranking_preserved` — output order matches input order\n- `test_has_any_filter` — true when any filter set, false when default\nGREEN: Implement apply_filters with dynamic SQL\nVERIFY: `cargo test filters`\n\n## Edge Cases\n- Path containing SQL LIKE wildcards (`%`, `_`): must be escaped before LIKE\n- Empty labels list: no label filter applied (not \"must have zero labels\")\n- `has_any_filter()` returns false for default SearchFilters (no filters set)\n- Large document_ids array (1000+): JSON1 handles 
efficiently","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:13.042512Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:24:38.402483Z","closed_at":"2026-01-30T17:24:38.402302Z","close_reason":"Completed: SearchFilters with has_any_filter/clamp_limit, PathFilter enum, apply_filters with dynamic SQL + json_each ordering, escape_like, 8 tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3q2","depends_on_id":"bd-36p","type":"blocks","created_at":"2026-01-30T15:29:24.412357Z","created_by":"tayloreernisse"}]} {"id":"bd-3qm","title":"[CP1] Final validation - tests, smoke tests, integrity checks","description":"Run all tests and perform data integrity checks.\n\nValidation steps:\n1. Run all unit tests (vitest)\n2. Run all integration tests\n3. Run ESLint\n4. Run TypeScript strict check\n5. Manual smoke tests per PRD table\n6. Data integrity SQL checks:\n - Issue count matches GitLab\n - Every issue has raw_payload\n - Labels in junction exist in labels table\n - sync_cursors has entry per project\n - Re-run fetches 0 new items\n - Discussion count > 0\n - Every discussion has >= 1 note\n - individual_note=true has exactly 1 note\n\nFiles: All CP1 files\nDone when: All gate criteria from Definition of Done pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:20:51.994183Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.152852Z","deleted_at":"2026-01-25T15:21:35.152849Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-3qn6","title":"Rewrite who --path to use mr_file_changes for authorship signal","description":"## Problem\n\nwho --path currently only queries DiffNote records (notes.position_new_path), so it only finds people who left inline review comments on that exact file. 
This is highly misleading -- it reports 'no experts' for files that have been actively authored and reviewed, just without inline comments on that specific path.\n\n## Solution\n\nRewrite query_expert() to incorporate mr_file_changes as a primary signal source:\n\n1. MR authorship signal: JOIN mr_file_changes to find MR authors who touched the file (strongest signal)\n2. MR reviewer signal: JOIN mr_file_changes + merge_request_reviewers to find reviewers of MRs that touched the file (even without DiffNotes on that file)\n3. DiffNote signal: Keep existing DiffNote query as a supplementary signal (inline comments show deep familiarity)\n\n### Scoring weights (to tune):\n- MR author who touched the file: 15 points per MR\n- MR reviewer of MR touching the file: 10 points per MR\n- DiffNote reviewer on that file: 20 points per MR + 1 per note (existing)\n- DiffNote MR author: 12 points per MR (existing)\n\n### Path matching:\n- Reuse build_path_query() but extend DB probes to also check mr_file_changes.new_path\n- For prefix matching, LIKE on mr_file_changes.new_path\n\n### Also fix:\n- build_path_query() probes should check mr_file_changes in addition to notes, so path resolution works even when no DiffNotes exist\n\n## Acceptance Criteria\n- who --path returns results for files touched in MRs even without DiffNotes\n- Existing DiffNote-based scoring still contributes\n- build_path_query probes mr_file_changes for path existence\n- Tests cover: MR-only authorship, DiffNote-only, combined scoring\n- Robot mode JSON output unchanged (same schema)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-08T18:16:41.991344Z","created_by":"tayloreernisse","updated_at":"2026-02-08T18:34:25.704024Z","closed_at":"2026-02-08T18:34:25.703965Z","close_reason":"Rewrote query_expert() and query_overlap() in who.rs to incorporate mr_file_changes + mr_reviewers as signal sources alongside existing DiffNote data. 
Uses 4-branch UNION ALL with COUNT(DISTINCT CASE) for proper deduplication across signal types. 8 new tests, all 397 pass.","compaction_level":0,"original_size":0,"labels":["cli","phase-b","who"],"dependencies":[{"issue_id":"bd-3qn6","depends_on_id":"bd-2yo","type":"blocks","created_at":"2026-02-08T18:16:41.994443Z","created_by":"tayloreernisse"}]} {"id":"bd-3qs","title":"Implement lore generate-docs CLI command","description":"## Background\nThe generate-docs CLI command is the user-facing wrapper around the document regeneration pipeline. It has two modes: incremental (default, processes dirty_sources queue only) and full (seeds dirty_sources with ALL entities, then drains). Both modes use the same regenerator codepath to avoid logic divergence. Full mode uses keyset pagination (WHERE id > last_id) for seeding to avoid O(n^2) OFFSET degradation on large tables.\n\n## Approach\nCreate `src/cli/commands/generate_docs.rs` per PRD Section 2.4.\n\n**Core function:**\n```rust\npub fn run_generate_docs(\n config: &Config,\n full: bool,\n project_filter: Option<&str>,\n) -> Result\n```\n\n**Full mode seeding (keyset pagination):**\n```rust\nconst FULL_MODE_CHUNK_SIZE: usize = 2000;\n\n// For each source type (issues, MRs, discussions):\nlet mut last_id: i64 = 0;\nloop {\n let tx = conn.transaction()?;\n let inserted = tx.execute(\n \"INSERT INTO dirty_sources (source_type, source_id, queued_at, ...)\n SELECT 'issue', id, ?, 0, NULL, NULL, NULL\n FROM issues WHERE id > ? 
ORDER BY id LIMIT ?\n ON CONFLICT(source_type, source_id) DO NOTHING\",\n params![now_ms(), last_id, FULL_MODE_CHUNK_SIZE],\n )?;\n if inserted == 0 { tx.commit()?; break; }\n // Advance keyset cursor...\n tx.commit()?;\n}\n```\n\n**After draining (full mode only):**\n```sql\nINSERT INTO documents_fts(documents_fts) VALUES('optimize')\n```\n\n**CLI args:**\n```rust\n#[derive(Args)]\npub struct GenerateDocsArgs {\n #[arg(long)]\n full: bool,\n #[arg(long)]\n project: Option,\n}\n```\n\n**Output:** Human-readable table + JSON robot mode.\n\n## Acceptance Criteria\n- [ ] Default mode (no --full): processes only existing dirty_sources entries\n- [ ] --full mode: seeds dirty_sources with ALL issues, MRs, and discussions\n- [ ] Full mode uses keyset pagination (WHERE id > last_id, not OFFSET)\n- [ ] Full mode chunk size is 2000\n- [ ] Full mode does FTS optimize after completion\n- [ ] Both modes use regenerate_dirty_documents() (same codepath)\n- [ ] Progress bar shown in human mode (via indicatif)\n- [ ] JSON output in robot mode with GenerateDocsResult\n- [ ] GenerateDocsResult has issues/mrs/discussions/total/truncated/skipped counts\n- [ ] `cargo build` succeeds\n\n## Files\n- `src/cli/commands/generate_docs.rs` — new file\n- `src/cli/commands/mod.rs` — add `pub mod generate_docs;`\n- `src/cli/mod.rs` — add GenerateDocsArgs, wire up generate-docs subcommand\n- `src/main.rs` — add generate-docs command handler\n\n## TDD Loop\nRED: Integration test with seeded DB\nGREEN: Implement run_generate_docs with seeding + drain\nVERIFY: `cargo build && cargo test generate_docs`\n\n## Edge Cases\n- Empty database (no issues/MRs/discussions): full mode seeds nothing, returns all-zero counts\n- --project filter in full mode: only seed dirty_sources for entities in that project\n- Interrupted full mode: dirty_sources entries persist (ON CONFLICT DO NOTHING), resume by re-running\n- FTS optimize on empty FTS table: no-op 
(safe)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:25:55.226666Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:49:23.397157Z","closed_at":"2026-01-30T17:49:23.397098Z","close_reason":"Implemented generate-docs command with incremental + full mode, keyset pagination seeding, FTS optimize, project filter, human + JSON output. Builds clean.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3qs","depends_on_id":"bd-1u1","type":"blocks","created_at":"2026-01-30T15:29:16.089769Z","created_by":"tayloreernisse"},{"issue_id":"bd-3qs","depends_on_id":"bd-221","type":"blocks","created_at":"2026-01-30T15:29:16.125158Z","created_by":"tayloreernisse"}]} {"id":"bd-3rl","title":"Epic: Gate C - Sync MVP","description":"## Background\nGate C adds the sync orchestrator and queue infrastructure that makes the search pipeline incremental and self-maintaining. It introduces dirty source tracking (change detection during ingestion), the discussion fetch queue, and the unified lore sync command that orchestrates the full pipeline. Gate C also adds integrity checks and repair paths.\n\n## Gate C Deliverables\n1. Orchestrated lore sync command with incremental doc regen + re-embedding\n2. Integrity checks + repair paths for FTS/embeddings consistency\n\n## Bead Dependencies (execution order, after Gate A)\n1. **bd-mem** — Shared backoff utility (no deps, shared with Gate B)\n2. **bd-38q** — Dirty source tracking (blocked by bd-36p, bd-hrs, bd-mem)\n3. **bd-1je** — Discussion queue (blocked by bd-hrs, bd-mem)\n4. **bd-1i2** — Integrate dirty tracking into ingestion (blocked by bd-38q)\n5. 
**bd-1x6** — Sync CLI (blocked by bd-38q, bd-1je, bd-1i2, bd-3qs, bd-2sx)\n\n## Acceptance Criteria\n- [ ] `lore sync` runs full pipeline: ingest -> generate-docs -> embed\n- [ ] `lore sync --full` does full re-sync + regeneration\n- [ ] `lore sync --no-embed` skips embedding stage\n- [ ] Dirty tracking: upserted entities automatically marked for regeneration\n- [ ] Queue draining: dirty_sources fully drained in bounded batch loop\n- [ ] Backoff: failed items use exponential backoff with jitter\n- [ ] `lore stats --check` detects inconsistencies\n- [ ] `lore stats --repair` fixes FTS/embedding inconsistencies","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-30T15:25:13.494698Z","created_by":"tayloreernisse","updated_at":"2026-01-30T18:05:52.121666Z","closed_at":"2026-01-30T18:05:52.121619Z","close_reason":"All Gate C sub-beads complete: backoff utility, dirty tracking, discussion queue, ingestion integration, sync CLI, stats CLI","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3rl","depends_on_id":"bd-1x6","type":"blocks","created_at":"2026-01-30T15:29:35.853817Z","created_by":"tayloreernisse"},{"issue_id":"bd-3rl","depends_on_id":"bd-pr1","type":"blocks","created_at":"2026-01-30T15:29:35.892441Z","created_by":"tayloreernisse"}]} {"id":"bd-3sh","title":"Add 'lore count events' command with robot mode","description":"## Background\nNeed to verify event ingestion and report counts by type. The existing count command (src/cli/commands/count.rs) handles issues, mrs, discussions, notes with both human and robot output. This adds 'events' as a new count subcommand.\n\n## Approach\nExtend the existing count command in src/cli/commands/count.rs:\n\n1. Add CountTarget::Events variant (or string match) in the count dispatcher\n2. 
Query each event table with GROUP BY entity type:\n```sql\nSELECT \n CASE WHEN issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as entity_type,\n COUNT(*) as count\nFROM resource_state_events\nGROUP BY entity_type;\n-- (repeat for label and milestone events)\n```\n\n3. Human output: table format\n```\nEvent Type Issues MRs Total\nState events 1,234 567 1,801\nLabel events 2,345 890 3,235\nMilestone events 456 123 579\nTotal 4,035 1,580 5,615\n```\n\n4. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"state_events\": {\"issue\": 1234, \"merge_request\": 567, \"total\": 1801},\n \"label_events\": {\"issue\": 2345, \"merge_request\": 890, \"total\": 3235},\n \"milestone_events\": {\"issue\": 456, \"merge_request\": 123, \"total\": 579},\n \"total\": 5615\n }\n}\n```\n\n5. Register in CLI: add \"events\" to count's entity_type argument in src/cli/mod.rs\n\n## Acceptance Criteria\n- [ ] `lore count events` shows correct counts by event type and entity type\n- [ ] Robot JSON matches the schema above\n- [ ] Works with empty tables (all zeros)\n- [ ] Does not error if migration 011 hasn't been applied (graceful degradation or \"no event tables\" message)\n\n## Files\n- src/cli/commands/count.rs (add events counting logic)\n- src/cli/mod.rs (add \"events\" to count's accepted entity types)\n\n## TDD Loop\nRED: tests/count_tests.rs (or extend existing):\n- `test_count_events_empty_tables` - verify all zeros on fresh DB\n- `test_count_events_with_data` - seed state + label events, verify correct counts\n- `test_count_events_robot_json` - verify JSON structure\n\nGREEN: Add the events branch to count command\n\nVERIFY: `cargo test count -- --nocapture`\n\n## Edge Cases\n- Tables don't exist if user hasn't run migrate — check table existence first or catch the error\n- COUNT with GROUP BY returns no rows for empty tables — need to handle missing entity types as 
0","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-02T21:31:57.379702Z","created_by":"tayloreernisse","updated_at":"2026-02-03T16:21:21.408874Z","closed_at":"2026-02-03T16:21:21.408806Z","close_reason":"Added 'events' to count CLI parser, run_count_events function, print_event_count (table format) and print_event_count_json (structured JSON). Wired into handle_count in main.rs.","compaction_level":0,"original_size":0,"labels":["cli","gate-1","phase-b"],"dependencies":[{"issue_id":"bd-3sh","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:57.380927Z","created_by":"tayloreernisse"},{"issue_id":"bd-3sh","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T21:32:06.308285Z","created_by":"tayloreernisse"}]} @@ -170,7 +171,7 @@ {"id":"bd-hu3","title":"Write migration 011: resource event tables, entity_references, and dependent fetch queue","description":"## Background\nPhase B needs three new event tables and a generic dependent fetch queue to power temporal queries (timeline, file-history, trace). 
These tables store structured event data from GitLab Resource Events APIs, replacing fragile system note parsing for state/label/milestone changes.\n\nMigration 010_chunk_config.sql already exists, so Phase B starts at migration 011.\n\n## Approach\nCreate migrations/011_resource_events.sql with the exact schema from the Phase B spec (§1.2 + §2.2):\n\n**Event tables:**\n- resource_state_events: state changes (opened/closed/reopened/merged/locked) with source_merge_request_id for \"closed by MR\" linking\n- resource_label_events: label add/remove with label_name\n- resource_milestone_events: milestone add/remove with milestone_title + milestone_id\n\n**Cross-reference table (Gate 2):**\n- entity_references: source/target entity pairs with reference_type (closes/mentioned/related), source_method provenance, and unresolved reference support (target_entity_id NULL with target_project_path + target_entity_iid)\n\n**Dependent fetch queue:**\n- pending_dependent_fetches: generic job queue with job_type IN ('resource_events', 'mr_closes_issues', 'mr_diffs'), locked_at crash recovery, exponential backoff via attempts + next_retry_at\n\n**All tables must have:**\n- CHECK constraints for entity exclusivity (issue XOR merge_request) on event tables\n- UNIQUE constraints (gitlab_id + project_id for events, composite for queue, multi-column for references)\n- Partial indexes (WHERE issue_id IS NOT NULL, WHERE target_entity_id IS NULL, etc.)\n- CASCADE deletes on project_id and entity FKs\n\nRegister in src/core/db.rs MIGRATIONS array:\n```rust\n(\"011\", include_str!(\"../../migrations/011_resource_events.sql\")),\n```\n\nEnd migration with:\n```sql\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (11, strftime('%s', 'now') * 1000, 'Resource events, entity references, and dependent fetch queue');\n```\n\n## Acceptance Criteria\n- [ ] migrations/011_resource_events.sql exists with all 4 tables + indexes + constraints\n- [ ] src/core/db.rs MIGRATIONS array 
includes (\"011\", include_str!(...))\n- [ ] `cargo build` succeeds (migration SQL compiles into binary)\n- [ ] `cargo test migration` passes (migration applies cleanly on fresh DB)\n- [ ] All CHECK constraints enforced (issue XOR merge_request on event tables)\n- [ ] All UNIQUE constraints present (prevents duplicate events/refs/jobs)\n- [ ] entity_references UNIQUE handles NULL coalescing correctly\n- [ ] pending_dependent_fetches job_type CHECK includes all three types\n\n## Files\n- migrations/011_resource_events.sql (new)\n- src/core/db.rs (add to MIGRATIONS array, line ~46)\n\n## TDD Loop\nRED: Add test in tests/migration_tests.rs:\n- `test_migration_011_creates_event_tables` - verify all 4 tables exist after migration\n- `test_migration_011_entity_exclusivity_constraint` - verify CHECK rejects both NULL and both non-NULL for issue_id/merge_request_id\n- `test_migration_011_event_dedup` - verify UNIQUE(gitlab_id, project_id) rejects duplicate events\n- `test_migration_011_entity_references_dedup` - verify UNIQUE constraint with NULL coalescing\n- `test_migration_011_queue_dedup` - verify UNIQUE(project_id, entity_type, entity_iid, job_type)\n\nGREEN: Write the migration SQL + register in db.rs\n\nVERIFY: `cargo test migration_tests -- --nocapture`\n\n## Edge Cases\n- entity_references UNIQUE uses COALESCE for NULLable columns — test with both resolved and unresolved refs\n- pending_dependent_fetches job_type CHECK — ensure 'mr_diffs' is included (Gate 4 needs it)\n- SQLite doesn't enforce CHECK on INSERT OR REPLACE — verify constraint behavior\n- The entity exclusivity CHECK must allow exactly one of issue_id/merge_request_id to be non-NULL\n- Verify CASCADE deletes work (delete project → all events/refs/jobs deleted)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:31:23.933894Z","created_by":"tayloreernisse","updated_at":"2026-02-03T16:06:28.918228Z","closed_at":"2026-02-03T16:06:28.917906Z","close_reason":"Already completed 
in prior session, re-closing after accidental reopen","compaction_level":0,"original_size":0,"labels":["gate-1","phase-b","schema"],"dependencies":[{"issue_id":"bd-hu3","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:23.937375Z","created_by":"tayloreernisse"}]} {"id":"bd-iba","title":"Add GitLab client MR pagination methods","description":"## Background\nGitLab client pagination for merge requests and discussions. Must support robust pagination with fallback chain because some GitLab instances/proxies strip headers.\n\n## Approach\nAdd to existing `src/gitlab/client.rs`:\n1. `MergeRequestPage` struct - Items + pagination metadata\n2. `parse_link_header_next()` - RFC 8288 Link header parsing\n3. `fetch_merge_requests_page()` - Single page fetch with metadata\n4. `paginate_merge_requests()` - Async stream for all MRs\n5. `paginate_mr_discussions()` - Async stream for MR discussions\n\n## Files\n- `src/gitlab/client.rs` - Add pagination methods\n\n## Acceptance Criteria\n- [ ] `MergeRequestPage` struct exists with `items`, `next_page`, `is_last_page`\n- [ ] `parse_link_header_next()` extracts `rel=\"next\"` URL from Link header\n- [ ] Pagination fallback chain: Link header > x-next-page > full-page heuristic\n- [ ] `paginate_merge_requests()` returns `Pin>>>`\n- [ ] `paginate_mr_discussions()` returns `Pin>>>`\n- [ ] MR endpoint uses `scope=all&state=all` to include all MRs\n- [ ] `cargo test client` passes\n\n## TDD Loop\nRED: `cargo test fetch_merge_requests` -> method not found\nGREEN: Add pagination methods\nVERIFY: `cargo test client`\n\n## Struct Definitions\n```rust\n#[derive(Debug)]\npub struct MergeRequestPage {\n pub items: Vec,\n pub next_page: Option,\n pub is_last_page: bool,\n}\n```\n\n## Link Header Parsing (RFC 8288)\n```rust\n/// Parse Link header to extract rel=\"next\" URL.\nfn parse_link_header_next(headers: &reqwest::header::HeaderMap) -> Option {\n headers\n .get(\"link\")\n .and_then(|v| v.to_str().ok())\n 
.and_then(|link_str| {\n // Format: ; rel=\"next\", ; rel=\"last\"\n for part in link_str.split(',') {\n let part = part.trim();\n if part.contains(\"rel=\\\"next\\\"\") || part.contains(\"rel=next\") {\n if let Some(start) = part.find('<') {\n if let Some(end) = part.find('>') {\n return Some(part[start + 1..end].to_string());\n }\n }\n }\n }\n None\n })\n}\n```\n\n## Pagination Fallback Chain\n```rust\nlet next_page = match (link_next, x_next_page, items.len() as u32 == per_page) {\n (Some(_), _, _) => Some(page + 1), // Link header present: continue\n (None, Some(np), _) => Some(np), // x-next-page present: use it\n (None, None, true) => Some(page + 1), // Full page, no headers: try next\n (None, None, false) => None, // Partial page: we're done\n};\n```\n\n## Fetch Single Page\n```rust\npub async fn fetch_merge_requests_page(\n &self,\n gitlab_project_id: i64,\n updated_after: Option,\n cursor_rewind_seconds: u32,\n page: u32,\n per_page: u32,\n) -> Result {\n let mut params = vec![\n (\"scope\", \"all\".to_string()),\n (\"state\", \"all\".to_string()),\n (\"order_by\", \"updated_at\".to_string()),\n (\"sort\", \"asc\".to_string()),\n (\"per_page\", per_page.to_string()),\n (\"page\", page.to_string()),\n ];\n // Apply cursor rewind for safety\n // ...\n}\n```\n\n## Async Stream Pattern\n```rust\npub fn paginate_merge_requests(\n &self,\n gitlab_project_id: i64,\n updated_after: Option,\n cursor_rewind_seconds: u32,\n) -> Pin> + Send + '_>> {\n Box::pin(async_stream::try_stream! 
{\n let mut page = 1u32;\n let per_page = 100u32;\n loop {\n let page_result = self.fetch_merge_requests_page(...).await?;\n for mr in page_result.items {\n yield mr;\n }\n if page_result.is_last_page {\n break;\n }\n match page_result.next_page {\n Some(np) => page = np,\n None => break,\n }\n }\n })\n}\n```\n\n## Edge Cases\n- `scope=all` required to include all MRs (not just authored by current user)\n- `state=all` required to include merged/closed (GitLab defaults may exclude)\n- `locked` state cannot be filtered server-side (use local SQL filtering)\n- Cursor rewind should clamp to 0 to avoid negative timestamps","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:41.633065Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:13:05.613625Z","closed_at":"2026-01-27T00:13:05.613440Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-iba","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.364647Z","created_by":"tayloreernisse"}]} {"id":"bd-ike","title":"Epic: Gate 3 - Decision Timeline (lore timeline)","description":"## Background\n\nGate 3 is the first user-facing temporal feature: `lore timeline `. It answers \"What happened with X?\" by finding matching entities via FTS5, expanding cross-references, collecting all temporal events, and rendering a chronological narrative.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Gate 3 (Sections 3.1-3.6).\n\n## Prerequisites (All Complete)\n\n- Gates 1-2 COMPLETE: resource_state_events, resource_label_events, resource_milestone_events, entity_references all populated\n- FTS5 search index (CP3): working search infrastructure for keyword matching\n- Migration 015 (commit SHAs, closes watermark) exists on disk (registered by bd-1oo)\n\n## Architecture — 5-Stage Pipeline\n\n```\n1. SEED: FTS5 keyword search -> matched document IDs (issues, MRs, notes)\n2. 
HYDRATE: Map document IDs -> source entities + top matched notes as evidence\n3. EXPAND: BFS over entity_references (depth-limited, edge-type filtered)\n4. COLLECT: Gather events from all tables for seed + expanded entities\n5. RENDER: Sort chronologically, format as human or robot output\n```\n\nNo new tables required. All reads are from existing tables at query time.\n\n## Children (Execution Order)\n\n1. **bd-20e** — Define TimelineEvent model and TimelineEventType enum (types first)\n2. **bd-32q** — Implement timeline seed phase: FTS5 keyword search to entity IDs\n3. **bd-ypa** — Implement timeline expand phase: BFS cross-reference expansion\n4. **bd-3as** — Implement timeline event collection and chronological interleaving\n5. **bd-1nf** — Register lore timeline command with all flags (CLI wiring)\n6. **bd-2f2** — Implement timeline human output renderer\n7. **bd-dty** — Implement timeline robot mode JSON output\n\n## Gate Completion Criteria\n\n- [ ] `lore timeline ` returns chronologically ordered events\n- [ ] Seed entities found via FTS5 keyword search (issues, MRs, and notes)\n- [ ] State, label, and milestone events interleaved from resource event tables\n- [ ] Entity creation and merge events included\n- [ ] Evidence-bearing notes included as note_evidence events (top FTS5 matches, bounded default 10)\n- [ ] Cross-reference expansion follows entity_references to configurable depth\n- [ ] Default: follows closes + related edges; --expand-mentions adds mentioned\n- [ ] --depth 0 disables expansion\n- [ ] --since filters by event timestamp\n- [ ] -p scopes to project\n- [ ] Human output is colored and readable\n- [ ] Robot mode returns structured JSON with expansion provenance\n- [ ] Unresolved (external) references included in JSON 
output\n","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-02T21:31:01.036474Z","created_by":"tayloreernisse","updated_at":"2026-02-06T13:49:21.285350Z","closed_at":"2026-02-06T13:49:21.285302Z","close_reason":"Gate 3 complete: all 7 children closed. Timeline pipeline fully implemented with SEED->HYDRATE->EXPAND->COLLECT->RENDER stages, human+robot renderers, CLI wiring with 9 flags, robot-docs manifest entry","compaction_level":0,"original_size":0,"labels":["epic","gate-3","phase-b"],"dependencies":[{"issue_id":"bd-ike","depends_on_id":"bd-1se","type":"blocks","created_at":"2026-02-02T21:33:37.875622Z","created_by":"tayloreernisse"},{"issue_id":"bd-ike","depends_on_id":"bd-2zl","type":"blocks","created_at":"2026-02-02T21:33:37.831914Z","created_by":"tayloreernisse"}]} -{"id":"bd-jec","title":"Add fetchMrFileChanges config flag","description":"## Background\n\nConfig flag controlling whether MR diff fetching is enabled, following the fetchResourceEvents pattern.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.2.\n\n## Codebase Context\n\n- src/core/config.rs has SyncConfig with fetch_resource_events: bool (serde rename 'fetchResourceEvents', default true)\n- Default impl exists for SyncConfig\n- CLI sync options in src/cli/mod.rs have --no-events flag pattern\n- Orchestrator checks config.sync.fetch_resource_events before enqueuing resource_events jobs\n\n## Approach\n\n### 1. Add to SyncConfig (`src/core/config.rs`):\n```rust\n#[serde(rename = \"fetchMrFileChanges\", default = \"default_true\")]\npub fetch_mr_file_changes: bool,\n```\n\nUpdate Default impl to include fetch_mr_file_changes: true.\n\n### 2. CLI override (`src/cli/mod.rs`):\n```rust\n#[arg(long = \"no-file-changes\")]\npub no_file_changes: bool,\n```\n\n### 3. Apply in main.rs:\n```rust\nif args.no_file_changes { config.sync.fetch_mr_file_changes = false; }\n```\n\n### 4. 
Guard in orchestrator:\n```rust\nif config.sync.fetch_mr_file_changes { enqueue mr_diffs jobs }\n```\n\n## Acceptance Criteria\n\n- [ ] fetchMrFileChanges in SyncConfig, default true\n- [ ] Config without field defaults to true\n- [ ] --no-file-changes disables diff fetching\n- [ ] Orchestrator skips mr_diffs when false\n- [ ] `cargo check --all-targets` passes\n\n## Files\n\n- `src/core/config.rs` (add field + Default)\n- `src/cli/mod.rs` (add --no-file-changes)\n- `src/main.rs` (apply override)\n- `src/ingestion/orchestrator.rs` (guard enqueue)\n\n## TDD Loop\n\nRED:\n- `test_config_default_fetch_mr_file_changes` - default is true\n- `test_config_deserialize_false` - JSON with false\n\nGREEN: Add field, default, serde attribute.\n\nVERIFY: `cargo test --lib -- config`\n\n## Edge Cases\n\n- Config missing fetchMrFileChanges key entirely: serde default_true fills in true\n- Config explicitly set to false: no mr_diffs jobs enqueued, mr_file_changes table empty\n- --no-file-changes with --full sync: overrides config, no diffs fetched even on full resync\n- sync.fetchMrFileChanges = false in config + no --no-file-changes flag: respects config (no override)","status":"open","priority":3,"issue_type":"task","created_at":"2026-02-02T21:34:08.892666Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:58:59.391677Z","compaction_level":0,"original_size":0,"labels":["config","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-jec","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.895167Z","created_by":"tayloreernisse"}]} +{"id":"bd-jec","title":"Add fetchMrFileChanges config flag","description":"## Background\n\nConfig flag controlling whether MR diff fetching is enabled, following the fetchResourceEvents pattern.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.2.\n\n## Codebase Context\n\n- src/core/config.rs has SyncConfig with fetch_resource_events: bool (serde rename 'fetchResourceEvents', default true)\n- 
Default impl exists for SyncConfig\n- CLI sync options in src/cli/mod.rs have --no-events flag pattern\n- Orchestrator checks config.sync.fetch_resource_events before enqueuing resource_events jobs\n\n## Approach\n\n### 1. Add to SyncConfig (`src/core/config.rs`):\n```rust\n#[serde(rename = \"fetchMrFileChanges\", default = \"default_true\")]\npub fetch_mr_file_changes: bool,\n```\n\nUpdate Default impl to include fetch_mr_file_changes: true.\n\n### 2. CLI override (`src/cli/mod.rs`):\n```rust\n#[arg(long = \"no-file-changes\")]\npub no_file_changes: bool,\n```\n\n### 3. Apply in main.rs:\n```rust\nif args.no_file_changes { config.sync.fetch_mr_file_changes = false; }\n```\n\n### 4. Guard in orchestrator:\n```rust\nif config.sync.fetch_mr_file_changes { enqueue mr_diffs jobs }\n```\n\n## Acceptance Criteria\n\n- [ ] fetchMrFileChanges in SyncConfig, default true\n- [ ] Config without field defaults to true\n- [ ] --no-file-changes disables diff fetching\n- [ ] Orchestrator skips mr_diffs when false\n- [ ] `cargo check --all-targets` passes\n\n## Files\n\n- `src/core/config.rs` (add field + Default)\n- `src/cli/mod.rs` (add --no-file-changes)\n- `src/main.rs` (apply override)\n- `src/ingestion/orchestrator.rs` (guard enqueue)\n\n## TDD Loop\n\nRED:\n- `test_config_default_fetch_mr_file_changes` - default is true\n- `test_config_deserialize_false` - JSON with false\n\nGREEN: Add field, default, serde attribute.\n\nVERIFY: `cargo test --lib -- config`\n\n## Edge Cases\n\n- Config missing fetchMrFileChanges key entirely: serde default_true fills in true\n- Config explicitly set to false: no mr_diffs jobs enqueued, mr_file_changes table empty\n- --no-file-changes with --full sync: overrides config, no diffs fetched even on full resync\n- sync.fetchMrFileChanges = false in config + no --no-file-changes flag: respects config (no 
override)","status":"closed","priority":3,"issue_type":"task","created_at":"2026-02-02T21:34:08.892666Z","created_by":"tayloreernisse","updated_at":"2026-02-08T18:18:36.409511Z","closed_at":"2026-02-08T18:18:36.409467Z","close_reason":"Added fetch_mr_file_changes to SyncConfig (default true, serde rename fetchMrFileChanges), --no-file-changes CLI flag in SyncArgs, override in main.rs. Orchestrator guard deferred to bd-2yo which implements the actual drain.","compaction_level":0,"original_size":0,"labels":["config","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-jec","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.895167Z","created_by":"tayloreernisse"}]} {"id":"bd-jov","title":"[CP1] Discussion and note transformers","description":"Transform GitLab discussion/note payloads to normalized database schema.\n\n## Module\nsrc/gitlab/transformers/discussion.rs\n\n## Structs\n\n### NormalizedDiscussion\n- gitlab_discussion_id: String\n- project_id: i64\n- issue_id: i64\n- noteable_type: String (\"Issue\")\n- individual_note: bool\n- first_note_at, last_note_at: Option\n- last_seen_at: i64\n- resolvable, resolved: bool\n\n### NormalizedNote\n- gitlab_id: i64\n- project_id: i64\n- note_type: Option\n- is_system: bool\n- author_username: String\n- body: String\n- created_at, updated_at, last_seen_at: i64\n- position: i32 (array index in notes[])\n- resolvable, resolved: bool\n- resolved_by: Option\n- resolved_at: Option\n\n## Functions\n\n### transform_discussion(gitlab_discussion, local_project_id, local_issue_id) -> NormalizedDiscussion\n- Compute first_note_at/last_note_at from notes array min/max created_at\n- Compute resolvable (any note resolvable)\n- Compute resolved (resolvable AND all resolvable notes resolved)\n\n### transform_notes(gitlab_discussion, local_project_id) -> Vec\n- Enumerate notes to get position (array index)\n- Set is_system from note.system\n- Convert timestamps to ms epoch\n\nFiles: 
src/gitlab/transformers/discussion.rs\nTests: tests/discussion_transformer_tests.rs\nDone when: Unit tests pass for discussion/note transformation with system note flagging","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:43:04.481361Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.759691Z","deleted_at":"2026-01-25T17:02:01.759684Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-k7b","title":"[CP1] gi show issue command","description":"Show issue details with discussions.\n\n## Module\nsrc/cli/commands/show.rs\n\n## Clap Definition\nShow {\n #[arg(value_parser = [\"issue\", \"mr\"])]\n entity: String,\n \n iid: i64,\n \n #[arg(long)]\n project: Option,\n}\n\n## Output Format\nIssue #1234: Authentication redesign\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\nProject: group/project-one\nState: opened\nAuthor: @johndoe\nCreated: 2024-01-15\nUpdated: 2024-03-20\nLabels: enhancement, auth\nURL: https://gitlab.example.com/group/project-one/-/issues/1234\n\nDescription:\n We need to redesign the authentication flow to support...\n\nDiscussions (5):\n\n @janedoe (2024-01-16):\n I agree we should move to JWT-based auth...\n\n @johndoe (2024-01-16):\n What about refresh token strategy?\n\n @bobsmith (2024-01-17):\n Have we considered OAuth2?\n\n## Ambiguity Handling\nIf multiple projects have same iid, either:\n- Prompt for --project flag\n- Show error listing which projects have that iid\n\nFiles: src/cli/commands/show.rs\nDone when: Issue detail view displays all fields including threaded discussions","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:26.904813Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.944183Z","deleted_at":"2026-01-25T17:02:01.944179Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct 
deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-lcb","title":"Epic: CP2 Gate E - CLI Complete","description":"## Background\nGate E validates all CLI commands are functional and user-friendly. This is the final usability gate - even if all data is correct, users need good CLI UX to access it.\n\n## Acceptance Criteria (Pass/Fail)\n\n### List Command\n- [ ] `gi list mrs` shows MR table with columns: iid, title, state, author, branches, updated\n- [ ] `gi list mrs --state=opened` filters to only opened MRs\n- [ ] `gi list mrs --state=merged` filters to only merged MRs\n- [ ] `gi list mrs --state=closed` filters to only closed MRs\n- [ ] `gi list mrs --state=locked` filters locally (not server-side filter)\n- [ ] `gi list mrs --draft` shows only draft MRs\n- [ ] `gi list mrs --no-draft` excludes draft MRs\n- [ ] Draft MRs show `[DRAFT]` prefix in title column\n- [ ] `gi list mrs --author=username` filters by author\n- [ ] `gi list mrs --assignee=username` filters by assignee\n- [ ] `gi list mrs --reviewer=username` filters by reviewer\n- [ ] `gi list mrs --target-branch=main` filters by target branch\n- [ ] `gi list mrs --source-branch=feature/x` filters by source branch\n- [ ] `gi list mrs --label=bugfix` filters by label\n- [ ] `gi list mrs --limit=N` limits output\n\n### Show Command\n- [ ] `gi show mr ` displays full MR detail\n- [ ] Show includes: title, description, state, draft status, author\n- [ ] Show includes: assignees, reviewers, labels\n- [ ] Show includes: source_branch, target_branch\n- [ ] Show includes: detailed_merge_status (e.g., \"mergeable\")\n- [ ] Show includes: merge_user and merged_at for merged MRs\n- [ ] Show includes: discussions with author and date\n- [ ] DiffNote shows file context: `[src/file.ts:45]`\n- [ ] Multi-line DiffNote shows range: `[src/file.ts:45-48]`\n- [ ] Resolved discussions show `[RESOLVED]` marker\n\n### Count Command\n- [ ] `gi count mrs` shows total count\n- [ ] Count shows state 
breakdown: opened, merged, closed\n\n### Sync Status\n- [ ] `gi sync-status` shows MR cursor position\n- [ ] Sync status shows last sync timestamp\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate E: CLI Complete ===\"\n\n# 1. Test list command (basic)\necho \"Step 1: Basic list...\"\ngi list mrs --limit=5 || { echo \"FAIL: list mrs failed\"; exit 1; }\n\n# 2. Test state filters\necho \"Step 2: State filters...\"\nfor state in opened merged closed; do\n echo \" Testing --state=$state\"\n gi list mrs --state=$state --limit=3 || echo \" Warning: No $state MRs\"\ndone\n\n# 3. Test draft filters\necho \"Step 3: Draft filters...\"\ngi list mrs --draft --limit=3 || echo \" Note: No draft MRs found\"\ngi list mrs --no-draft --limit=3 || echo \" Note: All MRs are drafts?\"\n\n# 4. Check [DRAFT] prefix\necho \"Step 4: Check [DRAFT] prefix...\"\nDRAFT_IID=$(sqlite3 \"$DB_PATH\" \"SELECT iid FROM merge_requests WHERE draft = 1 LIMIT 1;\")\nif [ -n \"$DRAFT_IID\" ]; then\n if gi list mrs --limit=100 | grep -q \"\\[DRAFT\\]\"; then\n echo \" PASS: [DRAFT] prefix found\"\n else\n echo \" FAIL: Draft MR exists but no [DRAFT] prefix in output\"\n fi\nelse\n echo \" Skip: No draft MRs to test\"\nfi\n\n# 5. Test author/assignee/reviewer filters\necho \"Step 5: User filters...\"\nAUTHOR=$(sqlite3 \"$DB_PATH\" \"SELECT author_username FROM merge_requests LIMIT 1;\")\nif [ -n \"$AUTHOR\" ]; then\n echo \" Testing --author=$AUTHOR\"\n gi list mrs --author=\"$AUTHOR\" --limit=3\nfi\n\nREVIEWER=$(sqlite3 \"$DB_PATH\" \"SELECT username FROM mr_reviewers LIMIT 1;\")\nif [ -n \"$REVIEWER\" ]; then\n echo \" Testing --reviewer=$REVIEWER\"\n gi list mrs --reviewer=\"$REVIEWER\" --limit=3\nfi\n\n# 6. 
Test branch filters\necho \"Step 6: Branch filters...\"\nTARGET=$(sqlite3 \"$DB_PATH\" \"SELECT target_branch FROM merge_requests LIMIT 1;\")\nif [ -n \"$TARGET\" ]; then\n echo \" Testing --target-branch=$TARGET\"\n gi list mrs --target-branch=\"$TARGET\" --limit=3\nfi\n\n# 7. Test show command\necho \"Step 7: Show command...\"\nMR_IID=$(sqlite3 \"$DB_PATH\" \"SELECT iid FROM merge_requests LIMIT 1;\")\ngi show mr \"$MR_IID\" || { echo \"FAIL: show mr failed\"; exit 1; }\n\n# 8. Test show with DiffNote context\necho \"Step 8: Show with DiffNote...\"\nDIFFNOTE_MR=$(sqlite3 \"$DB_PATH\" \"\n SELECT DISTINCT m.iid\n FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n JOIN notes n ON n.discussion_id = d.id\n WHERE n.position_new_path IS NOT NULL\n LIMIT 1;\n\")\nif [ -n \"$DIFFNOTE_MR\" ]; then\n echo \" Testing MR with DiffNotes: !$DIFFNOTE_MR\"\n OUTPUT=$(gi show mr \"$DIFFNOTE_MR\")\n if echo \"$OUTPUT\" | grep -qE '\\[[^]]+:[0-9]+\\]'; then\n echo \" PASS: File context [path:line] found\"\n else\n echo \" FAIL: DiffNote should show [path:line] context\"\n fi\nelse\n echo \" Skip: No MRs with DiffNotes\"\nfi\n\n# 9. Test count command\necho \"Step 9: Count command...\"\ngi count mrs || { echo \"FAIL: count mrs failed\"; exit 1; }\n\n# 10. 
Test sync-status\necho \"Step 10: Sync status...\"\ngi sync-status || echo \" Note: sync-status may need implementation\"\n\necho \"\"\necho \"=== Gate E: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# List with all column types visible:\ngi list mrs --limit=10\n\n# Show a specific MR:\ngi show mr 42\n\n# Count with breakdown:\ngi count mrs\n\n# Complex filter:\ngi list mrs --state=opened --reviewer=alice --target-branch=main --limit=5\n```\n\n## Expected Output Formats\n\n### gi list mrs\n```\nMerge Requests (showing 5 of 1,234)\n\n !847 Refactor auth to use JWT tokens merged @johndoe main <- feature/jwt 3d ago\n !846 Fix memory leak in websocket handler opened @janedoe main <- fix/websocket 5d ago\n !845 [DRAFT] Add dark mode CSS variables opened @bobsmith main <- ui/dark-mode 1w ago\n !844 Update dependencies to latest versions closed @alice main <- chore/deps 2w ago\n```\n\n### gi show mr 847\n```\nMerge Request !847: Refactor auth to use JWT tokens\n================================================================================\n\nProject: group/project-one\nState: merged\nDraft: No\nAuthor: @johndoe\nAssignees: @janedoe, @bobsmith\nReviewers: @alice, @charlie\nLabels: enhancement, auth, reviewed\nSource: feature/jwt\nTarget: main\nMerge Status: merged\nMerged By: @alice\nMerged At: 2024-03-20 14:30:00\n\nDescription:\n Moving away from session cookies to JWT-based authentication...\n\nDiscussions (3):\n\n @janedoe (2024-03-16) [src/auth/jwt.ts:45]:\n Should we use a separate signing key for refresh tokens?\n\n @johndoe (2024-03-16):\n Good point. I'll add a separate key with rotation support.\n\n @alice (2024-03-18) [RESOLVED]:\n Looks good! 
Just one nit about the token expiry constant.\n```\n\n### gi count mrs\n```\nMerge Requests: 1,234\n opened: 89\n merged: 1,045\n closed: 100\n```\n\n## Dependencies\nThis gate requires:\n- bd-3js (CLI commands implementation)\n- All previous gates must pass first\n\n## Edge Cases\n- Ambiguous MR iid across projects: should prompt for `--project` or show error\n- Very long titles: should truncate with `...` in list view\n- Empty description: should show \"No description\" or empty section\n- No discussions: should show \"No discussions\" message\n- Unicode in titles/descriptions: should render correctly","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:02.411132Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.061166Z","closed_at":"2026-01-27T00:48:21.061125Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-lcb","depends_on_id":"bd-3js","type":"blocks","created_at":"2026-01-26T22:08:55.957747Z","created_by":"tayloreernisse"}]} diff --git a/.beads/last-touched b/.beads/last-touched index 865268f..a7a6ff8 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-1q8z +bd-3qn6 diff --git a/README.md b/README.md index eff4a2f..7141a6d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Gitlore -Local GitLab data management with semantic search and temporal intelligence. Syncs issues, MRs, discussions, and notes from GitLab to a local SQLite database for fast, offline-capable querying, filtering, hybrid search, and chronological event reconstruction. +Local GitLab data management with semantic search, people intelligence, and temporal analysis. Syncs issues, MRs, discussions, and notes from GitLab to a local SQLite database for fast, offline-capable querying, filtering, hybrid search, chronological event reconstruction, and expert discovery. ## Features @@ -10,6 +10,7 @@ Local GitLab data management with semantic search and temporal intelligence. 
Syn - **Multi-project**: Track issues and MRs across multiple GitLab projects - **Rich filtering**: Filter by state, author, assignee, labels, milestone, due date, draft status, reviewer, branches - **Hybrid search**: Combines FTS5 lexical search with Ollama-powered vector embeddings via Reciprocal Rank Fusion +- **People intelligence**: Expert discovery, workload analysis, review patterns, active discussions, and code ownership overlap - **Timeline pipeline**: Reconstructs chronological event histories by combining search, graph traversal, and event aggregation across related entities - **Git history linking**: Tracks merge and squash commit SHAs to connect MRs with git history - **File change tracking**: Records which files each MR touches, enabling file-level history queries @@ -17,7 +18,7 @@ Local GitLab data management with semantic search and temporal intelligence. Syn - **Discussion threading**: Full support for issue and MR discussions including inline code review comments - **Cross-reference tracking**: Automatic extraction of "closes", "mentioned" relationships between MRs and issues - **Resource event history**: Tracks state changes, label events, and milestone events for issues and MRs -- **Robot mode**: Machine-readable JSON output with structured errors and meaningful exit codes +- **Robot mode**: Machine-readable JSON output with structured errors, meaningful exit codes, and actionable recovery steps - **Observability**: Verbosity controls, JSON log format, structured metrics, and stage timing ## Installation @@ -60,6 +61,15 @@ lore mrs 456 # Search across all indexed data lore search "authentication bug" +# Who knows about this code area? +lore who src/features/auth/ + +# What is @asmith working on? +lore who @asmith + +# Timeline of events related to deployments +lore timeline "deployment" + # Robot mode (machine-readable JSON) lore -J issues -n 5 | jq . 
``` @@ -256,8 +266,135 @@ lore search "deploy" --explain # Show ranking explanation per resu lore search "deploy" --fts-mode raw # Raw FTS5 query syntax (advanced) ``` +The `--fts-mode` flag defaults to `safe`, which sanitizes user input into valid FTS5 queries with automatic fallback. Use `raw` for advanced FTS5 query syntax (AND, OR, NOT, phrase matching, prefix queries). + Requires `lore generate-docs` (or `lore sync`) to have been run at least once. Semantic and hybrid modes require `lore embed` (or `lore sync`) to have generated vector embeddings via Ollama. +### `lore who` + +People intelligence: discover experts, analyze workloads, review patterns, active discussions, and code overlap. + +#### Expert Mode + +Find who has expertise in a code area based on authoring and reviewing history (DiffNote analysis). + +```bash +lore who src/features/auth/ # Who knows about this directory? +lore who src/features/auth/login.ts # Who knows about this file? +lore who --path README.md # Root files need --path flag +lore who --path Makefile # Dotless root files too +lore who src/ --since 3m # Limit to recent 3 months +lore who src/ -p group/repo # Scope to project +``` + +The target is auto-detected as a path when it contains `/`. For root files without `/` (e.g., `README.md`), use the `--path` flag. Default time window: 6 months. + +#### Workload Mode + +See what someone is currently working on. + +```bash +lore who @asmith # Full workload summary +lore who @asmith -p group/repo # Scoped to one project +``` + +Shows: assigned open issues, authored MRs, MRs under review, and unresolved discussions. + +#### Reviews Mode + +Analyze someone's code review patterns by area. + +```bash +lore who @asmith --reviews # Review activity breakdown +lore who @asmith --reviews --since 3m # Recent review patterns +``` + +Shows: total DiffNotes, categorized by code area with percentage breakdown. + +#### Active Mode + +Surface unresolved discussions needing attention. 
+ +```bash +lore who --active # Unresolved discussions (last 7 days) +lore who --active --since 30d # Wider time window +lore who --active -p group/repo # Scoped to project +``` + +Shows: discussion threads with participants and last activity timestamps. + +#### Overlap Mode + +Find who else is touching a file or directory. + +```bash +lore who --overlap src/features/auth/ # Who else works here? +lore who --overlap src/lib.rs # Single file overlap +``` + +Shows: users with touch counts (author vs. review), linked MR references. Default time window: 6 months. + +#### Common Flags + +| Flag | Description | +|------|-------------| +| `-p` / `--project` | Scope to a project (fuzzy match) | +| `--since` | Time window (7d, 2w, 6m, YYYY-MM-DD). Default varies by mode. | +| `-n` / `--limit` | Max results per section (1-500, default 20) | + +### `lore timeline` + +Reconstruct a chronological timeline of events matching a keyword query. The pipeline discovers related entities through cross-reference graph traversal and assembles a unified, time-ordered event stream. 
+ +```bash +lore timeline "deployment" # Events related to deployments +lore timeline "auth" -p group/repo # Scoped to a project +lore timeline "auth" --since 30d # Only recent events +lore timeline "migration" --depth 2 # Deeper cross-reference expansion +lore timeline "migration" --expand-mentions # Follow 'mentioned' edges (high fan-out) +lore timeline "deploy" -n 50 # Limit event count +lore timeline "auth" --max-seeds 5 # Fewer seed entities +``` + +#### Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `-p` / `--project` | all | Scope to a specific project (fuzzy match) | +| `--since` | none | Only events after this date (7d, 2w, 6m, YYYY-MM-DD) | +| `--depth` | `1` | Cross-reference expansion depth (0 = seeds only) | +| `--expand-mentions` | off | Also follow "mentioned" edges during expansion | +| `-n` / `--limit` | `100` | Maximum events to display | +| `--max-seeds` | `10` | Maximum seed entities from search | +| `--max-entities` | `50` | Maximum entities discovered via cross-references | +| `--max-evidence` | `10` | Maximum evidence notes included | + +#### Pipeline Stages + +1. **SEED** -- Full-text search identifies the most relevant issues and MRs matching the query. Documents are ranked by BM25 relevance. +2. **HYDRATE** -- Evidence notes are extracted: the top FTS-matched discussion notes with 200-character snippets explaining *why* each entity was surfaced. +3. **EXPAND** -- Breadth-first traversal over the `entity_references` graph discovers related entities via "closes", "related", and optionally "mentioned" references up to the configured depth. +4. **COLLECT** -- Events are gathered for all discovered entities. Event types include: creation, state changes, label adds/removes, milestone assignments, merge events, and evidence notes. Events are sorted chronologically with stable tiebreaking. +5. **RENDER** -- Events are formatted as human-readable text or structured JSON (robot mode). 
+ +#### Event Types + +| Event | Description | +|-------|-------------| +| `Created` | Entity creation | +| `StateChanged` | State transitions (opened, closed, reopened) | +| `LabelAdded` | Label applied to entity | +| `LabelRemoved` | Label removed from entity | +| `MilestoneSet` | Milestone assigned | +| `MilestoneRemoved` | Milestone removed | +| `Merged` | MR merged (deduplicated against state events) | +| `NoteEvidence` | Discussion note matched by FTS, with snippet | +| `CrossReferenced` | Reference to another entity | + +#### Unresolved References + +When graph expansion encounters cross-project references to entities not yet synced locally, these are collected as unresolved references in the output. This enables discovery of external dependencies and can inform future sync targets. + ### `lore sync` Run the full sync pipeline: ingest from GitLab, generate searchable documents, and compute embeddings. @@ -269,6 +406,7 @@ lore sync --force # Override stale lock lore sync --no-embed # Skip embedding step lore sync --no-docs # Skip document regeneration lore sync --no-events # Skip resource event fetching +lore sync --dry-run # Preview what would be synced ``` The sync command displays animated progress bars for each stage and outputs timing metrics on completion. In robot mode (`-J`), detailed stage timing is included in the JSON response. @@ -284,6 +422,7 @@ lore ingest mrs # MRs only lore ingest issues -p group/repo # Single project lore ingest --force # Override stale lock lore ingest --full # Full re-sync (reset cursors) +lore ingest --dry-run # Preview what would change ``` The `--full` flag resets sync cursors and discussion watermarks, then fetches all data from scratch. Useful when: @@ -307,6 +446,7 @@ Generate vector embeddings for documents via Ollama. 
Requires Ollama running wit ```bash lore embed # Embed new/changed documents +lore embed --full # Re-embed all documents (clears existing) lore embed --retry-failed # Retry previously failed embeddings ``` @@ -322,6 +462,9 @@ lore count discussions --for issue # Issue discussions only lore count discussions --for mr # MR discussions only lore count notes # Total notes (system vs user breakdown) lore count notes --for issue # Issue notes only +lore count events # Total resource events +lore count events --for issue # Issue events only +lore count events --for mr # MR events only ``` ### `lore stats` @@ -332,6 +475,7 @@ Show document and index statistics, with optional integrity checks. lore stats # Document and index statistics lore stats --check # Run integrity checks lore stats --check --repair # Repair integrity issues +lore stats --dry-run # Preview repairs without saving ``` ### `lore status` @@ -357,6 +501,14 @@ lore init --force # Overwrite existing config lore init --non-interactive # Fail if prompts needed ``` +In robot mode, `init` supports non-interactive setup via flags: + +```bash +lore -J init --gitlab-url https://gitlab.com \ + --token-env-var GITLAB_TOKEN \ + --projects "group/project,other/project" +``` + ### `lore auth` Verify GitLab authentication is working. @@ -392,7 +544,7 @@ lore migrate ### `lore health` -Quick pre-flight check for config, database, and schema version. Exits 0 if healthy, 1 if unhealthy. +Quick pre-flight check for config, database, and schema version. Exits 0 if healthy, 19 if unhealthy. ```bash lore health @@ -591,42 +743,6 @@ Data is stored in SQLite with WAL mode and foreign keys enabled. Main tables: The database is stored at `~/.local/share/lore/lore.db` by default (XDG compliant). -## Timeline Pipeline - -The timeline pipeline reconstructs chronological event histories for GitLab entities by combining full-text search, cross-reference graph traversal, and resource event aggregation. 
Given a search query, it identifies relevant issues and MRs, discovers related entities through their reference graph, and assembles a unified, time-ordered event stream. - -### Stages - -The pipeline executes in five stages: - -1. **SEED** -- Full-text search identifies the most relevant issues and MRs matching the query. Documents (issue bodies, MR descriptions, discussion notes) are ranked by BM25 relevance. - -2. **HYDRATE** -- Evidence notes are extracted from the seed results: the top FTS-matched discussion notes with 200-character snippets that explain *why* each entity was surfaced. - -3. **EXPAND** -- Breadth-first traversal over the `entity_references` graph discovers related entities. Starting from seed entities, the pipeline follows "closes", "related", and optionally "mentioned" references up to a configurable depth, tracking provenance (which entity referenced which, via what method). - -4. **COLLECT** -- Events are gathered for all discovered entities (seeds + expanded). Event types include: creation, state changes, label adds/removes, milestone assignments, merge events, and evidence notes. Events are sorted chronologically with stable tiebreaking (timestamp, then entity ID, then event type). - -5. **RENDER** -- Events are formatted for output as human-readable text or structured JSON. 
- -### Event Types - -| Event | Description | -|-------|-------------| -| `Created` | Entity creation | -| `StateChanged` | State transitions (opened, closed, reopened) | -| `LabelAdded` | Label applied to entity | -| `LabelRemoved` | Label removed from entity | -| `MilestoneSet` | Milestone assigned | -| `MilestoneRemoved` | Milestone removed | -| `Merged` | MR merged (deduplicated against state events) | -| `NoteEvidence` | Discussion note matched by FTS, with snippet | -| `CrossReferenced` | Reference to another entity | - -### Unresolved References - -When the graph expansion encounters cross-project references to entities not yet synced locally, these are collected as unresolved references in the pipeline output. This enables discovery of external dependencies and can inform future sync targets. - ## Development ```bash diff --git a/migrations/019_list_performance.sql b/migrations/019_list_performance.sql new file mode 100644 index 0000000..817ffa3 --- /dev/null +++ b/migrations/019_list_performance.sql @@ -0,0 +1,13 @@ +-- Standalone updated_at DESC indexes for ORDER BY without temp B-tree sort. +-- The existing composite indexes (project_id, updated_at) only help when +-- filtering by project first. +CREATE INDEX IF NOT EXISTS idx_issues_updated_at_desc + ON issues(updated_at DESC); + +CREATE INDEX IF NOT EXISTS idx_mrs_updated_at_desc + ON merge_requests(updated_at DESC); + +-- Covering index for correlated subquery: unresolved discussion count per issue. +-- MRs already have idx_discussions_mr_resolved (migration 006). 
+CREATE INDEX IF NOT EXISTS idx_discussions_issue_resolved + ON discussions(issue_id, resolvable, resolved); diff --git a/migrations/020_mr_diffs_watermark.sql b/migrations/020_mr_diffs_watermark.sql new file mode 100644 index 0000000..80faa79 --- /dev/null +++ b/migrations/020_mr_diffs_watermark.sql @@ -0,0 +1,7 @@ +-- Migration 020: Watermark column for MR diffs sync +-- Tracks which MRs have had their file changes fetched, same pattern as closes_issues_synced_for_updated_at + +ALTER TABLE merge_requests ADD COLUMN diffs_synced_for_updated_at INTEGER; + +INSERT INTO schema_version (version, applied_at, description) +VALUES (20, strftime('%s', 'now') * 1000, 'MR diffs sync watermark'); diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs new file mode 100644 index 0000000..895832d --- /dev/null +++ b/src/cli/autocorrect.rs @@ -0,0 +1,802 @@ +use serde::Serialize; +use strsim::jaro_winkler; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/// A single correction applied to one argument. +#[derive(Debug, Clone, Serialize)] +pub struct Correction { + pub original: String, + pub corrected: String, + pub rule: CorrectionRule, + pub confidence: f64, +} + +/// Which rule triggered the correction. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum CorrectionRule { + SingleDashLongFlag, + CaseNormalization, + FuzzyFlag, +} + +/// Result of the correction pass over raw args. +#[derive(Debug, Clone)] +pub struct CorrectionResult { + pub args: Vec, + pub corrections: Vec, +} + +// --------------------------------------------------------------------------- +// Flag registry +// --------------------------------------------------------------------------- + +/// Global flags accepted by every command (from `Cli` struct). 
+const GLOBAL_FLAGS: &[&str] = &[ + "--config", + "--robot", + "--json", + "--color", + "--quiet", + "--no-quiet", + "--verbose", + "--no-verbose", + "--log-format", +]; + +/// Per-subcommand flags. Each entry is `(command_name, &[flags])`. +/// Hidden `--no-*` variants are included so they can be fuzzy-matched too. +const COMMAND_FLAGS: &[(&str, &[&str])] = &[ + ( + "issues", + &[ + "--limit", + "--fields", + "--state", + "--project", + "--author", + "--assignee", + "--label", + "--milestone", + "--since", + "--due-before", + "--has-due", + "--no-has-due", + "--sort", + "--asc", + "--no-asc", + "--open", + "--no-open", + ], + ), + ( + "mrs", + &[ + "--limit", + "--fields", + "--state", + "--project", + "--author", + "--assignee", + "--reviewer", + "--label", + "--since", + "--draft", + "--no-draft", + "--target", + "--source", + "--sort", + "--asc", + "--no-asc", + "--open", + "--no-open", + ], + ), + ( + "ingest", + &[ + "--project", + "--force", + "--no-force", + "--full", + "--no-full", + "--dry-run", + "--no-dry-run", + ], + ), + ( + "sync", + &[ + "--full", + "--no-full", + "--force", + "--no-force", + "--no-embed", + "--no-docs", + "--no-events", + "--no-file-changes", + "--dry-run", + "--no-dry-run", + ], + ), + ( + "search", + &[ + "--mode", + "--type", + "--author", + "--project", + "--label", + "--path", + "--after", + "--updated-after", + "--limit", + "--explain", + "--no-explain", + "--fts-mode", + ], + ), + ( + "embed", + &["--full", "--no-full", "--retry-failed", "--no-retry-failed"], + ), + ( + "stats", + &[ + "--check", + "--no-check", + "--repair", + "--dry-run", + "--no-dry-run", + ], + ), + ("count", &["--for"]), + ( + "timeline", + &[ + "--project", + "--since", + "--depth", + "--expand-mentions", + "--limit", + "--max-seeds", + "--max-entities", + "--max-evidence", + ], + ), + ( + "who", + &[ + "--path", + "--active", + "--overlap", + "--reviews", + "--since", + "--project", + "--limit", + ], + ), + ( + "init", + &[ + "--force", + 
"--non-interactive", + "--gitlab-url", + "--token-env-var", + "--projects", + ], + ), + ("generate-docs", &["--full", "--project"]), + ("completions", &[]), + ( + "list", + &[ + "--limit", + "--project", + "--state", + "--author", + "--assignee", + "--label", + "--milestone", + "--since", + "--due-before", + "--has-due-date", + "--sort", + "--order", + "--open", + "--draft", + "--no-draft", + "--reviewer", + "--target-branch", + "--source-branch", + ], + ), + ("show", &["--project"]), + ("reset", &["--yes"]), +]; + +/// Valid values for enum-like flags, used for post-clap error enhancement. +pub const ENUM_VALUES: &[(&str, &[&str])] = &[ + ("--state", &["opened", "closed", "merged", "locked", "all"]), + ("--mode", &["lexical", "hybrid", "semantic"]), + ("--sort", &["updated", "created", "iid"]), + ("--type", &["issue", "mr", "discussion"]), + ("--fts-mode", &["safe", "raw"]), + ("--color", &["auto", "always", "never"]), + ("--log-format", &["text", "json"]), + ("--for", &["issue", "mr"]), +]; + +// --------------------------------------------------------------------------- +// Correction thresholds +// --------------------------------------------------------------------------- + +const FUZZY_FLAG_THRESHOLD: f64 = 0.8; + +// --------------------------------------------------------------------------- +// Core logic +// --------------------------------------------------------------------------- + +/// Detect which subcommand is being invoked by finding the first positional +/// arg (not a flag, not a flag value). +fn detect_subcommand(args: &[String]) -> Option<&str> { + // Skip args[0] (binary name). Walk forward looking for the first + // arg that isn't a flag and isn't the value to a flag that takes one. 
+ let mut skip_next = false; + for arg in args.iter().skip(1) { + if skip_next { + skip_next = false; + continue; + } + if arg.starts_with('-') { + // Flags that take a value: we know global ones; for simplicity + // skip the next arg for any `--flag=value` form (handled inline) + // or known value-taking global flags. + if arg.contains('=') { + continue; + } + if matches!(arg.as_str(), "--config" | "-c" | "--color" | "--log-format") { + skip_next = true; + } + continue; + } + // First non-flag positional = subcommand + return Some(arg.as_str()); + } + None +} + +/// Build the set of valid long flags for the detected subcommand. +fn valid_flags_for(subcommand: Option<&str>) -> Vec<&'static str> { + let mut flags: Vec<&str> = GLOBAL_FLAGS.to_vec(); + if let Some(cmd) = subcommand { + for (name, cmd_flags) in COMMAND_FLAGS { + if *name == cmd { + flags.extend_from_slice(cmd_flags); + break; + } + } + } else { + // No subcommand detected — include all flags for maximum matching + for (_, cmd_flags) in COMMAND_FLAGS { + for flag in *cmd_flags { + if !flags.contains(flag) { + flags.push(flag); + } + } + } + } + flags +} + +/// Run the pre-clap correction pass on raw args. +/// +/// Returns the (possibly modified) args and any corrections applied. +pub fn correct_args(raw: Vec) -> CorrectionResult { + let subcommand = detect_subcommand(&raw); + let valid = valid_flags_for(subcommand); + + let mut corrected = Vec::with_capacity(raw.len()); + let mut corrections = Vec::new(); + + for arg in raw { + if let Some(fixed) = try_correct(&arg, &valid) { + let s = fixed.corrected.clone(); + corrections.push(fixed); + corrected.push(s); + } else { + corrected.push(arg); + } + } + + CorrectionResult { + args: corrected, + corrections, + } +} + +/// Try to correct a single arg. Returns `None` if no correction needed. 
+fn try_correct(arg: &str, valid_flags: &[&str]) -> Option<Correction> {
+    // Only attempt correction on flag-like args (starts with `-`)
+    if !arg.starts_with('-') {
+        return None;
+    }
+
+    // Skip short flags — they're unambiguous single chars (-p, -n, -v, -J)
+    // Also skip stacked short flags (-vvv)
+    if !arg.starts_with("--") {
+        // Rule 1: Single-dash long flag — e.g. `-robot` (len > 2, not a valid short flag)
+        // A short flag is `-` + single char, optionally stacked (-vvv).
+        // If it's `-` + multiple chars and NOT all the same char, it's likely a single-dash long flag.
+        let after_dash = &arg[1..];
+
+        // Check if it's a stacked short flag like -vvv (all same char)
+        let all_same_char = after_dash.len() > 1
+            && after_dash
+                .chars()
+                .all(|c| c == after_dash.chars().next().unwrap_or('\0'));
+        if all_same_char {
+            return None;
+        }
+
+        // Single char = valid short flag, don't touch
+        if after_dash.len() == 1 {
+            return None;
+        }
+
+        // It looks like a single-dash long flag (e.g. `-robot`, `-state`)
+        let candidate = format!("--{after_dash}");
+
+        // Check exact match first (case-sensitive)
+        if valid_flags.contains(&candidate.as_str()) {
+            return Some(Correction {
+                original: arg.to_string(),
+                corrected: candidate,
+                rule: CorrectionRule::SingleDashLongFlag,
+                confidence: 0.95,
+            });
+        }
+
+        // Check case-insensitive exact match
+        let lower = candidate.to_lowercase();
+        if let Some(&flag) = valid_flags.iter().find(|f| f.to_lowercase() == lower) {
+            return Some(Correction {
+                original: arg.to_string(),
+                corrected: flag.to_string(),
+                rule: CorrectionRule::SingleDashLongFlag,
+                confidence: 0.95,
+            });
+        }
+
+        // Try fuzzy on the single-dash candidate
+        if let Some((best_flag, score)) = best_fuzzy_match(&lower, valid_flags)
+            && score >= FUZZY_FLAG_THRESHOLD
+        {
+            return Some(Correction {
+                original: arg.to_string(),
+                corrected: best_flag.to_string(),
+                rule: CorrectionRule::SingleDashLongFlag,
+                confidence: score * 0.95, // discount slightly for compound correction
+            });
+        }
+
+        return None;
+    }
+
+    // For `--flag` or `--flag=value` forms: only correct the flag name
+    let (flag_part, value_suffix) = if let Some(eq_pos) = arg.find('=') {
+        (&arg[..eq_pos], Some(&arg[eq_pos..]))
+    } else {
+        (arg, None)
+    };
+
+    // Already valid? No correction needed.
+    if valid_flags.contains(&flag_part) {
+        return None;
+    }
+
+    // Rule 2: Case normalization — `--Robot` -> `--robot`
+    let lower = flag_part.to_lowercase();
+    if lower != flag_part
+        && let Some(&flag) = valid_flags.iter().find(|f| f.to_lowercase() == lower)
+    {
+        let corrected = match value_suffix {
+            Some(suffix) => format!("{flag}{suffix}"),
+            None => flag.to_string(),
+        };
+        return Some(Correction {
+            original: arg.to_string(),
+            corrected,
+            rule: CorrectionRule::CaseNormalization,
+            confidence: 0.9,
+        });
+    }
+
+    // Rule 3: Fuzzy flag match — `--staate` -> `--state`
+    if let Some((best_flag, score)) = best_fuzzy_match(&lower, valid_flags)
+        && score >= FUZZY_FLAG_THRESHOLD
+    {
+        let corrected = match value_suffix {
+            Some(suffix) => format!("{best_flag}{suffix}"),
+            None => best_flag.to_string(),
+        };
+        return Some(Correction {
+            original: arg.to_string(),
+            corrected,
+            rule: CorrectionRule::FuzzyFlag,
+            confidence: score,
+        });
+    }
+
+    None
+}
+
+/// Find the best fuzzy match among valid flags for a given (lowercased) input.
+fn best_fuzzy_match<'a>(input: &str, valid_flags: &[&'a str]) -> Option<(&'a str, f64)> {
+    valid_flags
+        .iter()
+        .map(|&flag| (flag, jaro_winkler(input, flag)))
+        .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
+}
+
+// ---------------------------------------------------------------------------
+// Post-clap suggestion helpers
+// ---------------------------------------------------------------------------
+
+/// Given an unrecognized flag (from a clap error), suggest the most similar
+/// valid flag for the detected subcommand.
+pub fn suggest_similar_flag(invalid_flag: &str, raw_args: &[String]) -> Option { + let subcommand = detect_subcommand(raw_args); + let valid = valid_flags_for(subcommand); + let lower = invalid_flag.to_lowercase(); + + let (best_flag, score) = best_fuzzy_match(&lower, &valid)?; + if score >= 0.6 { + Some(best_flag.to_string()) + } else { + None + } +} + +/// Given a flag name, return its valid enum values (if known). +pub fn valid_values_for_flag(flag: &str) -> Option<&'static [&'static str]> { + let lower = flag.to_lowercase(); + ENUM_VALUES + .iter() + .find(|(f, _)| f.to_lowercase() == lower) + .map(|(_, vals)| *vals) +} + +/// Format a human/robot teaching note for a correction. +pub fn format_teaching_note(correction: &Correction) -> String { + match correction.rule { + CorrectionRule::SingleDashLongFlag => { + format!( + "Use double-dash for long flags: {} (not {})", + correction.corrected, correction.original + ) + } + CorrectionRule::CaseNormalization => { + format!( + "Flags are lowercase: {} (not {})", + correction.corrected, correction.original + ) + } + CorrectionRule::FuzzyFlag => { + format!( + "Correct spelling: {} (not {})", + correction.corrected, correction.original + ) + } + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + fn args(s: &str) -> Vec { + s.split_whitespace().map(String::from).collect() + } + + // ---- Single-dash long flag ---- + + #[test] + fn single_dash_robot() { + let result = correct_args(args("lore -robot issues -n 5")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].original, "-robot"); + assert_eq!(result.corrections[0].corrected, "--robot"); + assert_eq!( + result.corrections[0].rule, + CorrectionRule::SingleDashLongFlag + ); + assert_eq!(result.args, args("lore --robot issues -n 5")); + } + + #[test] + fn 
single_dash_state() { + let result = correct_args(args("lore --robot issues -state opened")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--state"); + } + + // ---- Case normalization ---- + + #[test] + fn case_robot() { + let result = correct_args(args("lore --Robot issues")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--robot"); + assert_eq!( + result.corrections[0].rule, + CorrectionRule::CaseNormalization + ); + } + + #[test] + fn case_state_upper() { + let result = correct_args(args("lore --robot issues --State opened")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--state"); + assert_eq!( + result.corrections[0].rule, + CorrectionRule::CaseNormalization + ); + } + + #[test] + fn case_all_upper() { + let result = correct_args(args("lore --ROBOT issues --STATE opened")); + assert_eq!(result.corrections.len(), 2); + assert_eq!(result.corrections[0].corrected, "--robot"); + assert_eq!(result.corrections[1].corrected, "--state"); + } + + // ---- Fuzzy flag match ---- + + #[test] + fn fuzzy_staate() { + let result = correct_args(args("lore --robot issues --staate opened")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--state"); + assert_eq!(result.corrections[0].rule, CorrectionRule::FuzzyFlag); + } + + #[test] + fn fuzzy_projct() { + let result = correct_args(args("lore --robot issues --projct group/repo")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--project"); + assert_eq!(result.corrections[0].rule, CorrectionRule::FuzzyFlag); + } + + // ---- No corrections ---- + + #[test] + fn already_correct() { + let original = args("lore --robot issues --state opened -n 10"); + let result = correct_args(original.clone()); + assert!(result.corrections.is_empty()); + assert_eq!(result.args, original); + } + + #[test] + fn short_flags_untouched() 
{ + let original = args("lore -J issues -n 10 -s opened -p group/repo"); + let result = correct_args(original.clone()); + assert!(result.corrections.is_empty()); + } + + #[test] + fn stacked_short_flags_untouched() { + let original = args("lore -vvv issues"); + let result = correct_args(original.clone()); + assert!(result.corrections.is_empty()); + } + + #[test] + fn positional_args_untouched() { + let result = correct_args(args("lore --robot search authentication")); + assert!(result.corrections.is_empty()); + } + + #[test] + fn wildly_wrong_flag_not_corrected() { + // `--xyzzy` shouldn't match anything above 0.8 + let result = correct_args(args("lore --robot issues --xyzzy foo")); + assert!(result.corrections.is_empty()); + } + + // ---- Flag with = value ---- + + #[test] + fn flag_eq_value_case_correction() { + let result = correct_args(args("lore --robot issues --State=opened")); + assert_eq!(result.corrections.len(), 1); + assert_eq!(result.corrections[0].corrected, "--state=opened"); + } + + // ---- Multiple corrections in one invocation ---- + + #[test] + fn multiple_corrections() { + let result = correct_args(args( + "lore -robot issues --State opened --projct group/repo", + )); + assert_eq!(result.corrections.len(), 3); + assert_eq!(result.args[1], "--robot"); + assert_eq!(result.args[3], "--state"); + assert_eq!(result.args[5], "--project"); + } + + // ---- Teaching notes ---- + + #[test] + fn teaching_note_single_dash() { + let c = Correction { + original: "-robot".to_string(), + corrected: "--robot".to_string(), + rule: CorrectionRule::SingleDashLongFlag, + confidence: 0.95, + }; + let note = format_teaching_note(&c); + assert!(note.contains("double-dash")); + assert!(note.contains("--robot")); + } + + #[test] + fn teaching_note_case() { + let c = Correction { + original: "--State".to_string(), + corrected: "--state".to_string(), + rule: CorrectionRule::CaseNormalization, + confidence: 0.9, + }; + let note = format_teaching_note(&c); + 
assert!(note.contains("lowercase")); + } + + #[test] + fn teaching_note_fuzzy() { + let c = Correction { + original: "--staate".to_string(), + corrected: "--state".to_string(), + rule: CorrectionRule::FuzzyFlag, + confidence: 0.85, + }; + let note = format_teaching_note(&c); + assert!(note.contains("spelling")); + } + + // ---- Post-clap suggestion helpers ---- + + #[test] + fn suggest_similar_flag_works() { + let raw = args("lore --robot issues --xstat opened"); + let suggestion = suggest_similar_flag("--xstat", &raw); + // Should suggest --state (close enough with lower threshold 0.6) + assert!(suggestion.is_some()); + } + + #[test] + fn valid_values_for_state() { + let vals = valid_values_for_flag("--state"); + assert!(vals.is_some()); + let vals = vals.unwrap(); + assert!(vals.contains(&"opened")); + assert!(vals.contains(&"closed")); + } + + #[test] + fn valid_values_unknown_flag() { + assert!(valid_values_for_flag("--xyzzy").is_none()); + } + + // ---- Subcommand detection ---- + + #[test] + fn detect_subcommand_basic() { + assert_eq!( + detect_subcommand(&args("lore issues -n 10")), + Some("issues") + ); + } + + #[test] + fn detect_subcommand_with_globals() { + assert_eq!( + detect_subcommand(&args("lore --robot --config /tmp/c.json mrs")), + Some("mrs") + ); + } + + #[test] + fn detect_subcommand_with_color() { + assert_eq!( + detect_subcommand(&args("lore --color never issues")), + Some("issues") + ); + } + + #[test] + fn detect_subcommand_none() { + assert_eq!(detect_subcommand(&args("lore --robot")), None); + } + + // ---- Registry drift test ---- + // This test uses clap introspection to verify our static registry covers + // all long flags defined in the Cli struct. 
+ + #[test] + fn registry_covers_global_flags() { + use clap::CommandFactory; + let cmd = crate::cli::Cli::command(); + + let clap_globals: Vec = cmd + .get_arguments() + .filter_map(|a| a.get_long().map(|l| format!("--{l}"))) + .collect(); + + for flag in &clap_globals { + // Skip help/version — clap adds these automatically + if flag == "--help" || flag == "--version" { + continue; + } + assert!( + GLOBAL_FLAGS.contains(&flag.as_str()), + "Clap global flag {flag} is missing from GLOBAL_FLAGS registry. \ + Add it to GLOBAL_FLAGS in autocorrect.rs." + ); + } + } + + #[test] + fn registry_covers_command_flags() { + use clap::CommandFactory; + let cmd = crate::cli::Cli::command(); + + for sub in cmd.get_subcommands() { + let sub_name = sub.get_name().to_string(); + + // Find our registry entry + let registry_entry = COMMAND_FLAGS.iter().find(|(name, _)| *name == sub_name); + + // Not all subcommands need entries (e.g., version, auth, status + // with no subcommand-specific flags) + let clap_flags: Vec = sub + .get_arguments() + .filter_map(|a| a.get_long().map(|l| format!("--{l}"))) + .filter(|f| !GLOBAL_FLAGS.contains(&f.as_str())) + .filter(|f| f != "--help" && f != "--version") + .collect(); + + if clap_flags.is_empty() { + continue; + } + + let registry_flags = registry_entry.map(|(_, flags)| *flags); + let registry_flags = registry_flags.unwrap_or_else(|| { + panic!( + "Subcommand '{sub_name}' has clap flags {clap_flags:?} but no COMMAND_FLAGS \ + registry entry. Add it to COMMAND_FLAGS in autocorrect.rs." + ) + }); + + for flag in &clap_flags { + assert!( + registry_flags.contains(&flag.as_str()), + "Clap flag {flag} on subcommand '{sub_name}' is missing from \ + COMMAND_FLAGS registry. Add it to the '{sub_name}' entry in autocorrect.rs." 
+ ); + } + } + } +} diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 0d625d7..e9a7f63 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -501,6 +501,20 @@ async fn run_ingest_inner( ProgressEvent::ClosesIssuesFetchComplete { .. } => { disc_bar_clone.finish_and_clear(); } + ProgressEvent::MrDiffsFetchStarted { total } => { + disc_bar_clone.reset(); + disc_bar_clone.set_length(total as u64); + disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100)); + stage_bar_clone.set_message( + "Fetching MR file changes...".to_string() + ); + } + ProgressEvent::MrDiffFetched { current, total: _ } => { + disc_bar_clone.set_position(current as u64); + } + ProgressEvent::MrDiffsFetchComplete { .. } => { + disc_bar_clone.finish_and_clear(); + } }) }; diff --git a/src/cli/commands/list.rs b/src/cli/commands/list.rs index a8ccfd5..1d1bcc2 100644 --- a/src/cli/commands/list.rs +++ b/src/cli/commands/list.rs @@ -335,18 +335,12 @@ fn query_issues(conn: &Connection, filters: &ListFilters) -> Result (SELECT GROUP_CONCAT(ia.username, X'1F') FROM issue_assignees ia WHERE ia.issue_id = i.id) AS assignees_csv, - COALESCE(d.total, 0) AS discussion_count, - COALESCE(d.unresolved, 0) AS unresolved_count + (SELECT COUNT(*) FROM discussions d + WHERE d.issue_id = i.id) AS discussion_count, + (SELECT COUNT(*) FROM discussions d + WHERE d.issue_id = i.id AND d.resolvable = 1 AND d.resolved = 0) AS unresolved_count FROM issues i JOIN projects p ON i.project_id = p.id - LEFT JOIN ( - SELECT issue_id, - COUNT(*) as total, - SUM(CASE WHEN resolvable = 1 AND resolved = 0 THEN 1 ELSE 0 END) as unresolved - FROM discussions - WHERE issue_id IS NOT NULL - GROUP BY issue_id - ) d ON d.issue_id = i.id {where_sql} ORDER BY {sort_column} {order} LIMIT ?" 
@@ -528,18 +522,12 @@ fn query_mrs(conn: &Connection, filters: &MrListFilters) -> Result (SELECT GROUP_CONCAT(mr.username, X'1F') FROM mr_reviewers mr WHERE mr.merge_request_id = m.id) AS reviewers_csv, - COALESCE(d.total, 0) AS discussion_count, - COALESCE(d.unresolved, 0) AS unresolved_count + (SELECT COUNT(*) FROM discussions d + WHERE d.merge_request_id = m.id) AS discussion_count, + (SELECT COUNT(*) FROM discussions d + WHERE d.merge_request_id = m.id AND d.resolvable = 1 AND d.resolved = 0) AS unresolved_count FROM merge_requests m JOIN projects p ON m.project_id = p.id - LEFT JOIN ( - SELECT merge_request_id, - COUNT(*) as total, - SUM(CASE WHEN resolvable = 1 AND resolved = 0 THEN 1 ELSE 0 END) as unresolved - FROM discussions - WHERE merge_request_id IS NOT NULL - GROUP BY merge_request_id - ) d ON d.merge_request_id = m.id {where_sql} ORDER BY {sort_column} {order} LIMIT ?" diff --git a/src/cli/commands/who.rs b/src/cli/commands/who.rs index d878820..f85ada2 100644 --- a/src/cli/commands/who.rs +++ b/src/cli/commands/who.rs @@ -433,7 +433,7 @@ fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> R // Heuristic is now only a fallback; probes decide first when ambiguous. 
let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); - // Probe 1: exact file exists (project-scoped via nullable binding) + // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped) let exact_exists = conn .query_row( "SELECT 1 FROM notes @@ -445,9 +445,19 @@ fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> R rusqlite::params![trimmed, project_id], |_| Ok(()), ) - .is_ok(); + .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE new_path = ?1 + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ) + .is_ok(); - // Probe 2: directory prefix exists (project-scoped) + // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped) let prefix_exists = if !forced_dir && !exact_exists { let escaped = escape_like(trimmed); let pat = format!("{escaped}/%"); @@ -462,6 +472,16 @@ fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> R |_| Ok(()), ) .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE new_path LIKE ?1 ESCAPE '\\' + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ) + .is_ok() } else { false }; @@ -513,125 +533,117 @@ fn query_expert( let pq = build_path_query(conn, path, project_id)?; let limit_plus_one = (limit + 1) as i64; - let sql_prefix = " - WITH activity AS ( - SELECT - n.author_username AS username, - 'reviewer' AS role, - COUNT(DISTINCT m.id) AS mr_cnt, - COUNT(*) AS note_cnt, - MAX(n.created_at) AS last_seen_at - FROM notes n - JOIN discussions d ON n.discussion_id = d.id - JOIN merge_requests m ON d.merge_request_id = m.id - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 - AND n.author_username IS NOT NULL - AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.created_at >= ?2 
- AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY n.author_username - - UNION ALL - - SELECT - m.author_username AS username, - 'author' AS role, - COUNT(DISTINCT m.id) AS mr_cnt, - 0 AS note_cnt, - MAX(n.created_at) AS last_seen_at - FROM merge_requests m - JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n ON n.discussion_id = d.id - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 - AND m.author_username IS NOT NULL - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY m.author_username - ) - SELECT - username, - SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, - SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, - SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, - MAX(last_seen_at) AS last_seen_at, - ( - (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + - (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + - (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) - ) AS score - FROM activity - GROUP BY username - ORDER BY score DESC, last_seen_at DESC, username ASC - LIMIT ?4 - "; - - let sql_exact = " - WITH activity AS ( - SELECT - n.author_username AS username, - 'reviewer' AS role, - COUNT(DISTINCT m.id) AS mr_cnt, - COUNT(*) AS note_cnt, - MAX(n.created_at) AS last_seen_at - FROM notes n - JOIN discussions d ON n.discussion_id = d.id - JOIN merge_requests m ON d.merge_request_id = m.id - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 - AND n.author_username IS NOT NULL - AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.position_new_path = ?1 - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY n.author_username - - UNION ALL - - SELECT - m.author_username AS username, - 'author' AS role, - COUNT(DISTINCT m.id) AS mr_cnt, - 0 AS note_cnt, - MAX(n.created_at) AS 
last_seen_at - FROM merge_requests m - JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n ON n.discussion_id = d.id - WHERE n.note_type = 'DiffNote' - AND n.is_system = 0 - AND m.author_username IS NOT NULL - AND n.position_new_path = ?1 - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY m.author_username - ) - SELECT - username, - SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, - SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, - SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, - MAX(last_seen_at) AS last_seen_at, - ( - (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + - (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + - (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) - ) AS score - FROM activity - GROUP BY username - ORDER BY score DESC, last_seen_at DESC, username ASC - LIMIT ?4 - "; - - let mut stmt = if pq.is_prefix { - conn.prepare_cached(sql_prefix)? + // Build SQL with 4 signal sources (UNION ALL), deduplicating via COUNT(DISTINCT mr_id): + // 1. DiffNote reviewer — left inline review comments (not self-review) + // 2. DiffNote MR author — authored MR that has DiffNotes on this path + // 3. File-change author — authored MR that touched this path (mr_file_changes) + // 4. File-change reviewer — assigned reviewer on MR that touched this path + let path_op = if pq.is_prefix { + "LIKE ?1 ESCAPE '\\'" } else { - conn.prepare_cached(sql_exact)? + "= ?1" }; + let sql = format!( + " + WITH signals AS ( + -- 1. 
DiffNote reviewer (individual notes for note_cnt) + SELECT + n.author_username AS username, + 'diffnote_reviewer' AS signal, + m.id AS mr_id, + n.id AS note_id, + n.created_at AS seen_at + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.position_new_path {path_op} + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + + UNION ALL + + -- 2. DiffNote MR author + SELECT DISTINCT + m.author_username AS username, + 'diffnote_author' AS signal, + m.id AS mr_id, + NULL AS note_id, + MAX(n.created_at) AS seen_at + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username IS NOT NULL + AND m.state IN ('opened','merged') + AND n.position_new_path {path_op} + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username, m.id + + UNION ALL + + -- 3. MR author via file changes + SELECT + m.author_username AS username, + 'file_author' AS signal, + m.id AS mr_id, + NULL AS note_id, + m.updated_at AS seen_at + FROM mr_file_changes fc + JOIN merge_requests m ON fc.merge_request_id = m.id + WHERE m.author_username IS NOT NULL + AND m.state IN ('opened','merged') + AND fc.new_path {path_op} + AND m.updated_at >= ?2 + AND (?3 IS NULL OR fc.project_id = ?3) + + UNION ALL + + -- 4. 
MR reviewer via file changes + mr_reviewers + SELECT + r.username AS username, + 'file_reviewer' AS signal, + m.id AS mr_id, + NULL AS note_id, + m.updated_at AS seen_at + FROM mr_file_changes fc + JOIN merge_requests m ON fc.merge_request_id = m.id + JOIN mr_reviewers r ON r.merge_request_id = m.id + WHERE r.username IS NOT NULL + AND m.state IN ('opened','merged') + AND fc.new_path {path_op} + AND m.updated_at >= ?2 + AND (?3 IS NULL OR fc.project_id = ?3) + ) + SELECT + username, + COUNT(DISTINCT CASE WHEN signal IN ('diffnote_reviewer', 'file_reviewer') + THEN mr_id END) AS review_mr_count, + COUNT(CASE WHEN signal = 'diffnote_reviewer' THEN note_id END) AS review_note_count, + COUNT(DISTINCT CASE WHEN signal IN ('diffnote_author', 'file_author') + THEN mr_id END) AS author_mr_count, + MAX(seen_at) AS last_seen_at, + ( + (COUNT(DISTINCT CASE WHEN signal IN ('diffnote_reviewer', 'file_reviewer') + THEN mr_id END) * 20) + + (COUNT(DISTINCT CASE WHEN signal IN ('diffnote_author', 'file_author') + THEN mr_id END) * 12) + + (COUNT(CASE WHEN signal = 'diffnote_reviewer' THEN note_id END) * 1) + ) AS score + FROM signals + GROUP BY username + ORDER BY score DESC, last_seen_at DESC, username ASC + LIMIT ?4 + " + ); + + let mut stmt = conn.prepare_cached(&sql)?; let experts: Vec = stmt .query_map( @@ -1160,97 +1172,100 @@ fn query_overlap( ) -> Result { let pq = build_path_query(conn, path, project_id)?; - let sql_prefix = "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( - SELECT - n.author_username AS username, - 'reviewer' AS role, - COUNT(DISTINCT m.id) AS touch_count, - MAX(n.created_at) AS last_seen_at, - GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' 
|| m.iid)) AS mr_refs - FROM notes n - JOIN discussions d ON n.discussion_id = d.id - JOIN merge_requests m ON d.merge_request_id = m.id - JOIN projects p ON m.project_id = p.id - WHERE n.note_type = 'DiffNote' - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.is_system = 0 - AND n.author_username IS NOT NULL - AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY n.author_username - - UNION ALL - - SELECT - m.author_username AS username, - 'author' AS role, - COUNT(DISTINCT m.id) AS touch_count, - MAX(n.created_at) AS last_seen_at, - GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM merge_requests m - JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n ON n.discussion_id = d.id - JOIN projects p ON m.project_id = p.id - WHERE n.note_type = 'DiffNote' - AND n.position_new_path LIKE ?1 ESCAPE '\\' - AND n.is_system = 0 - AND m.state IN ('opened', 'merged') - AND m.author_username IS NOT NULL - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY m.author_username - )"; - - let sql_exact = "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( - SELECT - n.author_username AS username, - 'reviewer' AS role, - COUNT(DISTINCT m.id) AS touch_count, - MAX(n.created_at) AS last_seen_at, - GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' 
|| m.iid)) AS mr_refs - FROM notes n - JOIN discussions d ON n.discussion_id = d.id - JOIN merge_requests m ON d.merge_request_id = m.id - JOIN projects p ON m.project_id = p.id - WHERE n.note_type = 'DiffNote' - AND n.position_new_path = ?1 - AND n.is_system = 0 - AND n.author_username IS NOT NULL - AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY n.author_username - - UNION ALL - - SELECT - m.author_username AS username, - 'author' AS role, - COUNT(DISTINCT m.id) AS touch_count, - MAX(n.created_at) AS last_seen_at, - GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM merge_requests m - JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n ON n.discussion_id = d.id - JOIN projects p ON m.project_id = p.id - WHERE n.note_type = 'DiffNote' - AND n.position_new_path = ?1 - AND n.is_system = 0 - AND m.state IN ('opened', 'merged') - AND m.author_username IS NOT NULL - AND n.created_at >= ?2 - AND (?3 IS NULL OR n.project_id = ?3) - GROUP BY m.author_username - )"; - - let mut stmt = if pq.is_prefix { - conn.prepare_cached(sql_prefix)? + // Build SQL with 4 signal sources, matching the expert query expansion. + // Each row produces (username, role, mr_id, mr_ref, seen_at) for Rust-side accumulation. + let path_op = if pq.is_prefix { + "LIKE ?1 ESCAPE '\\'" } else { - conn.prepare_cached(sql_exact)? + "= ?1" }; + let sql = format!( + "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( + -- 1. DiffNote reviewer + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' 
|| m.iid)) AS mr_refs + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + WHERE n.note_type = 'DiffNote' + AND n.position_new_path {path_op} + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + UNION ALL + + -- 2. DiffNote MR author + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + JOIN projects p ON m.project_id = p.id + WHERE n.note_type = 'DiffNote' + AND n.position_new_path {path_op} + AND n.is_system = 0 + AND m.state IN ('opened', 'merged') + AND m.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username + + UNION ALL + + -- 3. MR author via file changes + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(m.updated_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM mr_file_changes fc + JOIN merge_requests m ON fc.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + WHERE m.author_username IS NOT NULL + AND m.state IN ('opened','merged') + AND fc.new_path {path_op} + AND m.updated_at >= ?2 + AND (?3 IS NULL OR fc.project_id = ?3) + GROUP BY m.author_username + + UNION ALL + + -- 4. 
MR reviewer via file changes + mr_reviewers + SELECT + r.username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(m.updated_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM mr_file_changes fc + JOIN merge_requests m ON fc.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + JOIN mr_reviewers r ON r.merge_request_id = m.id + WHERE r.username IS NOT NULL + AND m.state IN ('opened','merged') + AND fc.new_path {path_op} + AND m.updated_at >= ?2 + AND (?3 IS NULL OR fc.project_id = ?3) + GROUP BY r.username + )" + ); + + let mut stmt = conn.prepare_cached(&sql)?; let rows: Vec<(String, String, u32, i64, Option)> = stmt .query_map(rusqlite::params![pq.value, since_ms, project_id], |row| { Ok(( @@ -2117,7 +2132,6 @@ mod tests { .unwrap(); } - #[allow(dead_code)] fn insert_reviewer(conn: &Connection, mr_id: i64, username: &str) { conn.execute( "INSERT INTO mr_reviewers (merge_request_id, username) VALUES (?1, ?2)", @@ -2126,6 +2140,21 @@ mod tests { .unwrap(); } + fn insert_file_change( + conn: &Connection, + mr_id: i64, + project_id: i64, + new_path: &str, + change_type: &str, + ) { + conn.execute( + "INSERT INTO mr_file_changes (merge_request_id, project_id, new_path, change_type) + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params![mr_id, project_id, new_path, change_type], + ) + .unwrap(); + } + #[test] fn test_is_file_path_discrimination() { // Contains '/' -> file path @@ -2678,4 +2707,142 @@ mod tests { let result = query_expert(&conn, "src/auth/", None, 0, 10).unwrap(); assert!(!result.truncated); } + + #[test] + fn test_expert_file_changes_only() { + // MR author should appear even when there are zero DiffNotes + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "file_author", "merged"); + insert_file_change(&conn, 1, 1, "src/auth/login.rs", "modified"); + + let result = query_expert(&conn, "src/auth/login.rs", None, 
0, 20).unwrap(); + assert_eq!(result.experts.len(), 1); + assert_eq!(result.experts[0].username, "file_author"); + assert!(result.experts[0].author_mr_count > 0); + assert_eq!(result.experts[0].review_mr_count, 0); + } + + #[test] + fn test_expert_mr_reviewer_via_file_changes() { + // A reviewer assigned via mr_reviewers should appear when that MR + // touched the queried file (via mr_file_changes) + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_file_change(&conn, 1, 1, "src/auth/login.rs", "modified"); + insert_reviewer(&conn, 1, "assigned_reviewer"); + + let result = query_expert(&conn, "src/auth/login.rs", None, 0, 20).unwrap(); + let reviewer = result + .experts + .iter() + .find(|e| e.username == "assigned_reviewer"); + assert!(reviewer.is_some(), "assigned_reviewer should appear"); + assert!(reviewer.unwrap().review_mr_count > 0); + } + + #[test] + fn test_expert_deduplicates_across_signals() { + // User who is BOTH a DiffNote reviewer AND an mr_reviewers entry for + // the same MR should be counted only once per MR + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote( + &conn, + 1, + 1, + 1, + "reviewer_b", + "src/auth/login.rs", + "looks good", + ); + // Same user also listed as assigned reviewer, with file change data + insert_file_change(&conn, 1, 1, "src/auth/login.rs", "modified"); + insert_reviewer(&conn, 1, "reviewer_b"); + + let result = query_expert(&conn, "src/auth/login.rs", None, 0, 20).unwrap(); + let reviewer = result + .experts + .iter() + .find(|e| e.username == "reviewer_b") + .unwrap(); + // Should be 1 MR, not 2 (dedup across DiffNote + mr_reviewers) + assert_eq!(reviewer.review_mr_count, 1); + } + + #[test] + fn test_expert_combined_diffnote_and_file_changes() { + // Author with DiffNotes on path A 
and file_changes on path B should + // get credit for both when queried with a directory prefix + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + // MR 1: has DiffNotes on login.rs + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/auth/login.rs", "note"); + // MR 2: has file_changes on session.rs (no DiffNotes) + insert_mr(&conn, 2, 1, 200, "author_a", "merged"); + insert_file_change(&conn, 2, 1, "src/auth/session.rs", "added"); + + let result = query_expert(&conn, "src/auth/", None, 0, 20).unwrap(); + let author = result + .experts + .iter() + .find(|e| e.username == "author_a") + .unwrap(); + // Should count 2 authored MRs (one from DiffNote path, one from file changes) + assert_eq!(author.author_mr_count, 2); + } + + #[test] + fn test_expert_file_changes_prefix_match() { + // Directory prefix queries should pick up mr_file_changes under the directory + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_file_change(&conn, 1, 1, "src/auth/login.rs", "modified"); + insert_file_change(&conn, 1, 1, "src/auth/session.rs", "added"); + + let result = query_expert(&conn, "src/auth/", None, 0, 20).unwrap(); + assert_eq!(result.path_match, "prefix"); + assert_eq!(result.experts.len(), 1); + assert_eq!(result.experts[0].username, "author_a"); + } + + #[test] + fn test_overlap_file_changes_only() { + // Overlap mode should also find users via mr_file_changes + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_file_change(&conn, 1, 1, "src/auth/login.rs", "modified"); + insert_reviewer(&conn, 1, "reviewer_x"); + + let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); + assert!( + result.users.iter().any(|u| u.username == "author_a"), + "author_a should 
appear via file_changes" + ); + assert!( + result.users.iter().any(|u| u.username == "reviewer_x"), + "reviewer_x should appear via mr_reviewers + file_changes" + ); + } + + #[test] + fn test_build_path_query_resolves_via_file_changes() { + // DB probe should detect exact file match from mr_file_changes even + // when no DiffNotes exist for the path + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_file_change(&conn, 1, 1, "src/Dockerfile", "modified"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); + } } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 62f36a8..861c090 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,3 +1,4 @@ +pub mod autocorrect; pub mod commands; pub mod progress; pub mod robot; @@ -81,13 +82,18 @@ impl Cli { /// Detect robot mode from environment before parsing succeeds. /// Used for structured error output when clap parsing fails. + /// Also catches common agent typos like `-robot` and `--Robot`. 
pub fn detect_robot_mode_from_env() -> bool { let args: Vec = std::env::args().collect(); - args.iter() - .any(|a| a == "--robot" || a == "-J" || a == "--json") - || std::env::var("LORE_ROBOT") - .ok() - .is_some_and(|v| !v.is_empty() && v != "0" && v != "false") + args.iter().any(|a| { + a == "-J" + || a.eq_ignore_ascii_case("--robot") + || a.eq_ignore_ascii_case("-robot") + || a.eq_ignore_ascii_case("--json") + || a.eq_ignore_ascii_case("-json") + }) || std::env::var("LORE_ROBOT") + .ok() + .is_some_and(|v| !v.is_empty() && v != "0" && v != "false") || !std::io::stdout().is_terminal() } } @@ -608,6 +614,10 @@ pub struct SyncArgs { #[arg(long = "no-events")] pub no_events: bool, + /// Skip MR file change fetching (overrides config) + #[arg(long = "no-file-changes")] + pub no_file_changes: bool, + /// Preview what would be synced without making changes #[arg(long, overrides_with = "no_dry_run")] pub dry_run: bool, diff --git a/src/core/config.rs b/src/core/config.rs index 547ed77..b1b323b 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -49,6 +49,9 @@ pub struct SyncConfig { #[serde(rename = "fetchResourceEvents", default = "default_true")] pub fetch_resource_events: bool, + + #[serde(rename = "fetchMrFileChanges", default = "default_true")] + pub fetch_mr_file_changes: bool, } fn default_true() -> bool { @@ -66,6 +69,7 @@ impl Default for SyncConfig { dependent_concurrency: 8, requests_per_second: 30.0, fetch_resource_events: true, + fetch_mr_file_changes: true, } } } diff --git a/src/core/db.rs b/src/core/db.rs index 0e2feb5..145ed9e 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -57,6 +57,14 @@ const MIGRATIONS: &[(&str, &str)] = &[ "018", include_str!("../../migrations/018_fix_assignees_composite_index.sql"), ), + ( + "019", + include_str!("../../migrations/019_list_performance.sql"), + ), + ( + "020", + include_str!("../../migrations/020_mr_diffs_watermark.sql"), + ), ]; pub fn create_connection(db_path: &Path) -> Result { diff --git 
a/src/gitlab/client.rs b/src/gitlab/client.rs index 59610c9..e1e3600 100644 --- a/src/gitlab/client.rs +++ b/src/gitlab/client.rs @@ -12,7 +12,7 @@ use tracing::{debug, warn}; use super::types::{ GitLabDiscussion, GitLabIssue, GitLabIssueRef, GitLabLabelEvent, GitLabMergeRequest, - GitLabMilestoneEvent, GitLabProject, GitLabStateEvent, GitLabUser, GitLabVersion, + GitLabMilestoneEvent, GitLabMrDiff, GitLabProject, GitLabStateEvent, GitLabUser, GitLabVersion, }; use crate::core::error::{LoreError, Result}; @@ -609,6 +609,15 @@ impl GitLabClient { self.fetch_all_pages(&path).await } + pub async fn fetch_mr_diffs( + &self, + gitlab_project_id: i64, + iid: i64, + ) -> Result> { + let path = format!("/api/v4/projects/{gitlab_project_id}/merge_requests/{iid}/diffs"); + coalesce_not_found(self.fetch_all_pages(&path).await) + } + pub async fn fetch_issue_state_events( &self, gitlab_project_id: i64, diff --git a/src/gitlab/mod.rs b/src/gitlab/mod.rs index fcec895..b43b933 100644 --- a/src/gitlab/mod.rs +++ b/src/gitlab/mod.rs @@ -9,6 +9,6 @@ pub use transformers::{ }; pub use types::{ GitLabAuthor, GitLabDiscussion, GitLabIssue, GitLabIssueRef, GitLabLabelEvent, GitLabLabelRef, - GitLabMergeRequestRef, GitLabMilestoneEvent, GitLabMilestoneRef, GitLabNote, + GitLabMergeRequestRef, GitLabMilestoneEvent, GitLabMilestoneRef, GitLabMrDiff, GitLabNote, GitLabNotePosition, GitLabProject, GitLabStateEvent, GitLabUser, GitLabVersion, }; diff --git a/src/gitlab/types.rs b/src/gitlab/types.rs index 1e7303e..02cc9bb 100644 --- a/src/gitlab/types.rs +++ b/src/gitlab/types.rs @@ -214,6 +214,18 @@ pub struct GitLabReviewer { pub name: String, } +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct GitLabMrDiff { + pub old_path: String, + pub new_path: String, + #[serde(default)] + pub new_file: bool, + #[serde(default)] + pub renamed_file: bool, + #[serde(default)] + pub deleted_file: bool, +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct GitLabMergeRequest { pub id: 
i64, diff --git a/src/ingestion/mod.rs b/src/ingestion/mod.rs index d422f07..a0c96c5 100644 --- a/src/ingestion/mod.rs +++ b/src/ingestion/mod.rs @@ -3,6 +3,7 @@ pub mod discussion_queue; pub mod discussions; pub mod issues; pub mod merge_requests; +pub mod mr_diffs; pub mod mr_discussions; pub mod orchestrator; diff --git a/src/ingestion/mr_diffs.rs b/src/ingestion/mr_diffs.rs new file mode 100644 index 0000000..90698a7 --- /dev/null +++ b/src/ingestion/mr_diffs.rs @@ -0,0 +1,268 @@ +use rusqlite::Connection; +use tracing::debug; + +use crate::core::error::Result; +use crate::gitlab::types::GitLabMrDiff; + +/// Derive the change type from GitLab's boolean flags. +fn derive_change_type(diff: &GitLabMrDiff) -> &'static str { + if diff.new_file { + "added" + } else if diff.renamed_file { + "renamed" + } else if diff.deleted_file { + "deleted" + } else { + "modified" + } +} + +/// Replace all file change records for a given MR with the provided diffs. +/// Uses DELETE+INSERT (simpler than UPSERT for array replacement). 
+pub fn upsert_mr_file_changes( + conn: &Connection, + mr_local_id: i64, + project_id: i64, + diffs: &[GitLabMrDiff], +) -> Result { + conn.execute( + "DELETE FROM mr_file_changes WHERE merge_request_id = ?1", + [mr_local_id], + )?; + + let mut stmt = conn.prepare_cached( + "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type) \ + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + + let mut inserted = 0; + for diff in diffs { + let old_path = if diff.renamed_file { + Some(diff.old_path.as_str()) + } else { + None + }; + let change_type = derive_change_type(diff); + + stmt.execute(rusqlite::params![ + mr_local_id, + project_id, + old_path, + diff.new_path, + change_type, + ])?; + inserted += 1; + } + + if inserted > 0 { + debug!(inserted, mr_local_id, "Stored MR file changes"); + } + + Ok(inserted) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::db::{create_connection, run_migrations}; + use std::path::Path; + + fn setup() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + + // Insert a test project + conn.execute( + "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')", + [], + ).unwrap(); + + // Insert a test MR + conn.execute( + "INSERT INTO merge_requests (gitlab_id, iid, project_id, title, state, draft, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) \ + VALUES (100, 1, 1, 'Test MR', 'merged', 0, 'feature', 'main', 'testuser', 1000, 2000, 3000)", + [], + ).unwrap(); + + conn + } + + #[test] + fn test_derive_change_type_added() { + let diff = GitLabMrDiff { + old_path: String::new(), + new_path: "src/new.rs".to_string(), + new_file: true, + renamed_file: false, + deleted_file: false, + }; + assert_eq!(derive_change_type(&diff), "added"); + } + + #[test] + fn test_derive_change_type_renamed() { + let diff = GitLabMrDiff { + old_path: 
"src/old.rs".to_string(), + new_path: "src/new.rs".to_string(), + new_file: false, + renamed_file: true, + deleted_file: false, + }; + assert_eq!(derive_change_type(&diff), "renamed"); + } + + #[test] + fn test_derive_change_type_deleted() { + let diff = GitLabMrDiff { + old_path: "src/gone.rs".to_string(), + new_path: "src/gone.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: true, + }; + assert_eq!(derive_change_type(&diff), "deleted"); + } + + #[test] + fn test_derive_change_type_modified() { + let diff = GitLabMrDiff { + old_path: "src/lib.rs".to_string(), + new_path: "src/lib.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: false, + }; + assert_eq!(derive_change_type(&diff), "modified"); + } + + #[test] + fn test_upsert_inserts_file_changes() { + let conn = setup(); + let diffs = [ + GitLabMrDiff { + old_path: String::new(), + new_path: "src/new.rs".to_string(), + new_file: true, + renamed_file: false, + deleted_file: false, + }, + GitLabMrDiff { + old_path: "src/lib.rs".to_string(), + new_path: "src/lib.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: false, + }, + ]; + + let inserted = upsert_mr_file_changes(&conn, 1, 1, &diffs).unwrap(); + assert_eq!(inserted, 2); + + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM mr_file_changes WHERE merge_request_id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(count, 2); + } + + #[test] + fn test_upsert_replaces_existing() { + let conn = setup(); + let diffs_v1 = [GitLabMrDiff { + old_path: String::new(), + new_path: "src/old.rs".to_string(), + new_file: true, + renamed_file: false, + deleted_file: false, + }]; + upsert_mr_file_changes(&conn, 1, 1, &diffs_v1).unwrap(); + + let diffs_v2 = [ + GitLabMrDiff { + old_path: "src/a.rs".to_string(), + new_path: "src/a.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: false, + }, + GitLabMrDiff { + old_path: "src/b.rs".to_string(), + new_path: 
"src/b.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: false, + }, + ]; + let inserted = upsert_mr_file_changes(&conn, 1, 1, &diffs_v2).unwrap(); + assert_eq!(inserted, 2); + + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM mr_file_changes WHERE merge_request_id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(count, 2); + + // The old "src/old.rs" should be gone + let old_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM mr_file_changes WHERE new_path = 'src/old.rs'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(old_count, 0); + } + + #[test] + fn test_renamed_stores_old_path() { + let conn = setup(); + let diffs = [GitLabMrDiff { + old_path: "src/old_name.rs".to_string(), + new_path: "src/new_name.rs".to_string(), + new_file: false, + renamed_file: true, + deleted_file: false, + }]; + + upsert_mr_file_changes(&conn, 1, 1, &diffs).unwrap(); + + let (old_path, change_type): (Option, String) = conn + .query_row( + "SELECT old_path, change_type FROM mr_file_changes WHERE new_path = 'src/new_name.rs'", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(old_path.as_deref(), Some("src/old_name.rs")); + assert_eq!(change_type, "renamed"); + } + + #[test] + fn test_non_renamed_has_null_old_path() { + let conn = setup(); + let diffs = [GitLabMrDiff { + old_path: "src/lib.rs".to_string(), + new_path: "src/lib.rs".to_string(), + new_file: false, + renamed_file: false, + deleted_file: false, + }]; + + upsert_mr_file_changes(&conn, 1, 1, &diffs).unwrap(); + + let old_path: Option = conn + .query_row( + "SELECT old_path FROM mr_file_changes WHERE new_path = 'src/lib.rs'", + [], + |r| r.get(0), + ) + .unwrap(); + assert!(old_path.is_none()); + } +} diff --git a/src/ingestion/orchestrator.rs b/src/ingestion/orchestrator.rs index 6475000..13bccbc 100644 --- a/src/ingestion/orchestrator.rs +++ b/src/ingestion/orchestrator.rs @@ -42,6 +42,9 @@ pub enum ProgressEvent { ClosesIssuesFetchStarted { total: 
usize }, ClosesIssueFetched { current: usize, total: usize }, ClosesIssuesFetchComplete { fetched: usize, failed: usize }, + MrDiffsFetchStarted { total: usize }, + MrDiffFetched { current: usize, total: usize }, + MrDiffsFetchComplete { fetched: usize, failed: usize }, } #[derive(Debug, Default)] @@ -76,6 +79,8 @@ pub struct IngestMrProjectResult { pub resource_events_failed: usize, pub closes_issues_fetched: usize, pub closes_issues_failed: usize, + pub mr_diffs_fetched: usize, + pub mr_diffs_failed: usize, } pub async fn ingest_project_issues( @@ -466,6 +471,31 @@ pub async fn ingest_project_merge_requests_with_progress( result.closes_issues_failed = closes_result.failed; } + if signal.is_cancelled() { + info!("Shutdown requested, returning partial MR results"); + return Ok(result); + } + + if config.sync.fetch_mr_file_changes { + let enqueued = enqueue_mr_diffs_jobs(conn, project_id)?; + if enqueued > 0 { + debug!(enqueued, "Enqueued mr_diffs jobs"); + } + + let diffs_result = drain_mr_diffs( + conn, + client, + config, + project_id, + gitlab_project_id, + &progress, + signal, + ) + .await?; + result.mr_diffs_fetched = diffs_result.fetched; + result.mr_diffs_failed = diffs_result.failed; + } + info!( mrs_fetched = result.mrs_fetched, mrs_upserted = result.mrs_upserted, @@ -479,6 +509,8 @@ pub async fn ingest_project_merge_requests_with_progress( resource_events_failed = result.resource_events_failed, closes_issues_fetched = result.closes_issues_fetched, closes_issues_failed = result.closes_issues_failed, + mr_diffs_fetched = result.mr_diffs_fetched, + mr_diffs_failed = result.mr_diffs_failed, "MR project ingestion complete" ); @@ -1188,6 +1220,235 @@ fn store_closes_issues_refs( } } +// ─── MR Diffs (file changes) ──────────────────────────────────────────────── + +fn enqueue_mr_diffs_jobs(conn: &Connection, project_id: i64) -> Result { + // Remove stale jobs for MRs that haven't changed since their last diffs sync + conn.execute( + "DELETE FROM 
pending_dependent_fetches \ + WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'mr_diffs' \ + AND entity_local_id IN ( \ + SELECT id FROM merge_requests \ + WHERE project_id = ?1 \ + AND updated_at <= COALESCE(diffs_synced_for_updated_at, 0) \ + )", + [project_id], + )?; + + let mut stmt = conn.prepare_cached( + "SELECT id, iid FROM merge_requests \ + WHERE project_id = ?1 \ + AND updated_at > COALESCE(diffs_synced_for_updated_at, 0)", + )?; + let entities: Vec<(i64, i64)> = stmt + .query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))? + .collect::, _>>()?; + + let mut enqueued = 0; + for (local_id, iid) in &entities { + if enqueue_job( + conn, + project_id, + "merge_request", + *iid, + *local_id, + "mr_diffs", + None, + )? { + enqueued += 1; + } + } + + Ok(enqueued) +} + +struct PrefetchedMrDiffs { + job_id: i64, + entity_iid: i64, + entity_local_id: i64, + result: + std::result::Result, crate::core::error::LoreError>, +} + +async fn prefetch_mr_diffs( + client: &GitLabClient, + gitlab_project_id: i64, + job_id: i64, + entity_iid: i64, + entity_local_id: i64, +) -> PrefetchedMrDiffs { + let result = client.fetch_mr_diffs(gitlab_project_id, entity_iid).await; + PrefetchedMrDiffs { + job_id, + entity_iid, + entity_local_id, + result, + } +} + +#[instrument( + skip(conn, client, config, progress, signal), + fields(project_id, gitlab_project_id, items_processed, errors) +)] +async fn drain_mr_diffs( + conn: &Connection, + client: &GitLabClient, + config: &Config, + project_id: i64, + gitlab_project_id: i64, + progress: &Option, + signal: &ShutdownSignal, +) -> Result { + let mut result = DrainResult::default(); + let batch_size = config.sync.dependent_concurrency as usize; + + let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?; + if reclaimed > 0 { + info!(reclaimed, "Reclaimed stale mr_diffs locks"); + } + + let claimable_counts = count_claimable_jobs(conn, project_id)?; + let total_pending = 
claimable_counts.get("mr_diffs").copied().unwrap_or(0); + + if total_pending == 0 { + return Ok(result); + } + + let emit = |event: ProgressEvent| { + if let Some(cb) = progress { + cb(event); + } + }; + + emit(ProgressEvent::MrDiffsFetchStarted { + total: total_pending, + }); + + let mut processed = 0; + let mut seen_job_ids = std::collections::HashSet::new(); + + loop { + if signal.is_cancelled() { + info!("Shutdown requested during mr_diffs drain, returning partial results"); + break; + } + + let jobs = claim_jobs(conn, "mr_diffs", project_id, batch_size)?; + if jobs.is_empty() { + break; + } + + // Phase 1: Concurrent HTTP fetches + let futures: Vec<_> = jobs + .iter() + .filter(|j| seen_job_ids.insert(j.id)) + .map(|j| { + prefetch_mr_diffs( + client, + gitlab_project_id, + j.id, + j.entity_iid, + j.entity_local_id, + ) + }) + .collect(); + + if futures.is_empty() { + warn!("All claimed mr_diffs jobs were already processed, breaking drain loop"); + break; + } + + let prefetched = join_all(futures).await; + + // Phase 2: Serial DB writes + for p in prefetched { + match p.result { + Ok(diffs) => { + let store_result = super::mr_diffs::upsert_mr_file_changes( + conn, + p.entity_local_id, + project_id, + &diffs, + ); + + match store_result { + Ok(_) => { + let tx = conn.unchecked_transaction()?; + complete_job_tx(&tx, p.job_id)?; + update_diffs_watermark_tx(&tx, p.entity_local_id)?; + tx.commit()?; + result.fetched += 1; + } + Err(e) => { + warn!( + entity_iid = p.entity_iid, + error = %e, + "Failed to store MR file changes" + ); + fail_job(conn, p.job_id, &e.to_string())?; + result.failed += 1; + } + } + } + Err(e) => { + if e.is_permanent_api_error() { + debug!( + entity_iid = p.entity_iid, + error = %e, + "Permanent API error for mr_diffs, marking complete" + ); + let tx = conn.unchecked_transaction()?; + complete_job_tx(&tx, p.job_id)?; + update_diffs_watermark_tx(&tx, p.entity_local_id)?; + tx.commit()?; + result.skipped_not_found += 1; + } else { + warn!( + 
entity_iid = p.entity_iid, + error = %e, + "Failed to fetch MR diffs from GitLab" + ); + fail_job(conn, p.job_id, &e.to_string())?; + result.failed += 1; + } + } + } + + processed += 1; + emit(ProgressEvent::MrDiffFetched { + current: processed, + total: total_pending, + }); + } + } + + emit(ProgressEvent::MrDiffsFetchComplete { + fetched: result.fetched, + failed: result.failed, + }); + + if result.fetched > 0 || result.failed > 0 { + info!( + fetched = result.fetched, + failed = result.failed, + "mr_diffs drain complete" + ); + } + + tracing::Span::current().record("items_processed", result.fetched); + tracing::Span::current().record("errors", result.failed); + + Ok(result) +} + +fn update_diffs_watermark_tx(tx: &rusqlite::Transaction<'_>, mr_local_id: i64) -> Result<()> { + tx.execute( + "UPDATE merge_requests SET diffs_synced_for_updated_at = updated_at WHERE id = ?", + [mr_local_id], + )?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/main.rs b/src/main.rs index ccd5126..cd404d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use lore::Config; +use lore::cli::autocorrect::{self, CorrectionResult}; use lore::cli::commands::{ IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser, open_mr_in_browser, @@ -49,10 +50,20 @@ async fn main() { // Phase 1: Early robot mode detection for structured clap errors let robot_mode_early = Cli::detect_robot_mode_from_env(); - let cli = match Cli::try_parse() { + // Phase 1.5: Pre-clap arg correction for agent typo tolerance + let raw_args: Vec = std::env::args().collect(); + let correction_result = autocorrect::correct_args(raw_args); + + // Emit correction warnings to stderr (before clap parsing, so they appear + // even if clap still fails on something else) + if !correction_result.corrections.is_empty() 
{ + emit_correction_warnings(&correction_result, robot_mode_early); + } + + let cli = match Cli::try_parse_from(&correction_result.args) { Ok(cli) => cli, Err(e) => { - handle_clap_error(e, robot_mode_early); + handle_clap_error(e, robot_mode_early, &correction_result); } }; let robot_mode = cli.is_robot_mode(); @@ -386,9 +397,50 @@ fn handle_error(e: Box, robot_mode: bool) -> ! { std::process::exit(1); } +/// Emit stderr warnings for any corrections applied during Phase 1.5. +fn emit_correction_warnings(result: &CorrectionResult, robot_mode: bool) { + if robot_mode { + #[derive(Serialize)] + struct CorrectionWarning<'a> { + warning: CorrectionWarningInner<'a>, + } + #[derive(Serialize)] + struct CorrectionWarningInner<'a> { + r#type: &'static str, + corrections: &'a [autocorrect::Correction], + teaching: Vec, + } + + let teaching: Vec = result + .corrections + .iter() + .map(autocorrect::format_teaching_note) + .collect(); + + let warning = CorrectionWarning { + warning: CorrectionWarningInner { + r#type: "ARG_CORRECTED", + corrections: &result.corrections, + teaching, + }, + }; + if let Ok(json) = serde_json::to_string(&warning) { + eprintln!("{json}"); + } + } else { + for c in &result.corrections { + eprintln!( + "{} {}", + style("Auto-corrected:").yellow(), + autocorrect::format_teaching_note(c) + ); + } + } +} + /// Phase 1 & 4: Handle clap parsing errors with structured JSON output in robot mode. -/// Also includes fuzzy command matching to suggest similar commands. -fn handle_clap_error(e: clap::Error, robot_mode: bool) -> ! { +/// Also includes fuzzy command matching and flag-level suggestions. +fn handle_clap_error(e: clap::Error, robot_mode: bool, corrections: &CorrectionResult) -> ! { use clap::error::ErrorKind; // Always let clap handle --help and --version normally (print and exit 0). @@ -406,15 +458,58 @@ fn handle_clap_error(e: clap::Error, robot_mode: bool) -> ! 
{ .unwrap_or("Parse error") .to_string(); - // Phase 4: Try to suggest similar command for unknown commands - let suggestion = if e.kind() == ErrorKind::InvalidSubcommand { - if let Some(invalid_cmd) = extract_invalid_subcommand(&e) { - suggest_similar_command(&invalid_cmd) - } else { - "Run 'lore robot-docs' for valid commands".to_string() + let (suggestion, correction, valid_values) = match e.kind() { + // Phase 4: Suggest similar command for unknown subcommands + ErrorKind::InvalidSubcommand => { + let suggestion = if let Some(invalid_cmd) = extract_invalid_subcommand(&e) { + suggest_similar_command(&invalid_cmd) + } else { + "Run 'lore robot-docs' for valid commands".to_string() + }; + (suggestion, None, None) } - } else { - "Run 'lore robot-docs' for valid commands".to_string() + // Flag-level fuzzy matching for unknown flags + ErrorKind::UnknownArgument => { + let invalid_flag = extract_invalid_flag(&e); + let similar = invalid_flag + .as_deref() + .and_then(|flag| autocorrect::suggest_similar_flag(flag, &corrections.args)); + let suggestion = if let Some(ref s) = similar { + format!("Did you mean '{s}'? Run 'lore robot-docs' for all flags") + } else { + "Run 'lore robot-docs' for valid flags".to_string() + }; + (suggestion, similar, None) + } + // Value-level suggestions for invalid enum values + ErrorKind::InvalidValue => { + let (flag, valid_vals) = extract_invalid_value_context(&e); + let suggestion = if let Some(vals) = &valid_vals { + format!( + "Valid values: {}. 
Run 'lore robot-docs' for details", + vals.join(", ") + ) + } else if let Some(ref f) = flag { + if let Some(vals) = autocorrect::valid_values_for_flag(f) { + format!("Valid values for {f}: {}", vals.join(", ")) + } else { + "Run 'lore robot-docs' for valid values".to_string() + } + } else { + "Run 'lore robot-docs' for valid values".to_string() + }; + let vals_vec = valid_vals.or_else(|| { + flag.as_deref() + .and_then(autocorrect::valid_values_for_flag) + .map(|v| v.iter().map(|s| (*s).to_string()).collect()) + }); + (suggestion, None, vals_vec) + } + _ => ( + "Run 'lore robot-docs' for valid commands".to_string(), + None, + None, + ), }; let output = RobotErrorWithSuggestion { @@ -422,6 +517,8 @@ fn handle_clap_error(e: clap::Error, robot_mode: bool) -> ! { code: error_code.to_string(), message, suggestion, + correction, + valid_values, }, }; eprintln!( @@ -467,6 +564,61 @@ fn extract_invalid_subcommand(e: &clap::Error) -> Option { None } +/// Extract the invalid flag from a clap UnknownArgument error. +/// Format is typically: "error: unexpected argument '--xyzzy' found" +fn extract_invalid_flag(e: &clap::Error) -> Option { + let msg = e.to_string(); + if let Some(start) = msg.find('\'') + && let Some(end) = msg[start + 1..].find('\'') + { + let value = &msg[start + 1..start + 1 + end]; + if value.starts_with('-') { + return Some(value.to_string()); + } + } + None +} + +/// Extract flag name and valid values from a clap InvalidValue error. +/// Returns (flag_name, valid_values_if_listed_in_error). 
+fn extract_invalid_value_context(e: &clap::Error) -> (Option, Option>) { + let msg = e.to_string(); + + // Try to find the flag name from "[possible values: ...]" pattern or from the arg info + // Clap format: "error: invalid value 'opend' for '--state '" + let flag = if let Some(for_pos) = msg.find("for '") { + let after_for = &msg[for_pos + 5..]; + if let Some(end) = after_for.find('\'') { + let raw = &after_for[..end]; + // Strip angle-bracket value placeholder: "--state " -> "--state" + Some(raw.split_whitespace().next().unwrap_or(raw).to_string()) + } else { + None + } + } else { + None + }; + + // Try to extract possible values from the error message + // Clap format: "[possible values: opened, closed, merged, locked, all]" + let valid_values = if let Some(pv_pos) = msg.find("[possible values: ") { + let after_pv = &msg[pv_pos + 18..]; + after_pv.find(']').map(|end| { + after_pv[..end] + .split(", ") + .map(|s| s.trim().to_string()) + .collect() + }) + } else { + // Fall back to our static registry + flag.as_deref() + .and_then(autocorrect::valid_values_for_flag) + .map(|v| v.iter().map(|s| (*s).to_string()).collect()) + }; + + (flag, valid_values) +} + /// Phase 4: Suggest similar command using fuzzy matching fn suggest_similar_command(invalid: &str) -> String { const VALID_COMMANDS: &[&str] = &[ @@ -1009,6 +1161,8 @@ async fn handle_init( code: "MISSING_FLAGS".to_string(), message: format!("Robot mode requires flags: {}", missing.join(", ")), suggestion: "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project".to_string(), + correction: None, + valid_values: None, }, }; eprintln!("{}", serde_json::to_string(&output)?); @@ -1347,6 +1501,8 @@ fn handle_backup(robot_mode: bool) -> Result<(), Box> { code: "NOT_IMPLEMENTED".to_string(), message: "The 'backup' command is not yet implemented.".to_string(), suggestion: "Use manual database backup: cp ~/.local/share/lore/lore.db 
~/.local/share/lore/lore.db.bak".to_string(), + correction: None, + valid_values: None, }, }; eprintln!("{}", serde_json::to_string(&output)?); @@ -1367,6 +1523,8 @@ fn handle_reset(robot_mode: bool) -> Result<(), Box> { message: "The 'reset' command is not yet implemented.".to_string(), suggestion: "Manually delete the database: rm ~/.local/share/lore/lore.db" .to_string(), + correction: None, + valid_values: None, }, }; eprintln!("{}", serde_json::to_string(&output)?); @@ -1403,6 +1561,10 @@ struct RobotErrorSuggestionData { code: String, message: String, suggestion: String, + #[serde(skip_serializing_if = "Option::is_none")] + correction: Option, + #[serde(skip_serializing_if = "Option::is_none")] + valid_values: Option>, } async fn handle_migrate( @@ -1420,6 +1582,8 @@ async fn handle_migrate( code: "DB_ERROR".to_string(), message: format!("Database not found at {}", db_path.display()), suggestion: "Run 'lore init' first".to_string(), + correction: None, + valid_values: None, }, }; eprintln!("{}", serde_json::to_string(&output)?); @@ -1625,6 +1789,9 @@ async fn handle_sync_cmd( if args.no_events { config.sync.fetch_resource_events = false; } + if args.no_file_changes { + config.sync.fetch_mr_file_changes = false; + } let options = SyncOptions { full: args.full && !args.no_full, force: args.force && !args.no_force,