From 9a6357c353e5d6643fe6c1d6ed137e089c40a6f3 Mon Sep 17 00:00:00 2001 From: teernisse Date: Tue, 27 Jan 2026 07:43:56 -0500 Subject: [PATCH] Begin planning phase 3-5 implementation --- .beads/issues.jsonl | 16 + .beads/last-touched | 2 +- AGENTS.md | 146 +- RUST_CLI_TOOLS_BEST_PRACTICES_GUIDE.md | 2729 ++++++++++++++++++++++++ docs/prd/checkpoint-3.md | 2659 +++++++++++++++++++++++ docs/robot-mode-design.md | 239 +++ 6 files changed, 5646 insertions(+), 145 deletions(-) create mode 100644 RUST_CLI_TOOLS_BEST_PRACTICES_GUIDE.md create mode 100644 docs/prd/checkpoint-3.md create mode 100644 docs/robot-mode-design.md diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 8776be0..b0dc81f 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,3 +1,5 @@ +{"id":"bd-10f","title":"Update orchestrator for MR ingestion","description":"## Background\nOrchestrator coordinates MR ingestion followed by dependent discussion sync. Discussion sync targets are queried from DB (not collected in-memory) to handle large projects without memory growth. This is critical for projects with 10k+ MRs where collecting sync targets in memory during ingestion would cause unbounded growth.\n\n## Approach\nUpdate `src/ingestion/orchestrator.rs` to:\n1. Support `merge_requests` resource type in `run_ingestion()` match arm\n2. Query DB for MRs needing discussion sync after MR ingestion completes\n3. 
Execute discussion sync with bounded concurrency using `futures::stream::buffer_unordered`\n\n## Files\n- `src/ingestion/orchestrator.rs` - Update existing orchestrator\n\n## Acceptance Criteria\n- [ ] `run_ingestion()` handles `resource_type == \"merge_requests\"`\n- [ ] After MR ingestion, queries DB for MRs where `updated_at > discussions_synced_for_updated_at`\n- [ ] Discussion sync uses `dependent_concurrency` from config (default 5)\n- [ ] Each MR's discussion sync is independent (partial failures don't block others)\n- [ ] Results aggregated from MR ingestion + all discussion ingestion results\n- [ ] `cargo test orchestrator` passes\n\n## TDD Loop\nRED: `cargo test orchestrator_mr` -> merge_requests not handled\nGREEN: Add MR branch to orchestrator\nVERIFY: `cargo test orchestrator`\n\n## Struct Definition\n```rust\n/// Lightweight struct for DB query results - only fields needed for discussion sync\nstruct MrForDiscussionSync {\n local_mr_id: i64,\n iid: i64,\n updated_at: i64,\n}\n```\n\n## DB Query for Discussion Sync Targets\n```sql\nSELECT id, iid, updated_at\nFROM merge_requests\nWHERE project_id = ?\n AND (discussions_synced_for_updated_at IS NULL\n OR updated_at > discussions_synced_for_updated_at)\nORDER BY updated_at ASC;\n```\n\n## Orchestrator Flow\n```rust\npub async fn run_ingestion(\n &self,\n resource_type: &str,\n full_sync: bool,\n) -> Result {\n match resource_type {\n \"issues\" => self.run_issue_ingestion(full_sync).await,\n \"merge_requests\" => self.run_mr_ingestion(full_sync).await,\n _ => Err(GiError::InvalidArgument {\n name: \"type\".to_string(),\n value: resource_type.to_string(),\n expected: \"issues or merge_requests\".to_string(),\n }),\n }\n}\n\nasync fn run_mr_ingestion(&self, full_sync: bool) -> Result {\n // 1. Ingest MRs (handles cursor reset if full_sync)\n let mr_result = ingest_merge_requests(\n &self.conn, &self.client, &self.config,\n self.project_id, self.gitlab_project_id, full_sync,\n ).await?;\n \n // 2. 
Query DB for MRs needing discussion sync\n // CRITICAL: Do this AFTER ingestion, not during, to avoid memory growth\n let mrs_needing_sync: Vec = {\n let mut stmt = self.conn.prepare(\n \"SELECT id, iid, updated_at FROM merge_requests\n WHERE project_id = ? AND (discussions_synced_for_updated_at IS NULL\n OR updated_at > discussions_synced_for_updated_at)\n ORDER BY updated_at ASC\"\n )?;\n stmt.query_map([self.project_id], |row| {\n Ok(MrForDiscussionSync {\n local_mr_id: row.get(0)?,\n iid: row.get(1)?,\n updated_at: row.get(2)?,\n })\n })?.collect::, _>>()?\n };\n \n let total_needing_sync = mrs_needing_sync.len();\n info!(\"Discussion sync needed for {} MRs\", total_needing_sync);\n \n // 3. Execute discussion sync with bounded concurrency\n let concurrency = self.config.sync.dependent_concurrency.unwrap_or(5);\n \n let discussion_results: Vec> = \n futures::stream::iter(mrs_needing_sync)\n .map(|mr| {\n let conn = &self.conn;\n let client = &self.client;\n let config = &self.config;\n let project_id = self.project_id;\n let gitlab_project_id = self.gitlab_project_id;\n async move {\n ingest_mr_discussions(\n conn, client, config,\n project_id, gitlab_project_id,\n mr.iid, mr.local_mr_id, mr.updated_at,\n ).await\n }\n })\n .buffer_unordered(concurrency)\n .collect()\n .await;\n \n // 4. 
Aggregate results\n let mut total_discussions = 0;\n let mut total_notes = 0;\n let mut total_diffnotes = 0;\n let mut failed_syncs = 0;\n \n for result in discussion_results {\n match result {\n Ok(r) => {\n total_discussions += r.discussions_upserted;\n total_notes += r.notes_upserted;\n total_diffnotes += r.diffnotes_count;\n }\n Err(e) => {\n warn!(\"Discussion sync failed: {}\", e);\n failed_syncs += 1;\n }\n }\n }\n \n Ok(IngestResult {\n mrs_fetched: mr_result.fetched,\n mrs_upserted: mr_result.upserted,\n labels_created: mr_result.labels_created,\n assignees_linked: mr_result.assignees_linked,\n reviewers_linked: mr_result.reviewers_linked,\n discussions_synced: total_discussions,\n notes_synced: total_notes,\n diffnotes_count: total_diffnotes,\n mrs_skipped_discussion_sync: (mr_result.fetched as usize).saturating_sub(total_needing_sync),\n failed_discussion_syncs: failed_syncs,\n })\n}\n```\n\n## Required Imports\n```rust\nuse futures::stream::StreamExt;\nuse crate::ingestion::merge_requests::ingest_merge_requests;\nuse crate::ingestion::mr_discussions::{ingest_mr_discussions, IngestMrDiscussionsResult};\n```\n\n## Config Reference\n```rust\n// In config.rs or similar\npub struct SyncConfig {\n pub dependent_concurrency: Option, // Default 5\n // ... 
other fields\n}\n```\n\n## Edge Cases\n- Large projects: 10k+ MRs may need discussion sync - DB-driven query avoids memory growth\n- Partial failures: Each MR's discussion sync is independent; failures logged but don't stop others\n- Concurrency: Too high (>10) may hit GitLab rate limits; default 5 balances throughput with safety\n- Empty result: If no MRs need sync, discussion phase completes immediately with zero counts","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:42.731140Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:25:13.472341Z","closed_at":"2026-01-27T00:25:13.472281Z","close_reason":"Updated orchestrator for MR ingestion:\n- Added IngestMrProjectResult struct with all MR-specific metrics\n- Added ingest_project_merge_requests() and ingest_project_merge_requests_with_progress()\n- Queries DB for MRs needing discussion sync AFTER ingestion (memory-safe for large projects)\n- Added MR-specific progress events (MrsFetchStarted, MrFetched, etc.)\n- Sequential discussion sync using dependent_concurrency config\n- All 164 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-10f","depends_on_id":"bd-20h","type":"blocks","created_at":"2026-01-26T22:08:54.915469Z","created_by":"tayloreernisse"},{"issue_id":"bd-10f","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:54.805860Z","created_by":"tayloreernisse"}]} +{"id":"bd-10i","title":"Epic: CP2 Gate D - Resumability Proof","description":"## Background\nGate D validates resumability and crash recovery. Proves that cursor and watermark mechanics prevent massive refetch after interruption. 
This is critical for large projects where a full refetch would take hours.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] Kill mid-run, rerun -> bounded redo (not full refetch from beginning)\n- [ ] Cursor saved at page boundary (not item boundary)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] No watermark advancement on partial pagination failure\n- [ ] Single-flight lock prevents concurrent ingest runs\n- [ ] `--full` flag resets MR cursor to NULL\n- [ ] `--full` flag resets ALL `discussions_synced_for_updated_at` to NULL\n- [ ] `--force` bypasses single-flight lock\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate D: Resumability Proof ===\"\n\n# 1. Test single-flight lock\necho \"Step 1: Test single-flight lock...\"\ngi ingest --type=merge_requests &\nFIRST_PID=$!\nsleep 1\n\n# Try second ingest - should fail with lock error\nif gi ingest --type=merge_requests 2>&1 | grep -q \"lock\\|already running\"; then\n echo \" PASS: Second ingest blocked by lock\"\nelse\n echo \" FAIL: Lock not working\"\nfi\nwait $FIRST_PID 2>/dev/null || true\n\n# 2. Test --force bypasses lock\necho \"Step 2: Test --force flag...\"\ngi ingest --type=merge_requests &\nFIRST_PID=$!\nsleep 1\nif gi ingest --type=merge_requests --force 2>&1; then\n echo \" PASS: --force bypassed lock\"\nelse\n echo \" Note: --force test inconclusive\"\nfi\nwait $FIRST_PID 2>/dev/null || true\n\n# 3. Check cursor state\necho \"Step 3: Check cursor state...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT resource_type, updated_at, gitlab_id\n FROM sync_cursors \n WHERE resource_type = 'merge_requests';\n\"\n\n# 4. 
Test crash recovery\necho \"Step 4: Test crash recovery...\"\n\n# Record current cursor\nCURSOR_BEFORE=$(sqlite3 \"$DB_PATH\" \"\n SELECT updated_at FROM sync_cursors WHERE resource_type = 'merge_requests';\n\")\necho \" Cursor before: $CURSOR_BEFORE\"\n\n# Force full sync and kill\necho \" Starting full sync then killing...\"\ngi ingest --type=merge_requests --full &\nPID=$!\nsleep 5 && kill -9 $PID 2>/dev/null || true\nwait $PID 2>/dev/null || true\n\n# Check cursor was saved (should be non-null if any page completed)\nCURSOR_AFTER=$(sqlite3 \"$DB_PATH\" \"\n SELECT updated_at FROM sync_cursors WHERE resource_type = 'merge_requests';\n\")\necho \" Cursor after kill: $CURSOR_AFTER\"\n\n# Re-run and verify bounded redo\necho \" Re-running (should resume from cursor)...\"\ntime gi ingest --type=merge_requests\n# Should be faster than first full sync\n\n# 5. Test --full reset\necho \"Step 5: Test --full resets watermarks...\"\n\n# Check watermarks before\nWATERMARKS_BEFORE=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM merge_requests \n WHERE discussions_synced_for_updated_at IS NOT NULL;\n\")\necho \" Watermarks set before --full: $WATERMARKS_BEFORE\"\n\n# Record cursor before\nCURSOR_BEFORE_FULL=$(sqlite3 \"$DB_PATH\" \"\n SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\n\")\necho \" Cursor before --full: $CURSOR_BEFORE_FULL\"\n\n# Run --full\ngi ingest --type=merge_requests --full\n\n# Check cursor was reset then rebuilt\nCURSOR_AFTER_FULL=$(sqlite3 \"$DB_PATH\" \"\n SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\n\")\necho \" Cursor after --full: $CURSOR_AFTER_FULL\"\n\n# Watermarks should be set again (sync completed)\nWATERMARKS_AFTER=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM merge_requests \n WHERE discussions_synced_for_updated_at IS NOT NULL;\n\")\necho \" Watermarks set after --full: $WATERMARKS_AFTER\"\n\necho \"\"\necho \"=== Gate D: PASSED ===\"\n```\n\n## Watermark 
Safety Test (Simulated Network Failure)\n```bash\n# This tests that watermark doesn't advance on partial failure\n# Requires ability to simulate network issues\n\n# 1. Get an MR that needs discussion sync\nMR_ID=$(sqlite3 \"$DB_PATH\" \"\n SELECT id FROM merge_requests \n WHERE discussions_synced_for_updated_at IS NULL \n OR updated_at > discussions_synced_for_updated_at\n LIMIT 1;\n\")\n\n# 2. Note current watermark\nWATERMARK_BEFORE=$(sqlite3 \"$DB_PATH\" \"\n SELECT discussions_synced_for_updated_at FROM merge_requests WHERE id = $MR_ID;\n\")\necho \"Watermark before: $WATERMARK_BEFORE\"\n\n# 3. Simulate network failure (requires network manipulation)\n# Option A: Block GitLab API temporarily\n# Option B: Run in a container with network limits\n# Option C: Use the automated test instead:\ncargo test does_not_advance_discussion_watermark_on_partial_failure\n\n# 4. Verify watermark unchanged after failure\nWATERMARK_AFTER=$(sqlite3 \"$DB_PATH\" \"\n SELECT discussions_synced_for_updated_at FROM merge_requests WHERE id = $MR_ID;\n\")\necho \"Watermark after failure: $WATERMARK_AFTER\"\n[ \"$WATERMARK_BEFORE\" = \"$WATERMARK_AFTER\" ] && echo \"PASS: Watermark preserved\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Check cursor state:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT * FROM sync_cursors WHERE resource_type = 'merge_requests';\n\"\n\n# Check watermark distribution:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n SUM(CASE WHEN discussions_synced_for_updated_at IS NULL THEN 1 ELSE 0 END) as needs_sync,\n SUM(CASE WHEN discussions_synced_for_updated_at IS NOT NULL THEN 1 ELSE 0 END) as synced\n FROM merge_requests;\n\"\n\n# Test --full resets (check before/after):\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"SELECT COUNT(*) FROM merge_requests WHERE discussions_synced_for_updated_at IS NOT NULL;\"\ngi ingest --type=merge_requests --full\n# During full sync, watermarks should be NULL, then 
repopulated\n```\n\n## Critical Automated Tests\nThese tests MUST pass for Gate D:\n```bash\ncargo test does_not_advance_discussion_watermark_on_partial_failure\ncargo test full_sync_resets_discussion_watermarks\ncargo test cursor_saved_at_page_boundary\n```\n\n## Dependencies\nThis gate requires:\n- bd-mk3 (ingest command with --full and --force support)\n- bd-ser (MR ingestion with cursor mechanics)\n- bd-20h (MR discussion ingestion with watermark safety)\n- Gates A, B, C must pass first\n\n## Edge Cases\n- Very fast sync: May complete before kill signal reaches; retest with larger project\n- Lock file stale: If previous run crashed, lock file may exist; --force handles this\n- Clock skew: Cursor timestamps should use server time, not local time","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:02.124186Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.060596Z","closed_at":"2026-01-27T00:48:21.060555Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-10i","depends_on_id":"bd-mk3","type":"blocks","created_at":"2026-01-26T22:08:55.875790Z","created_by":"tayloreernisse"}]} {"id":"bd-13b","title":"[CP0] CLI entry point with Commander.js","description":"## Background\n\nCommander.js provides the CLI framework. The main entry point sets up the program with all subcommands. 
Uses ESM with proper shebang for npx/global installation.\n\nReference: docs/prd/checkpoint-0.md section \"CLI Commands\"\n\n## Approach\n\n**src/cli/index.ts:**\n```typescript\n#!/usr/bin/env node\n\nimport { Command } from 'commander';\nimport { version } from '../../package.json' with { type: 'json' };\nimport { initCommand } from './commands/init';\nimport { authTestCommand } from './commands/auth-test';\nimport { doctorCommand } from './commands/doctor';\nimport { versionCommand } from './commands/version';\nimport { backupCommand } from './commands/backup';\nimport { resetCommand } from './commands/reset';\nimport { syncStatusCommand } from './commands/sync-status';\n\nconst program = new Command();\n\nprogram\n .name('gi')\n .description('GitLab Inbox - Unified notification management')\n .version(version);\n\n// Global --config flag available to all commands\nprogram.option('-c, --config ', 'Path to config file');\n\n// Register subcommands\nprogram.addCommand(initCommand);\nprogram.addCommand(authTestCommand);\nprogram.addCommand(doctorCommand);\nprogram.addCommand(versionCommand);\nprogram.addCommand(backupCommand);\nprogram.addCommand(resetCommand);\nprogram.addCommand(syncStatusCommand);\n\nprogram.parse();\n```\n\nEach command file exports a Command instance:\n```typescript\n// src/cli/commands/version.ts\nimport { Command } from 'commander';\n\nexport const versionCommand = new Command('version')\n .description('Show version information')\n .action(() => {\n console.log(`gi version ${version}`);\n });\n```\n\n## Acceptance Criteria\n\n- [ ] `gi --help` shows all commands and global options\n- [ ] `gi --version` shows version from package.json\n- [ ] `gi --help` shows command-specific help\n- [ ] `gi --config ./path` passes config path to commands\n- [ ] Unknown command shows error and suggests --help\n- [ ] Exit code 0 on success, non-zero on error\n- [ ] Shebang line works for npx execution\n\n## Files\n\nCREATE:\n- src/cli/index.ts (main entry 
point)\n- src/cli/commands/version.ts (simple command as template)\n\nMODIFY (later beads):\n- package.json (add \"bin\" field pointing to dist/cli/index.js)\n\n## TDD Loop\n\nN/A for CLI entry point - verify with manual testing:\n\n```bash\nnpm run build\nnode dist/cli/index.js --help\nnode dist/cli/index.js version\nnode dist/cli/index.js unknown-command # should error\n```\n\n## Edge Cases\n\n- package.json import requires Node 20+ with { type: 'json' } assertion\n- Alternative: read version from package.json with readFileSync\n- Command registration order affects help display - alphabetical preferred\n- Global options must be defined before subcommands","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:50.499023Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:10:49.224627Z","closed_at":"2026-01-25T03:10:49.224499Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-13b","depends_on_id":"bd-gg1","type":"blocks","created_at":"2026-01-24T16:13:09.370408Z","created_by":"tayloreernisse"}]} {"id":"bd-140","title":"[CP1] Database migration 002_issues.sql","description":"Create migration file with tables for issues, labels, issue_labels, discussions, and notes.\n\nTables to create:\n- issues: gitlab_id, project_id, iid, title, description, state, author_username, timestamps, web_url, raw_payload_id\n- labels: gitlab_id, project_id, name, color, description (unique on project_id+name)\n- issue_labels: junction table\n- discussions: gitlab_discussion_id, project_id, issue_id, noteable_type, individual_note, timestamps, resolvable/resolved\n- notes: gitlab_id, discussion_id, project_id, type, is_system, author_username, body, timestamps, position, resolution fields, DiffNote position fields\n\nInclude appropriate indexes:\n- idx_issues_project_updated, idx_issues_author, uq_issues_project_iid\n- uq_labels_project_name, idx_labels_name\n- idx_issue_labels_label\n- uq_discussions_project_discussion_id, 
idx_discussions_issue/mr/last_note\n- idx_notes_discussion/author/system\n\nFiles: migrations/002_issues.sql\nDone when: Migration applies cleanly on top of 001_initial.sql","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:18:53.954039Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.154936Z","deleted_at":"2026-01-25T15:21:35.154934Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-157","title":"[CP1] Issue transformer with label extraction","description":"Transform GitLab issue payloads to normalized database schema.\n\n## Module\nsrc/gitlab/transformers/issue.rs\n\n## Structs\n\n### NormalizedIssue\n- gitlab_id: i64\n- project_id: i64 (local DB project ID)\n- iid: i64\n- title: String\n- description: Option\n- state: String\n- author_username: String\n- created_at, updated_at, last_seen_at: i64 (ms epoch)\n- web_url: String\n\n### NormalizedLabel (CP1: name-only)\n- project_id: i64\n- name: String\n\n## Functions\n\n### transform_issue(gitlab_issue: &GitLabIssue, local_project_id: i64) -> NormalizedIssue\n- Convert ISO timestamps to ms epoch using iso_to_ms()\n- Set last_seen_at to now_ms()\n- Clone string fields\n\n### extract_labels(gitlab_issue: &GitLabIssue, local_project_id: i64) -> Vec\n- Map labels vec to NormalizedLabel structs\n\nFiles: \n- src/gitlab/transformers/mod.rs\n- src/gitlab/transformers/issue.rs\nTests: tests/issue_transformer_tests.rs\nDone when: Unit tests pass for payload transformation and label extraction","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:47.719562Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.736142Z","deleted_at":"2026-01-25T17:02:01.736129Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} @@ -16,10 +18,12 @@ {"id":"bd-1qz","title":"[CP1] Database 
migration 002_issues.sql","description":"Create migration file with tables for issues, labels, issue_labels, discussions, and notes.\n\n## Tables\n\n### issues\n- id INTEGER PRIMARY KEY\n- gitlab_id INTEGER UNIQUE NOT NULL\n- project_id INTEGER NOT NULL REFERENCES projects(id)\n- iid INTEGER NOT NULL\n- title TEXT, description TEXT, state TEXT\n- author_username TEXT\n- created_at, updated_at, last_seen_at INTEGER (ms epoch UTC)\n- discussions_synced_for_updated_at INTEGER (watermark for dependent sync)\n- web_url TEXT\n- raw_payload_id INTEGER REFERENCES raw_payloads(id)\n\n### labels (name-only for CP1)\n- id INTEGER PRIMARY KEY\n- gitlab_id INTEGER (optional, for future Labels API)\n- project_id INTEGER NOT NULL REFERENCES projects(id)\n- name TEXT NOT NULL\n- color TEXT, description TEXT (nullable, deferred)\n- UNIQUE(project_id, name)\n\n### issue_labels (junction)\n- issue_id, label_id with CASCADE DELETE\n- Clear existing links before INSERT to handle removed labels\n\n### discussions\n- gitlab_discussion_id TEXT (string ID from API)\n- project_id, issue_id/merge_request_id FKs\n- noteable_type TEXT ('Issue' | 'MergeRequest')\n- individual_note INTEGER, first_note_at, last_note_at, last_seen_at\n- resolvable, resolved flags\n- CHECK constraint for Issue vs MR exclusivity\n\n### notes\n- gitlab_id INTEGER UNIQUE NOT NULL\n- discussion_id, project_id FKs\n- note_type, is_system, author_username, body\n- timestamps, position (array order)\n- resolution fields, DiffNote position fields\n\n## Indexes\n- idx_issues_project_updated, idx_issues_author, idx_issues_discussions_sync\n- uq_issues_project_iid, uq_labels_project_name\n- idx_issue_labels_label\n- uq_discussions_project_discussion_id, idx_discussions_issue/mr/last_note\n- idx_notes_discussion/author/system\n\nFiles: migrations/002_issues.sql\nDone when: Migration applies cleanly on top of 001_initial.sql, schema_version = 
2","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:31.464544Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.685262Z","deleted_at":"2026-01-25T17:02:01.685258Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1re","title":"[CP1] gi show issue command","description":"Show issue details with discussions.\n\nFlags:\n- --project=PATH (required if iid is ambiguous across projects)\n\nOutput:\n- Title, project, state, author, dates, labels, URL\n- Description text\n- All discussions with notes (formatted thread view)\n\nHandle ambiguity: If multiple projects have same iid, prompt for --project or show error.\n\nFiles: src/cli/commands/show.ts\nDone when: Issue detail view displays all fields including threaded discussions","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:29.826786Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.153211Z","deleted_at":"2026-01-25T15:21:35.153208Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1s1","title":"[CP1] Integration tests for issue ingestion","description":"Full integration tests for issue ingestion module.\n\n## Tests (tests/issue_ingestion_tests.rs)\n\n- inserts_issues_into_database\n- creates_labels_from_issue_payloads\n- links_issues_to_labels_via_junction_table\n- removes_stale_label_links_on_resync\n- stores_raw_payload_for_each_issue\n- stores_raw_payload_for_each_discussion\n- updates_cursor_incrementally_per_page\n- resumes_from_cursor_on_subsequent_runs\n- handles_issues_with_no_labels\n- upserts_existing_issues_on_refetch\n- skips_discussion_refetch_for_unchanged_issues\n\n## Test Setup\n- tempfile::TempDir for isolated database\n- wiremock::MockServer for GitLab API\n- Mock handlers returning fixture data\n\nFiles: 
tests/issue_ingestion_tests.rs\nDone when: All integration tests pass with mocked GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:59:12.158586Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.109109Z","deleted_at":"2026-01-25T17:02:02.109105Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-1t4","title":"Epic: CP2 Gate C - Dependent Discussion Sync","description":"## Background\nGate C validates the dependent discussion sync with DiffNote position capture. This is critical for code review context preservation - without DiffNote positions, we lose the file/line context for review comments.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] Discussions fetched for MRs with updated_at > discussions_synced_for_updated_at\n- [ ] `SELECT COUNT(*) FROM discussions WHERE merge_request_id IS NOT NULL` > 0\n- [ ] DiffNotes have `position_new_path` populated (file path)\n- [ ] DiffNotes have `position_new_line` populated (line number)\n- [ ] DiffNotes have `position_type` populated (text/image/file)\n- [ ] DiffNotes have SHA triplet: `position_base_sha`, `position_start_sha`, `position_head_sha`\n- [ ] Multi-line DiffNotes have `position_line_range_start` and `position_line_range_end`\n- [ ] Unchanged MRs skip discussion refetch (watermark comparison works)\n- [ ] Watermark NOT advanced on HTTP error mid-pagination\n- [ ] Watermark NOT advanced on note timestamp parse failure\n- [ ] `gi show mr ` displays DiffNote with file context `[path:line]`\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate C: Dependent Discussion Sync ===\"\n\n# 1. 
Check discussion count for MRs\necho \"Step 1: Check MR discussion count...\"\nMR_DISC_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM discussions WHERE merge_request_id IS NOT NULL;\")\necho \" MR discussions: $MR_DISC_COUNT\"\n[ \"$MR_DISC_COUNT\" -gt 0 ] || { echo \"FAIL: No MR discussions found\"; exit 1; }\n\n# 2. Check note count\necho \"Step 2: Check note count...\"\nNOTE_COUNT=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM notes n\n JOIN discussions d ON d.id = n.discussion_id\n WHERE d.merge_request_id IS NOT NULL;\n\")\necho \" MR notes: $NOTE_COUNT\"\n\n# 3. Check DiffNote position data\necho \"Step 3: Check DiffNote positions...\"\nDIFFNOTE_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM notes WHERE position_new_path IS NOT NULL;\")\necho \" DiffNotes with position: $DIFFNOTE_COUNT\"\n\n# 4. Sample DiffNote data\necho \"Step 4: Sample DiffNote data...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT \n n.gitlab_id,\n n.position_new_path,\n n.position_new_line,\n n.position_type,\n SUBSTR(n.position_head_sha, 1, 7) as head_sha\n FROM notes n\n WHERE n.position_new_path IS NOT NULL\n LIMIT 5;\n\"\n\n# 5. Check multi-line DiffNotes\necho \"Step 5: Check multi-line DiffNotes...\"\nMULTILINE_COUNT=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM notes \n WHERE position_line_range_start IS NOT NULL \n AND position_line_range_end IS NOT NULL\n AND position_line_range_start != position_line_range_end;\n\")\necho \" Multi-line DiffNotes: $MULTILINE_COUNT\"\n\n# 6. Check watermarks set\necho \"Step 6: Check watermarks...\"\nWATERMARKED=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM merge_requests \n WHERE discussions_synced_for_updated_at IS NOT NULL;\n\")\necho \" MRs with watermark set: $WATERMARKED\"\n\n# 7. 
Check last_seen_at for sweep pattern\necho \"Step 7: Check last_seen_at (sweep pattern)...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT \n MIN(last_seen_at) as oldest,\n MAX(last_seen_at) as newest\n FROM discussions \n WHERE merge_request_id IS NOT NULL;\n\"\n\n# 8. Test show command with DiffNote\necho \"Step 8: Find MR with DiffNotes for show test...\"\nMR_IID=$(sqlite3 \"$DB_PATH\" \"\n SELECT DISTINCT m.iid\n FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n JOIN notes n ON n.discussion_id = d.id\n WHERE n.position_new_path IS NOT NULL\n LIMIT 1;\n\")\nif [ -n \"$MR_IID\" ]; then\n echo \" Testing: gi show mr $MR_IID\"\n gi show mr \"$MR_IID\" | head -50\nfi\n\n# 9. Re-run and verify skip count\necho \"Step 9: Re-run ingest (should skip unchanged MRs)...\"\ngi ingest --type=merge_requests\n# Should report \"Skipped discussion sync for N unchanged MRs\"\n\necho \"\"\necho \"=== Gate C: PASSED ===\"\n```\n\n## Atomicity Test (Manual - Kill Test)\n```bash\n# This tests that partial failure preserves data\n\n# 1. Get an MR with discussions\nMR_ID=$(sqlite3 \"$DB_PATH\" \"\n SELECT m.id FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n LIMIT 1;\n\")\n\n# 2. Note current note count\nBEFORE=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM notes n\n JOIN discussions d ON d.id = n.discussion_id\n WHERE d.merge_request_id = $MR_ID;\n\")\necho \"Notes before: $BEFORE\"\n\n# 3. Note watermark\nWATERMARK_BEFORE=$(sqlite3 \"$DB_PATH\" \"\n SELECT discussions_synced_for_updated_at FROM merge_requests WHERE id = $MR_ID;\n\")\necho \"Watermark before: $WATERMARK_BEFORE\"\n\n# 4. Force full sync and kill mid-run\ngi ingest --type=merge_requests --full &\nPID=$!\nsleep 3 && kill -9 $PID 2>/dev/null || true\nwait $PID 2>/dev/null || true\n\n# 5. 
Verify notes preserved (should be same or more, never less)\nAFTER=$(sqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM notes n\n JOIN discussions d ON d.id = n.discussion_id\n WHERE d.merge_request_id = $MR_ID;\n\")\necho \"Notes after kill: $AFTER\"\n[ \"$AFTER\" -ge \"$BEFORE\" ] || echo \"WARNING: Notes decreased - atomicity may be broken\"\n\n# 6. Note watermark should NOT have advanced if killed mid-pagination\nWATERMARK_AFTER=$(sqlite3 \"$DB_PATH\" \"\n SELECT discussions_synced_for_updated_at FROM merge_requests WHERE id = $MR_ID;\n\")\necho \"Watermark after: $WATERMARK_AFTER\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Check DiffNote data:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n (SELECT COUNT(*) FROM discussions WHERE merge_request_id IS NOT NULL) as mr_discussions,\n (SELECT COUNT(*) FROM notes WHERE position_new_path IS NOT NULL) as diffnotes,\n (SELECT COUNT(*) FROM merge_requests WHERE discussions_synced_for_updated_at IS NOT NULL) as watermarked;\n\"\n\n# Find MR with DiffNotes and show it:\ngi show mr $(sqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT DISTINCT m.iid FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n JOIN notes n ON n.discussion_id = d.id\n WHERE n.position_new_path IS NOT NULL LIMIT 1;\n\")\n```\n\n## Dependencies\nThis gate requires:\n- bd-3j6 (Discussion transformer with DiffNote position extraction)\n- bd-20h (MR discussion ingestion with atomicity guarantees)\n- bd-iba (Client pagination for MR discussions)\n- Gates A and B must pass first\n\n## Edge Cases\n- MRs without discussions: should sync successfully, just with 0 discussions\n- Discussions without DiffNotes: regular comments have NULL position fields\n- Deleted discussions in GitLab: sweep pattern should remove them locally\n- Invalid note timestamps: should NOT advance watermark, should log 
warning","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:01.769694Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.060017Z","closed_at":"2026-01-27T00:48:21.059974Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1t4","depends_on_id":"bd-20h","type":"blocks","created_at":"2026-01-26T22:08:55.778989Z","created_by":"tayloreernisse"}]} {"id":"bd-1ta","title":"[CP1] Integration tests for pagination","description":"Integration tests for GitLab pagination with wiremock.\n\n## Tests (tests/pagination_tests.rs)\n\n### Page Navigation\n- fetches_all_pages_when_multiple_exist\n- respects_per_page_parameter\n- follows_x_next_page_header_until_empty\n- falls_back_to_empty_page_stop_if_headers_missing\n\n### Cursor Behavior\n- applies_cursor_rewind_for_tuple_semantics\n- clamps_negative_rewind_to_zero\n\n## Test Setup\n- Use wiremock::MockServer\n- Set up handlers for /api/v4/projects/:id/issues\n- Return x-next-page headers\n- Verify request params (updated_after, per_page)\n\nFiles: tests/pagination_tests.rs\nDone when: All pagination tests pass with mocked server","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:59:07.806593Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.038945Z","deleted_at":"2026-01-25T17:02:02.038939Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1ut","title":"[CP0] Final validation - tests, lint, typecheck","description":"## Background\n\nFinal validation ensures everything works together before marking CP0 complete. This is the integration gate - all unit tests, integration tests, lint, and type checking must pass. 
Manual smoke tests verify the full user experience.\n\nReference: docs/prd/checkpoint-0.md sections \"Definition of Done\", \"Manual Smoke Tests\"\n\n## Approach\n\n**Automated checks:**\n```bash\n# All tests pass\nnpm run test\n\n# TypeScript strict mode\nnpm run build # or: npx tsc --noEmit\n\n# ESLint with no errors\nnpm run lint\n```\n\n**Manual smoke tests (from PRD table):**\n\n| Command | Expected | Pass Criteria |\n|---------|----------|---------------|\n| `gi --help` | Command list | Shows all commands |\n| `gi version` | Version number | Shows installed version |\n| `gi init` | Interactive prompts | Creates valid config |\n| `gi init` (config exists) | Confirmation prompt | Warns before overwriting |\n| `gi init --force` | No prompt | Overwrites without asking |\n| `gi auth-test` | `Authenticated as @username` | Shows GitLab username |\n| `GITLAB_TOKEN=invalid gi auth-test` | Error message | Non-zero exit, clear error |\n| `gi doctor` | Status table | All required checks pass |\n| `gi doctor --json` | JSON object | Valid JSON, `success: true` |\n| `gi backup` | Backup path | Creates timestamped backup |\n| `gi sync-status` | No runs message | Stub output works |\n\n**Definition of Done gate items:**\n- [ ] `gi init` writes config to XDG path and validates projects against GitLab\n- [ ] `gi auth-test` succeeds with real PAT\n- [ ] `gi doctor` reports DB ok + GitLab ok\n- [ ] DB migrations apply; WAL + FK enabled; busy_timeout + synchronous set\n- [ ] App lock mechanism works (concurrent runs blocked)\n- [ ] All unit tests pass\n- [ ] All integration tests pass (mocked)\n- [ ] ESLint passes with no errors\n- [ ] TypeScript compiles with strict mode\n\n## Acceptance Criteria\n\n- [ ] `npm run test` exits 0 (all tests pass)\n- [ ] `npm run build` exits 0 (TypeScript compiles)\n- [ ] `npm run lint` exits 0 (no ESLint errors)\n- [ ] All 11 manual smoke tests pass\n- [ ] All 9 Definition of Done gate items verified\n\n## Files\n\nNo new files created. 
This bead verifies existing work.\n\n## TDD Loop\n\nThis IS the final verification step:\n\n```bash\n# Automated\nnpm run test\nnpm run build\nnpm run lint\n\n# Manual (requires GITLAB_TOKEN set with valid token)\ngi --help\ngi version\ngi init # go through setup\ngi auth-test\ngi doctor\ngi doctor --json | jq .success # should output true\ngi backup\ngi sync-status\ngi reset --confirm\ngi init # re-setup\n```\n\n## Edge Cases\n\n- Test coverage should be reasonable (aim for 80%+ on core modules)\n- Integration tests may flake on CI - check MSW setup\n- Manual tests require real GitLab token - document in README\n- ESLint may warn vs error - only errors block\n- TypeScript noImplicitAny catches missed types","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:52.078907Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:37:51.858558Z","closed_at":"2026-01-25T03:37:51.858474Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1ut","depends_on_id":"bd-1cb","type":"blocks","created_at":"2026-01-24T16:13:11.184261Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ut","depends_on_id":"bd-1gu","type":"blocks","created_at":"2026-01-24T16:13:11.168637Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ut","depends_on_id":"bd-1kh","type":"blocks","created_at":"2026-01-24T16:13:11.219042Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ut","depends_on_id":"bd-38e","type":"blocks","created_at":"2026-01-24T16:13:11.150286Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ut","depends_on_id":"bd-3kj","type":"blocks","created_at":"2026-01-24T16:13:11.200998Z","created_by":"tayloreernisse"}]} {"id":"bd-1yu","title":"[CP1] GitLab types for issues, discussions, notes","description":"Add TypeScript interfaces for GitLab API responses.\n\nTypes to add to src/gitlab/types.ts:\n- GitLabIssue: id, iid, project_id, title, description, state, timestamps, author, labels[], labels_details?, web_url\n- 
GitLabDiscussion: id (string), individual_note, notes[]\n- GitLabNote: id, type, body, author, timestamps, system, resolvable, resolved, resolved_by, resolved_at, position?\n\nFiles: src/gitlab/types.ts\nDone when: Types compile and match GitLab API documentation","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:00.558718Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.153996Z","deleted_at":"2026-01-25T15:21:35.153993Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-208","title":"[CP1] Issue ingestion module","description":"## Background\n\nThe issue ingestion module fetches and stores issues with cursor-based incremental sync. It is the primary data ingestion component, establishing the pattern reused for MR ingestion in CP2. The module handles tuple-cursor semantics, raw payload storage, label extraction, and tracking which issues need discussion sync.\n\n## Approach\n\n### Module: src/ingestion/issues.rs\n\n### Key Structs\n\n```rust\n#[derive(Debug, Default)]\npub struct IngestIssuesResult {\n pub fetched: usize,\n pub upserted: usize,\n pub labels_created: usize,\n pub issues_needing_discussion_sync: Vec,\n}\n\n#[derive(Debug, Clone)]\npub struct IssueForDiscussionSync {\n pub local_issue_id: i64,\n pub iid: i64,\n pub updated_at: i64, // ms epoch\n}\n```\n\n### Main Function\n\n```rust\npub async fn ingest_issues(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64, // Local DB project ID\n gitlab_project_id: i64,\n) -> Result\n```\n\n### Logic (Step by Step)\n\n1. **Get current cursor** from sync_cursors table:\n```sql\nSELECT updated_at_cursor, tie_breaker_id\nFROM sync_cursors\nWHERE project_id = ? AND resource_type = 'issues'\n```\n\n2. 
**Call pagination method** with cursor rewind:\n```rust\nlet issues_stream = client.paginate_issues(\n gitlab_project_id,\n cursor.updated_at_cursor,\n config.sync.cursor_rewind_seconds,\n);\n```\n\n3. **Apply local filtering** for tuple cursor semantics:\n```rust\n// Skip if issue.updated_at < cursor_updated_at\n// Skip if issue.updated_at == cursor_updated_at AND issue.gitlab_id <= cursor_gitlab_id\nfn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> bool {\n if issue.updated_at < cursor.updated_at_cursor {\n return false;\n }\n if issue.updated_at == cursor.updated_at_cursor \n && issue.gitlab_id <= cursor.tie_breaker_id {\n return false;\n }\n true\n}\n```\n\n4. **For each issue passing filter**:\n```rust\n// Begin transaction (unchecked_transaction for rusqlite)\nlet tx = conn.unchecked_transaction()?;\n\n// Store raw payload (compressed based on config)\nlet payload_id = store_raw_payload(&tx, &issue_json, config.storage.compress_raw_payloads)?;\n\n// Transform and upsert issue\nlet issue_row = transform_issue(&issue)?;\nupsert_issue(&tx, &issue_row, project_id, payload_id)?;\nlet local_issue_id = get_local_issue_id(&tx, project_id, issue.iid)?;\n\n// Clear existing label links (stale removal!)\ntx.execute(\"DELETE FROM issue_labels WHERE issue_id = ?\", [local_issue_id])?;\n\n// Extract and upsert labels\nfor label_name in &issue_row.label_names {\n let label_id = upsert_label(&tx, project_id, label_name)?;\n link_issue_label(&tx, local_issue_id, label_id)?;\n}\n\ntx.commit()?;\n```\n\n5. **Incremental cursor update** every 100 issues:\n```rust\nif batch_count % 100 == 0 {\n update_sync_cursor(conn, project_id, \"issues\", last_updated_at, last_gitlab_id)?;\n}\n```\n\n6. **Final cursor update** after all issues processed\n\n7. 
**Determine issues needing discussion sync**:\n```sql\nSELECT id, iid, updated_at\nFROM issues\nWHERE project_id = ?\n AND updated_at > COALESCE(discussions_synced_for_updated_at, 0)\n```\n\n### Helper Functions\n\n```rust\nfn store_raw_payload(conn, json: &Value, compress: bool) -> Result\nfn upsert_issue(conn, issue: &IssueRow, project_id: i64, payload_id: i64) -> Result<()>\nfn get_local_issue_id(conn, project_id: i64, iid: i64) -> Result\nfn upsert_label(conn, project_id: i64, name: &str) -> Result\nfn link_issue_label(conn, issue_id: i64, label_id: i64) -> Result<()>\nfn update_sync_cursor(conn, project_id: i64, resource: &str, updated_at: i64, gitlab_id: i64) -> Result<()>\n```\n\n### Critical Invariant\n\nStale label links MUST be removed on resync. The \"DELETE then INSERT\" pattern ensures GitLab reality is reflected locally. If an issue had labels [A, B] and now has [A, C], the B link must be removed.\n\n## Acceptance Criteria\n\n- [ ] `ingest_issues` returns IngestIssuesResult with all counts\n- [ ] Cursor fetched from sync_cursors at start\n- [ ] Cursor rewind applied before API call\n- [ ] Local filtering skips already-processed issues\n- [ ] Each issue wrapped in transaction for atomicity\n- [ ] Raw payload stored with correct compression\n- [ ] Issue upserted (INSERT OR REPLACE pattern)\n- [ ] Existing label links deleted before new links inserted\n- [ ] Labels upserted (INSERT OR IGNORE by project+name)\n- [ ] Cursor updated every 100 issues (crash recovery)\n- [ ] Final cursor update after all issues\n- [ ] issues_needing_discussion_sync populated correctly\n\n## Files\n\n- src/ingestion/mod.rs (add `pub mod issues;`)\n- src/ingestion/issues.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/issue_ingestion_tests.rs\n#[tokio::test] async fn ingests_issues_from_stream()\n#[tokio::test] async fn applies_cursor_filter_correctly()\n#[tokio::test] async fn updates_cursor_every_100_issues()\n#[tokio::test] async fn 
stores_raw_payload_for_each_issue()\n#[tokio::test] async fn upserts_issues_correctly()\n\n// tests/label_linkage_tests.rs\n#[tokio::test] async fn extracts_and_stores_labels()\n#[tokio::test] async fn removes_stale_label_links_on_resync()\n#[tokio::test] async fn handles_empty_labels_array()\n\n// tests/discussion_eligibility_tests.rs\n#[tokio::test] async fn identifies_issues_needing_discussion_sync()\n#[tokio::test] async fn skips_issues_with_current_watermark()\n```\n\nGREEN: Implement ingest_issues with all helper functions\n\nVERIFY: `cargo test issue_ingestion && cargo test label_linkage && cargo test discussion_eligibility`\n\n## Edge Cases\n\n- Empty issues stream - return result with all zeros\n- Cursor at epoch 0 - fetch all issues (no filtering)\n- Issue with no labels - empty Vec, no label links created\n- Issue with 50+ labels - all should be linked\n- Crash mid-batch - cursor at last 100-boundary, some issues re-fetched\n- Label already exists - upsert via INSERT OR IGNORE\n- Same issue fetched twice (due to rewind) - upsert handles it","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.245404Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:52:38.003964Z","closed_at":"2026-01-25T22:52:38.003868Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-208","depends_on_id":"bd-2iq","type":"blocks","created_at":"2026-01-25T17:04:05.425224Z","created_by":"tayloreernisse"},{"issue_id":"bd-208","depends_on_id":"bd-3nd","type":"blocks","created_at":"2026-01-25T17:04:05.450341Z","created_by":"tayloreernisse"},{"issue_id":"bd-208","depends_on_id":"bd-xhz","type":"blocks","created_at":"2026-01-25T17:04:05.473203Z","created_by":"tayloreernisse"}]} +{"id":"bd-20h","title":"Implement MR discussion ingestion module","description":"## Background\nMR discussion ingestion with critical atomicity guarantees. Parse notes BEFORE destructive DB operations to prevent data loss. 
Watermark ONLY advanced on full success.\n\n## Approach\nCreate `src/ingestion/mr_discussions.rs` with:\n1. `IngestMrDiscussionsResult` - Per-MR stats\n2. `ingest_mr_discussions()` - Main function with atomicity guarantees\n3. Upsert + sweep pattern for notes (not delete-all-then-insert)\n4. Sync health telemetry for debugging failures\n\n## Files\n- `src/ingestion/mr_discussions.rs` - New module\n- `tests/mr_discussion_ingestion_tests.rs` - Integration tests\n\n## Acceptance Criteria\n- [ ] `IngestMrDiscussionsResult` has: discussions_fetched, discussions_upserted, notes_upserted, notes_skipped_bad_timestamp, diffnotes_count, pagination_succeeded\n- [ ] `ingest_mr_discussions()` returns `Result`\n- [ ] CRITICAL: Notes parsed BEFORE any DELETE operations\n- [ ] CRITICAL: Watermark NOT advanced if `pagination_succeeded == false`\n- [ ] CRITICAL: Watermark NOT advanced if any note parse fails\n- [ ] Upsert + sweep pattern using `last_seen_at`\n- [ ] Stale discussions/notes removed only on full success\n- [ ] Selective raw payload storage (skip system notes without position)\n- [ ] Sync health telemetry recorded on failure\n- [ ] `does_not_advance_discussion_watermark_on_partial_failure` test passes\n- [ ] `atomic_note_replacement_preserves_data_on_parse_failure` test passes\n\n## TDD Loop\nRED: `cargo test does_not_advance_watermark` -> test fails\nGREEN: Add ingestion with atomicity guarantees\nVERIFY: `cargo test mr_discussion_ingestion`\n\n## Main Function\n```rust\npub async fn ingest_mr_discussions(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64,\n gitlab_project_id: i64,\n mr_iid: i64,\n local_mr_id: i64,\n mr_updated_at: i64,\n) -> Result\n```\n\n## CRITICAL: Atomic Note Replacement\n```rust\n// Record sync start time for sweep\nlet run_seen_at = now_ms();\n\nwhile let Some(discussion_result) = stream.next().await {\n let discussion = match discussion_result {\n Ok(d) => d,\n Err(e) => {\n result.pagination_succeeded = 
false;\n break; // Stop but don't advance watermark\n }\n };\n \n // CRITICAL: Parse BEFORE destructive operations\n let notes = match transform_notes_with_diff_position(&discussion, project_id) {\n Ok(notes) => notes,\n Err(e) => {\n warn!(\"Note transform failed; preserving existing notes\");\n result.notes_skipped_bad_timestamp += discussion.notes.len();\n result.pagination_succeeded = false;\n continue; // Skip this discussion, don't delete existing\n }\n };\n \n // Only NOW start transaction (after parse succeeded)\n let tx = conn.unchecked_transaction()?;\n \n // Upsert discussion with run_seen_at\n // Upsert notes with run_seen_at (not delete-all)\n \n tx.commit()?;\n}\n```\n\n## Stale Data Sweep (only on success)\n```rust\nif result.pagination_succeeded {\n // Sweep stale discussions\n conn.execute(\n \"DELETE FROM discussions\n WHERE project_id = ? AND merge_request_id = ?\n AND last_seen_at < ?\",\n params![project_id, local_mr_id, run_seen_at],\n )?;\n \n // Sweep stale notes\n conn.execute(\n \"DELETE FROM notes\n WHERE discussion_id IN (\n SELECT id FROM discussions\n WHERE project_id = ? 
AND merge_request_id = ?\n )\n AND last_seen_at < ?\",\n params![project_id, local_mr_id, run_seen_at],\n )?;\n}\n```\n\n## Watermark Update (ONLY on success)\n```rust\nif result.pagination_succeeded {\n mark_discussions_synced(conn, local_mr_id, mr_updated_at)?;\n clear_sync_health_error(conn, local_mr_id)?;\n} else {\n record_sync_health_error(conn, local_mr_id, \"Pagination incomplete or parse failure\")?;\n warn!(\"Watermark NOT advanced; will retry on next sync\");\n}\n```\n\n## Selective Payload Storage\n```rust\n// Only store payload for DiffNotes and non-system notes\nlet should_store_note_payload =\n !note.is_system() ||\n note.position_new_path().is_some() ||\n note.position_old_path().is_some();\n```\n\n## Integration Tests (CRITICAL)\n```rust\n#[tokio::test]\nasync fn does_not_advance_discussion_watermark_on_partial_failure() {\n // Setup: MR with updated_at > discussions_synced_for_updated_at\n // Mock: Page 1 returns OK, Page 2 returns 500\n // Assert: discussions_synced_for_updated_at unchanged\n}\n\n#[tokio::test]\nasync fn does_not_advance_discussion_watermark_on_note_parse_failure() {\n // Setup: Existing notes in DB\n // Mock: Discussion with note having invalid created_at\n // Assert: Original notes preserved, watermark unchanged\n}\n\n#[tokio::test]\nasync fn atomic_note_replacement_preserves_data_on_parse_failure() {\n // Setup: Discussion with 3 valid notes\n // Mock: Updated discussion where note 2 has bad timestamp\n // Assert: All 3 original notes still in DB\n}\n```\n\n## Edge Cases\n- HTTP error mid-pagination: preserve existing data, log error, no watermark advance\n- Invalid note timestamp: skip discussion, preserve existing notes\n- System notes without position: don't store raw payload (saves space)\n- Empty discussion: still upsert discussion record, no 
notes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:42.335714Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:22:43.207057Z","closed_at":"2026-01-27T00:22:43.206996Z","close_reason":"Implemented MR discussion ingestion module with full atomicity guarantees:\n- IngestMrDiscussionsResult with all required fields\n- parse-before-destructive pattern (transform notes before DB ops)\n- Upsert + sweep pattern with last_seen_at timestamps\n- Watermark advanced ONLY on full pagination success\n- Selective payload storage (skip system notes without position)\n- Sync health telemetry for failure debugging\n- All 163 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-20h","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.649094Z","created_by":"tayloreernisse"},{"issue_id":"bd-20h","depends_on_id":"bd-3j6","type":"blocks","created_at":"2026-01-26T22:08:54.686066Z","created_by":"tayloreernisse"},{"issue_id":"bd-20h","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:54.722746Z","created_by":"tayloreernisse"}]} {"id":"bd-227","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\n## Module\nsrc/cli/commands/count.rs\n\n## Clap Definition\nCount {\n #[arg(value_parser = [\"issues\", \"mrs\", \"discussions\", \"notes\"])]\n entity: String,\n \n #[arg(long, value_parser = [\"issue\", \"mr\"])]\n r#type: Option,\n}\n\n## Commands\n- gi count issues → 'Issues: N'\n- gi count discussions → 'Discussions: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes → 'Notes: N (excluding M system)'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\n## Implementation\n- Simple COUNT(*) queries\n- For notes, also count WHERE is_system = 1 for system note count\n- Filter by noteable_type when --type specified\n\nFiles: src/cli/commands/count.rs\nDone when: Counts 
match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:25.648805Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.920135Z","deleted_at":"2026-01-25T17:02:01.920129Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2as","title":"[CP1] Epic: Issue Ingestion","description":"Ingest all issues, labels, and issue discussions from configured GitLab repositories with resumable cursor-based incremental sync. This establishes the core data ingestion pattern reused for MRs in CP2.\n\nSuccess Criteria:\n- gi ingest --type=issues fetches all issues (count matches GitLab UI)\n- Labels extracted from issue payloads\n- Issue discussions fetched per-issue\n- Cursor-based sync is resumable\n- Sync tracking records all runs\n- Single-flight lock prevents concurrent runs\n\nReference: docs/prd/checkpoint-1.md","status":"tombstone","priority":1,"issue_type":"task","created_at":"2026-01-25T15:18:44.062057Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.155746Z","deleted_at":"2026-01-25T15:21:35.155744Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-2bu","title":"[CP1] GitLab types for issues, discussions, notes","description":"Add Rust types to src/gitlab/types.rs for GitLab API responses.\n\n## Types to Add\n\n### GitLabIssue\n- id: i64 (GitLab global ID)\n- iid: i64 (project-scoped issue number)\n- project_id: i64\n- title: String\n- description: Option\n- state: String (\"opened\" | \"closed\")\n- created_at, updated_at: String (ISO 8601)\n- closed_at: Option\n- author: GitLabAuthor\n- labels: Vec (array of label names - CP1 canonical)\n- web_url: String\nNOTE: labels_details intentionally NOT modeled - varies across GitLab versions\n\n### GitLabAuthor\n- id: i64\n- username: String\n- name: String\n\n### 
GitLabDiscussion\n- id: String (like \"6a9c1750b37d...\")\n- individual_note: bool\n- notes: Vec\n\n### GitLabNote\n- id: i64\n- note_type: Option (\"DiscussionNote\" | \"DiffNote\" | null)\n- body: String\n- author: GitLabAuthor\n- created_at, updated_at: String (ISO 8601)\n- system: bool\n- resolvable: bool (default false)\n- resolved: bool (default false)\n- resolved_by: Option\n- resolved_at: Option\n- position: Option\n\n### GitLabNotePosition\n- old_path, new_path: Option\n- old_line, new_line: Option\n\nFiles: src/gitlab/types.rs\nTests: Test deserialization with fixtures\nDone when: Types compile and deserialize sample API responses correctly","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:46.922805Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.710057Z","deleted_at":"2026-01-25T17:02:01.710051Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} @@ -36,19 +40,27 @@ {"id":"bd-2ys","title":"[CP1] Cargo.toml updates - async-stream and futures","description":"## Background\n\nThe GitLab client pagination methods require async streaming capabilities. 
The `async-stream` crate provides the `stream!` macro for creating async iterators, and `futures` provides `StreamExt` for consuming them with `.next()` and other combinators.\n\n## Approach\n\nAdd these dependencies to Cargo.toml:\n\n```toml\n[dependencies]\nasync-stream = \"0.3\"\nfutures = { version = \"0.3\", default-features = false, features = [\"alloc\"] }\n```\n\nUse minimal features on `futures` to avoid pulling unnecessary code.\n\n## Acceptance Criteria\n\n- [ ] `async-stream = \"0.3\"` is in Cargo.toml [dependencies]\n- [ ] `futures` with `alloc` feature is in Cargo.toml [dependencies]\n- [ ] `cargo check` succeeds after adding dependencies\n\n## Files\n\n- Cargo.toml (edit)\n\n## TDD Loop\n\nRED: Not applicable (dependency addition)\nGREEN: Add lines to Cargo.toml\nVERIFY: `cargo check`\n\n## Edge Cases\n\n- If `futures` is already present, merge features rather than duplicate\n- Use exact version pins for reproducibility","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.104664Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:25:10.274787Z","closed_at":"2026-01-25T22:25:10.274727Z","close_reason":"Added async-stream 0.3 and futures 0.3 (alloc feature) to Cargo.toml, cargo check passes","compaction_level":0,"original_size":0} {"id":"bd-2zr","title":"[CP1] GitLab client pagination methods","description":"Add async stream methods for paginated GitLab API calls.\n\n## Methods to Add to GitLabClient\n\n### paginate_issues(gitlab_project_id, updated_after, cursor_rewind_seconds) -> Stream\n- Use async_stream::try_stream! 
macro\n- Query params: scope=all, state=all, order_by=updated_at, sort=asc, per_page=100\n- If updated_after provided, apply cursor_rewind_seconds (subtract from timestamp)\n- Clamp to 0 to avoid underflow: (ts - rewind_ms).max(0)\n- Follow x-next-page header until empty/absent\n- Fall back to empty-page detection if headers missing\n\n### paginate_issue_discussions(gitlab_project_id, issue_iid) -> Stream\n- Paginate through discussions for single issue\n- per_page=100\n- Follow x-next-page header\n\n### request_with_headers(path, params) -> Result<(T, HeaderMap)>\n- Acquire rate limiter\n- Make request with PRIVATE-TOKEN header\n- Return both deserialized data and response headers\n\n## Dependencies\n- async-stream = \"0.3\" (for try_stream! macro)\n- futures = \"0.3\" (for Stream trait and StreamExt)\n\nFiles: src/gitlab/client.rs\nTests: tests/pagination_tests.rs\nDone when: Pagination handles multiple pages and respects cursors, tests pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:13.045971Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.784887Z","deleted_at":"2026-01-25T17:02:01.784883Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-31b","title":"[CP1] Discussion ingestion module","description":"Fetch and store discussions/notes for each issue.\n\nImplement ingestIssueDiscussions(options) → { discussionsFetched, discussionsUpserted, notesUpserted, systemNotesCount }\n\nLogic:\n1. Paginate through all discussions for given issue\n2. 
For each discussion:\n - Store raw payload (compressed)\n - Upsert discussion record with correct issue FK\n - Transform and upsert all notes\n - Store raw payload per note\n - Track system notes count\n\nFiles: src/ingestion/discussions.ts\nTests: tests/integration/issue-discussion-ingestion.test.ts\nDone when: Discussions and notes populated with correct FKs and is_system flags","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:57.131442Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156574Z","deleted_at":"2026-01-25T15:21:35.156571Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-31i","title":"Epic: CP2 Gate B - Labels + Assignees + Reviewers","description":"## Background\nGate B validates junction tables for labels, assignees, and reviewers. Ensures relationships are tracked correctly and stale links are removed on resync. This is critical for filtering (`--reviewer=alice`) and display.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] `mr_labels` table has rows for MRs with labels\n- [ ] Label count per MR matches GitLab UI (spot check 3 MRs)\n- [ ] `mr_assignees` table has rows for MRs with assignees\n- [ ] Assignee usernames match GitLab UI (spot check 3 MRs)\n- [ ] `mr_reviewers` table has rows for MRs with reviewers\n- [ ] Reviewer usernames match GitLab UI (spot check 3 MRs)\n- [ ] Remove label in GitLab -> resync -> link removed from mr_labels\n- [ ] Add reviewer in GitLab -> resync -> link added to mr_reviewers\n- [ ] `gi list mrs --label=bugfix` filters correctly\n- [ ] `gi list mrs --reviewer=alice` filters correctly\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate B: Labels + Assignees + Reviewers ===\"\n\n# 1. 
Check label linkage exists\necho \"Step 1: Check label linkage...\"\nLABEL_LINKS=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM mr_labels;\")\necho \" Total label links: $LABEL_LINKS\"\n\n# 2. Show sample label linkage\necho \"Step 2: Sample label linkage...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT m.iid, GROUP_CONCAT(l.name, ', ') as labels\n FROM merge_requests m\n JOIN mr_labels ml ON ml.merge_request_id = m.id\n JOIN labels l ON l.id = ml.label_id\n GROUP BY m.id\n LIMIT 5;\n\"\n\n# 3. Check assignee linkage\necho \"Step 3: Check assignee linkage...\"\nASSIGNEE_LINKS=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM mr_assignees;\")\necho \" Total assignee links: $ASSIGNEE_LINKS\"\n\n# 4. Show sample assignee linkage\necho \"Step 4: Sample assignee linkage...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT m.iid, GROUP_CONCAT(a.username, ', ') as assignees\n FROM merge_requests m\n JOIN mr_assignees a ON a.merge_request_id = m.id\n GROUP BY m.id\n LIMIT 5;\n\"\n\n# 5. Check reviewer linkage\necho \"Step 5: Check reviewer linkage...\"\nREVIEWER_LINKS=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM mr_reviewers;\")\necho \" Total reviewer links: $REVIEWER_LINKS\"\n\n# 6. Show sample reviewer linkage\necho \"Step 6: Sample reviewer linkage...\"\nsqlite3 \"$DB_PATH\" \"\n SELECT m.iid, GROUP_CONCAT(r.username, ', ') as reviewers\n FROM merge_requests m\n JOIN mr_reviewers r ON r.merge_request_id = m.id\n GROUP BY m.id\n LIMIT 5;\n\"\n\n# 7. 
Test filter commands\necho \"Step 7: Test filter commands...\"\n# Get a label that exists\nLABEL=$(sqlite3 \"$DB_PATH\" \"SELECT name FROM labels LIMIT 1;\")\nif [ -n \"$LABEL\" ]; then\n echo \" Testing --label=$LABEL\"\n gi list mrs --label=\"$LABEL\" --limit=3\nfi\n\n# Get a reviewer that exists\nREVIEWER=$(sqlite3 \"$DB_PATH\" \"SELECT username FROM mr_reviewers LIMIT 1;\")\nif [ -n \"$REVIEWER\" ]; then\n echo \" Testing --reviewer=$REVIEWER\"\n gi list mrs --reviewer=\"$REVIEWER\" --limit=3\nfi\n\necho \"\"\necho \"=== Gate B: PASSED ===\"\n```\n\n## Stale Link Removal Test (Manual)\n```bash\n# 1. Pick an MR with labels in GitLab UI\nMR_IID=123\n\n# 2. Note current label count\nsqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM mr_labels ml\n JOIN merge_requests m ON m.id = ml.merge_request_id\n WHERE m.iid = $MR_IID;\n\"\n# Example: 3 labels\n\n# 3. Remove a label in GitLab UI (manually)\n\n# 4. Resync\ngi ingest --type=merge_requests\n\n# 5. Verify label removed\nsqlite3 \"$DB_PATH\" \"\n SELECT COUNT(*) FROM mr_labels ml\n JOIN merge_requests m ON m.id = ml.merge_request_id\n WHERE m.iid = $MR_IID;\n\"\n# Should be: 2 labels (one less)\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Check counts:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n (SELECT COUNT(*) FROM mr_labels) as label_links,\n (SELECT COUNT(*) FROM mr_assignees) as assignee_links,\n (SELECT COUNT(*) FROM mr_reviewers) as reviewer_links;\n\"\n\n# Test filtering:\ngi list mrs --label=enhancement --limit=5\ngi list mrs --reviewer=alice --limit=5\ngi list mrs --assignee=bob --limit=5\n```\n\n## Dependencies\nThis gate requires:\n- bd-ser (MR ingestion with label/assignee/reviewer linking via clear-and-relink pattern)\n- Gate A must pass first\n\n## Edge Cases\n- MRs with no labels/assignees/reviewers: junction tables should have no rows for that MR\n- Labels shared across issues and MRs: labels table is shared, only junction differs\n- Usernames are case-sensitive: `Alice` 
!= `alice`","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:01.292318Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.059422Z","closed_at":"2026-01-27T00:48:21.059378Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-31i","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.684769Z","created_by":"tayloreernisse"}]} {"id":"bd-31m","title":"[CP1] Test fixtures for mocked GitLab responses","description":"Create mock response files for integration tests.\n\nFixtures to create:\n- gitlab-issue.json (single issue with labels)\n- gitlab-issues-page.json (paginated list)\n- gitlab-discussion.json (single discussion with notes)\n- gitlab-discussions-page.json (paginated list)\n\nInclude edge cases:\n- Issue with labels_details\n- Issue with no labels\n- Discussion with individual_note=true\n- System notes with system=true\n\nFiles: tests/fixtures/mock-responses/gitlab-issue*.json, gitlab-discussion*.json\nDone when: MSW handlers can use fixtures for deterministic tests","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:43.781288Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.155480Z","deleted_at":"2026-01-25T15:21:35.155478Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-327","title":"[CP0] Project scaffold","description":"## Background\n\nThis is the foundational scaffold for the GitLab Inbox CLI tool. Every subsequent bead depends on having the correct project structure, TypeScript configuration, and tooling in place. The configuration choices here (ESM modules, strict TypeScript, Vitest for testing) set constraints for all future code.\n\n## Approach\n\nCreate a Node.js 20+ ESM project with TypeScript strict mode. Use flat ESLint config (v9+) with TypeScript plugin. Configure Vitest with coverage. 
Create the directory structure matching the PRD exactly.\n\n**package.json essentials:**\n- `\"type\": \"module\"` for ESM\n- `\"bin\": { \"gi\": \"./dist/cli/index.js\" }` for CLI entry point\n- Runtime deps: better-sqlite3, sqlite-vec, commander, zod, pino, pino-pretty, ora, chalk, cli-table3, inquirer\n- Dev deps: typescript, @types/better-sqlite3, @types/node, vitest, msw, eslint, @typescript-eslint/*\n\n**tsconfig.json:**\n- `target: ES2022`, `module: Node16`, `moduleResolution: Node16`\n- `strict: true`, `noImplicitAny: true`, `strictNullChecks: true`\n- `outDir: ./dist`, `rootDir: ./src`\n\n**vitest.config.ts:**\n- Exclude `tests/live/**` unless `GITLAB_LIVE_TESTS=1`\n- Coverage with v8 provider\n\n## Acceptance Criteria\n\n- [ ] `npm install` completes without errors\n- [ ] `npm run build` compiles TypeScript to dist/\n- [ ] `npm run test` runs vitest (0 tests is fine at this stage)\n- [ ] `npm run lint` runs ESLint with no config errors\n- [ ] All directories exist: src/cli/commands/, src/core/, src/gitlab/, src/types/, tests/unit/, tests/integration/, tests/live/, tests/fixtures/mock-responses/, migrations/\n\n## Files\n\nCREATE:\n- package.json\n- tsconfig.json\n- vitest.config.ts\n- eslint.config.js\n- .gitignore\n- src/cli/index.ts (empty placeholder with shebang)\n- src/cli/commands/.gitkeep\n- src/core/.gitkeep\n- src/gitlab/.gitkeep\n- src/types/index.ts (empty)\n- tests/unit/.gitkeep\n- tests/integration/.gitkeep\n- tests/live/.gitkeep\n- tests/fixtures/mock-responses/.gitkeep\n- migrations/.gitkeep\n\n## TDD Loop\n\nN/A - scaffold only. 
Verify with:\n\n```bash\nnpm install\nnpm run build\nnpm run lint\nnpm run test\n```\n\n## Edge Cases\n\n- Node.js version < 20 will fail on ESM features - add `engines` field\n- better-sqlite3 requires native compilation - may need python/build-essential\n- sqlite-vec installation can fail on some platforms - document fallback","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:47.955044Z","created_by":"tayloreernisse","updated_at":"2026-01-25T02:51:25.347932Z","closed_at":"2026-01-25T02:51:25.347799Z","compaction_level":0,"original_size":0} +{"id":"bd-34o","title":"Implement MR transformer","description":"## Background\nTransforms GitLab MR API responses into normalized schema for database storage. Handles deprecated field fallbacks and extracts metadata (labels, assignees, reviewers).\n\n## Approach\nCreate new transformer module following existing issue transformer pattern:\n- `NormalizedMergeRequest` - Database-ready struct\n- `MergeRequestWithMetadata` - MR + extracted labels/assignees/reviewers\n- `transform_merge_request()` - Main transformation function\n- `extract_labels()` - Label extraction helper\n\n## Files\n- `src/gitlab/transformers/merge_request.rs` - New transformer module\n- `src/gitlab/transformers/mod.rs` - Export new module\n- `tests/mr_transformer_tests.rs` - Unit tests\n\n## Acceptance Criteria\n- [ ] `NormalizedMergeRequest` struct exists with all DB columns\n- [ ] `MergeRequestWithMetadata` contains MR + label_names + assignee_usernames + reviewer_usernames\n- [ ] `transform_merge_request()` returns `Result`\n- [ ] `draft` computed as `gitlab_mr.draft || gitlab_mr.work_in_progress`\n- [ ] `detailed_merge_status` prefers `detailed_merge_status` over `merge_status_legacy`\n- [ ] `merge_user_username` prefers `merge_user` over `merged_by`\n- [ ] `head_sha` extracted from `sha` field\n- [ ] `references_short` and `references_full` extracted from `references` Option\n- [ ] Timestamps parsed with `iso_to_ms()`, 
errors returned (not zeroed)\n- [ ] `last_seen_at` set to `now_ms()`\n- [ ] `cargo test mr_transformer` passes\n\n## TDD Loop\nRED: `cargo test mr_transformer` -> module not found\nGREEN: Add transformer with all fields\nVERIFY: `cargo test mr_transformer`\n\n## Struct Definitions\n```rust\n#[derive(Debug, Clone)]\npub struct NormalizedMergeRequest {\n pub gitlab_id: i64,\n pub project_id: i64,\n pub iid: i64,\n pub title: String,\n pub description: Option,\n pub state: String,\n pub draft: bool,\n pub author_username: String,\n pub source_branch: String,\n pub target_branch: String,\n pub head_sha: Option,\n pub references_short: Option,\n pub references_full: Option,\n pub detailed_merge_status: Option,\n pub merge_user_username: Option,\n pub created_at: i64,\n pub updated_at: i64,\n pub merged_at: Option,\n pub closed_at: Option,\n pub last_seen_at: i64,\n pub web_url: String,\n}\n\n#[derive(Debug, Clone)]\npub struct MergeRequestWithMetadata {\n pub merge_request: NormalizedMergeRequest,\n pub label_names: Vec,\n pub assignee_usernames: Vec,\n pub reviewer_usernames: Vec,\n}\n```\n\n## Function Signature\n```rust\npub fn transform_merge_request(\n gitlab_mr: &GitLabMergeRequest,\n local_project_id: i64,\n) -> Result\n```\n\n## Key Logic\n```rust\n// Draft: prefer draft, fallback to work_in_progress\nlet is_draft = gitlab_mr.draft || gitlab_mr.work_in_progress;\n\n// Merge status: prefer detailed_merge_status\nlet detailed_merge_status = gitlab_mr.detailed_merge_status\n .clone()\n .or_else(|| gitlab_mr.merge_status_legacy.clone());\n\n// Merge user: prefer merge_user\nlet merge_user_username = gitlab_mr.merge_user\n .as_ref()\n .map(|u| u.username.clone())\n .or_else(|| gitlab_mr.merged_by.as_ref().map(|u| u.username.clone()));\n\n// References extraction\nlet (references_short, references_full) = gitlab_mr.references\n .as_ref()\n .map(|r| (Some(r.short.clone()), Some(r.full.clone())))\n .unwrap_or((None, None));\n\n// Head SHA\nlet head_sha = 
gitlab_mr.sha.clone();\n```\n\n## Edge Cases\n- Invalid timestamps should return `Err`, not zero values\n- Empty labels/assignees/reviewers should return empty Vecs, not None\n- `state` must pass through as-is (including \"locked\")","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.849049Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:11:48.501301Z","closed_at":"2026-01-27T00:11:48.501241Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-34o","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.023616Z","created_by":"tayloreernisse"},{"issue_id":"bd-34o","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.059646Z","created_by":"tayloreernisse"}]} {"id":"bd-35r","title":"[CP1] Discussion and note transformers","description":"Transform GitLab discussion/note payloads to normalized database schema.\n\nFunctions to implement:\n- transformDiscussion(gitlabDiscussion, localProjectId, localIssueId) → NormalizedDiscussion\n- transformNotes(gitlabDiscussion, localProjectId) → NormalizedNote[]\n\nTransformation rules:\n- Compute first_note_at/last_note_at from notes array\n- Compute resolvable/resolved status from notes\n- Set is_system from note.system\n- Preserve note order via position (array index)\n- Convert ISO timestamps to ms epoch\n\nFiles: src/gitlab/transformers/discussion.ts\nTests: tests/unit/discussion-transformer.test.ts\nDone when: Unit tests pass for discussion/note transformation with system note flagging","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:16.861421Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.154646Z","deleted_at":"2026-01-25T15:21:35.154643Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-36m","title":"Final validation and test coverage","description":"## 
Background\nFinal validation gate ensuring all CP2 features work correctly. Verifies tests, lint, and manual smoke tests pass.\n\n## Approach\nRun comprehensive validation:\n1. Automated tests (unit + integration)\n2. Clippy and formatting\n3. Critical test case verification\n4. Gate A/B/C/D/E checklist\n5. Manual smoke tests\n\n## Files\nNone - validation only\n\n## Acceptance Criteria\n- [ ] `cargo test` passes (all tests green)\n- [ ] `cargo test --release` passes\n- [ ] `cargo clippy -- -D warnings` passes (zero warnings)\n- [ ] `cargo fmt --check` passes\n- [ ] Critical tests pass (see list below)\n- [ ] Gate A/B/C/D/E verification complete\n- [ ] Manual smoke tests pass\n\n## Validation Commands\n```bash\n# 1. Build and test\ncargo build --release\ncargo test --release\n\n# 2. Lint\ncargo clippy -- -D warnings\ncargo fmt --check\n\n# 3. Run specific critical tests\ncargo test does_not_advance_discussion_watermark_on_partial_failure\ncargo test prefers_detailed_merge_status_when_both_fields_present\ncargo test prefers_merge_user_when_both_fields_present\ncargo test prefers_draft_when_both_draft_and_work_in_progress_present\ncargo test atomic_note_replacement_preserves_data_on_parse_failure\ncargo test full_sync_resets_discussion_watermarks\n```\n\n## Critical Test Cases\n| Test | What It Verifies |\n|------|------------------|\n| `does_not_advance_discussion_watermark_on_partial_failure` | Pagination failure doesn't lose data |\n| `prefers_detailed_merge_status_when_both_fields_present` | Non-deprecated field wins |\n| `prefers_merge_user_when_both_fields_present` | Non-deprecated field wins |\n| `prefers_draft_when_both_draft_and_work_in_progress_present` | OR semantics for draft |\n| `atomic_note_replacement_preserves_data_on_parse_failure` | Parse before delete |\n| `full_sync_resets_discussion_watermarks` | --full truly refreshes |\n\n## Gate Checklist\n\n### Gate A: MRs Only\n- [ ] `gi ingest --type=merge_requests` fetches all MRs\n- [ ] MR state 
supports: opened, merged, closed, locked\n- [ ] draft field captured with work_in_progress fallback\n- [ ] detailed_merge_status used with merge_status fallback\n- [ ] head_sha and references captured\n- [ ] Cursor-based sync is resumable\n\n### Gate B: Labels + Assignees + Reviewers\n- [ ] Labels linked via mr_labels junction\n- [ ] Stale labels removed on resync\n- [ ] Assignees linked via mr_assignees\n- [ ] Reviewers linked via mr_reviewers\n\n### Gate C: Dependent Discussion Sync\n- [ ] Discussions fetched for MRs with updated_at advancement\n- [ ] DiffNote position metadata captured\n- [ ] DiffNote SHA triplet captured\n- [ ] Upsert + sweep pattern for notes\n- [ ] Watermark NOT advanced on partial failure\n- [ ] Unchanged MRs skip discussion refetch\n\n### Gate D: Resumability Proof\n- [ ] Kill mid-run, rerun -> bounded redo\n- [ ] `--full` resets cursor AND discussion watermarks\n- [ ] Single-flight lock prevents concurrent runs\n\n### Gate E: CLI Complete\n- [ ] `gi list mrs` with all filters including --draft/--no-draft\n- [ ] `gi show mr ` with discussions and DiffNote context\n- [ ] `gi count mrs` with state breakdown\n- [ ] `gi sync-status` shows MR cursors\n\n## Manual Smoke Tests\n| Command | Expected |\n|---------|----------|\n| `gi ingest --type=merge_requests` | Completes, shows counts |\n| `gi list mrs --limit=10` | Shows 10 MRs with correct columns |\n| `gi list mrs --state=merged` | Only merged MRs |\n| `gi list mrs --draft` | Only draft MRs with [DRAFT] prefix |\n| `gi show mr ` | Full detail with discussions |\n| `gi count mrs` | Count with state breakdown |\n| Re-run ingest | \"0 new MRs\", skipped discussion count |\n| `gi ingest --type=merge_requests --full` | Full resync |\n\n## Data Integrity Checks\n```sql\n-- MR count matches GitLab\nSELECT COUNT(*) FROM merge_requests;\n\n-- Every MR has raw payload\nSELECT COUNT(*) FROM merge_requests WHERE raw_payload_id IS NULL;\n-- Should be 0\n\n-- Labels linked correctly\nSELECT m.iid, 
COUNT(ml.label_id) \nFROM merge_requests m\nLEFT JOIN mr_labels ml ON ml.merge_request_id = m.id\nGROUP BY m.id;\n\n-- DiffNotes have position metadata\nSELECT COUNT(*) FROM notes WHERE position_new_path IS NOT NULL;\n```","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:43.697983Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:45:17.794393Z","closed_at":"2026-01-27T00:45:17.794325Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-36m","depends_on_id":"bd-3js","type":"blocks","created_at":"2026-01-26T22:08:55.409785Z","created_by":"tayloreernisse"},{"issue_id":"bd-36m","depends_on_id":"bd-mk3","type":"blocks","created_at":"2026-01-26T22:08:55.340118Z","created_by":"tayloreernisse"}]} {"id":"bd-38e","title":"[CP0] gi init command - interactive setup wizard","description":"## Background\n\nThe init command is the user's first interaction with gi. It must guide them through setup, validate everything works before writing config, and leave the system in a ready-to-use state. Poor UX here will frustrate new users.\n\nReference: docs/prd/checkpoint-0.md section \"gi init\"\n\n## Approach\n\n**src/cli/commands/init.ts:**\n\nInteractive flow (using inquirer):\n1. Check if config exists at target path\n - If exists and no --force: prompt \"Config exists. Overwrite? [y/N]\"\n - If --non-interactive and config exists: exit 2\n2. Prompt for GitLab base URL (validate URL format)\n3. Prompt for token env var name (default: GITLAB_TOKEN)\n4. Check token is set in environment\n - If not set: exit 1 with \"Export GITLAB_TOKEN first\"\n5. Test auth with GET /api/v4/user\n - If 401: exit 1 with \"Authentication failed\"\n - Show \"Authenticated as @username (Display Name)\"\n6. Prompt for project paths (comma-separated or add one at a time)\n7. 
Validate each project with GET /api/v4/projects/:encoded_path\n - If 404: exit 1 with \"Project not found: group/project\"\n - Show \"✓ group/project (Project Name)\"\n8. Write config.json to target path\n9. Initialize database with migrations\n10. Insert validated projects into projects table\n11. Show \"Setup complete! Run 'gi doctor' to verify.\"\n\n**Flags:**\n- `--config `: Write config to specific path\n- `--force`: Skip overwrite confirmation\n- `--non-interactive`: Fail if prompts would be shown (for CI/scripting)\n\n## Acceptance Criteria\n\n- [ ] Creates config.json with valid structure\n- [ ] Validates GitLab URL is reachable before writing config\n- [ ] Validates token with GET /api/v4/user before writing config\n- [ ] Validates each project path exists in GitLab before writing config\n- [ ] Fails with exit 1 if token not set in environment\n- [ ] Fails with exit 1 if GitLab auth fails\n- [ ] Fails with exit 1 if any project not found\n- [ ] Prompts before overwriting existing config (unless --force)\n- [ ] --force skips overwrite confirmation\n- [ ] --non-interactive fails if prompts would be shown\n- [ ] Creates data directory and applies DB migrations\n- [ ] Inserts validated projects into projects table\n- [ ] tests/integration/init.test.ts passes (11 tests)\n\n## Files\n\nCREATE:\n- src/cli/commands/init.ts\n- tests/integration/init.test.ts\n\n## TDD Loop\n\nRED:\n```typescript\n// tests/integration/init.test.ts\ndescribe('gi init', () => {\n it('creates config file with valid structure')\n it('validates GitLab URL format')\n it('validates GitLab connection before writing config')\n it('validates each project path exists in GitLab')\n it('fails if token not set')\n it('fails if GitLab auth fails')\n it('fails if any project path not found')\n it('prompts before overwriting existing config')\n it('respects --force to skip confirmation')\n it('generates config with sensible defaults')\n it('creates data directory if missing')\n})\n```\n\nGREEN: 
Implement init.ts\n\nVERIFY: `npm run test -- tests/integration/init.test.ts`\n\n## Edge Cases\n\n- User cancels at any prompt: exit 2 (user cancelled)\n- Network error during validation: show specific error, exit 1\n- Token has wrong scopes (no read_api): auth succeeds but project fetch fails\n- Project path with special characters must be URL-encoded\n- Config directory might not exist - create with mkdirSync recursive\n- --non-interactive with missing env var should fail immediately","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:50.810720Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:27:07.775170Z","closed_at":"2026-01-25T03:27:07.774984Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-38e","depends_on_id":"bd-13b","type":"blocks","created_at":"2026-01-24T16:13:09.682253Z","created_by":"tayloreernisse"},{"issue_id":"bd-38e","depends_on_id":"bd-1l1","type":"blocks","created_at":"2026-01-24T16:13:09.733568Z","created_by":"tayloreernisse"},{"issue_id":"bd-38e","depends_on_id":"bd-3ng","type":"blocks","created_at":"2026-01-24T16:13:09.715644Z","created_by":"tayloreernisse"},{"issue_id":"bd-38e","depends_on_id":"bd-epj","type":"blocks","created_at":"2026-01-24T16:13:09.699092Z","created_by":"tayloreernisse"}]} {"id":"bd-39w","title":"[CP1] Test fixtures for mocked GitLab responses","description":"## Background\n\nTest fixtures provide mocked GitLab API responses for unit and integration tests. 
They enable testing without a live GitLab instance and ensure consistent test data across runs.\n\n## Approach\n\n### Fixture Files\n\nCreate JSON fixtures that match GitLab API response shapes:\n\n```\ntests/fixtures/\n├── gitlab_issue.json # Single issue\n├── gitlab_issues_page.json # Array of issues (pagination test)\n├── gitlab_discussion.json # Single discussion with notes\n└── gitlab_discussions_page.json # Array of discussions\n```\n\n### gitlab_issue.json\n\n```json\n{\n \"id\": 12345,\n \"iid\": 42,\n \"project_id\": 100,\n \"title\": \"Test issue title\",\n \"description\": \"Test issue description\",\n \"state\": \"opened\",\n \"created_at\": \"2024-01-15T10:00:00.000Z\",\n \"updated_at\": \"2024-01-20T15:30:00.000Z\",\n \"closed_at\": null,\n \"author\": {\n \"id\": 1,\n \"username\": \"testuser\",\n \"name\": \"Test User\"\n },\n \"labels\": [\"bug\", \"priority::high\"],\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/42\"\n}\n```\n\n### gitlab_discussion.json\n\n```json\n{\n \"id\": \"6a9c1750b37d513a43987b574953fceb50b03ce7\",\n \"individual_note\": false,\n \"notes\": [\n {\n \"id\": 1001,\n \"type\": \"DiscussionNote\",\n \"body\": \"First comment in thread\",\n \"author\": { \"id\": 1, \"username\": \"testuser\", \"name\": \"Test User\" },\n \"created_at\": \"2024-01-16T09:00:00.000Z\",\n \"updated_at\": \"2024-01-16T09:00:00.000Z\",\n \"system\": false,\n \"resolvable\": true,\n \"resolved\": false,\n \"resolved_by\": null,\n \"resolved_at\": null,\n \"position\": null\n },\n {\n \"id\": 1002,\n \"type\": \"DiscussionNote\",\n \"body\": \"Reply to first comment\",\n \"author\": { \"id\": 2, \"username\": \"reviewer\", \"name\": \"Reviewer\" },\n \"created_at\": \"2024-01-16T10:00:00.000Z\",\n \"updated_at\": \"2024-01-16T10:00:00.000Z\",\n \"system\": false,\n \"resolvable\": true,\n \"resolved\": false,\n \"resolved_by\": null,\n \"resolved_at\": null,\n \"position\": null\n }\n ]\n}\n```\n\n### Helper Module\n\n```rust\n// 
tests/fixtures/mod.rs\n\npub fn load_fixture(name: &str) -> T {\n let path = PathBuf::from(env!(\"CARGO_MANIFEST_DIR\"))\n .join(\"tests/fixtures\")\n .join(name);\n let content = std::fs::read_to_string(&path)\n .expect(&format!(\"Failed to read fixture: {}\", name));\n serde_json::from_str(&content)\n .expect(&format!(\"Failed to parse fixture: {}\", name))\n}\n\npub fn gitlab_issue() -> GitLabIssue {\n load_fixture(\"gitlab_issue.json\")\n}\n\npub fn gitlab_issues_page() -> Vec {\n load_fixture(\"gitlab_issues_page.json\")\n}\n\npub fn gitlab_discussion() -> GitLabDiscussion {\n load_fixture(\"gitlab_discussion.json\")\n}\n```\n\n## Acceptance Criteria\n\n- [ ] gitlab_issue.json deserializes to GitLabIssue correctly\n- [ ] gitlab_issues_page.json contains 3+ issues for pagination tests\n- [ ] gitlab_discussion.json contains multi-note thread\n- [ ] gitlab_discussions_page.json contains mix of individual_note true/false\n- [ ] At least one fixture includes system: true note\n- [ ] Helper functions load fixtures without panic\n\n## Files\n\n- tests/fixtures/gitlab_issue.json (create)\n- tests/fixtures/gitlab_issues_page.json (create)\n- tests/fixtures/gitlab_discussion.json (create)\n- tests/fixtures/gitlab_discussions_page.json (create)\n- tests/fixtures/mod.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n#[test] fn fixture_gitlab_issue_deserializes()\n#[test] fn fixture_gitlab_discussion_deserializes()\n#[test] fn fixture_has_system_note()\n```\n\nGREEN: Create JSON fixtures and helper module\n\nVERIFY: `cargo test fixture`\n\n## Edge Cases\n\n- Include issue with empty labels array\n- Include issue with null description\n- Include system note (system: true)\n- Include individual_note: true discussion (standalone comment)\n- Timestamps must be valid ISO 
8601","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-25T17:02:38.433752Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:48:08.415195Z","closed_at":"2026-01-25T22:48:08.415132Z","close_reason":"Created 4 JSON fixture files (issue, issues_page, discussion, discussions_page) with helper tests - 6 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-39w","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.770848Z","created_by":"tayloreernisse"}]} +{"id":"bd-3ae","title":"Epic: CP2 Gate A - MRs Only","description":"## Background\nGate A validates core MR ingestion works before adding complexity. Proves the cursor-based sync, pagination, and basic CLI work. This is the foundation - if Gate A fails, nothing else matters.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] `gi ingest --type=merge_requests` completes without error\n- [ ] `SELECT COUNT(*) FROM merge_requests` > 0\n- [ ] `gi list mrs --limit=5` shows 5 MRs with iid, title, state, author\n- [ ] `gi count mrs` shows total count matching DB query\n- [ ] MR with `state=locked` can be stored (if exists in test data)\n- [ ] Draft MR shows `draft=1` in DB and `[DRAFT]` in list output\n- [ ] `work_in_progress=true` MR shows `draft=1` (fallback works)\n- [ ] `head_sha` populated for MRs with commits\n- [ ] `references_short` and `references_full` populated\n- [ ] Re-run ingest shows \"0 new MRs\" or minimal refetch (cursor working)\n- [ ] Cursor saved at page boundary, not item boundary\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate A: MRs Only ===\"\n\n# 1. Clear any existing MR data for clean test\necho \"Step 1: Reset MR cursor for clean test...\"\nsqlite3 \"$DB_PATH\" \"DELETE FROM sync_cursors WHERE resource_type = 'merge_requests';\"\n\n# 2. 
Run MR ingestion\necho \"Step 2: Ingest MRs...\"\ngi ingest --type=merge_requests\n\n# 3. Verify MRs exist\necho \"Step 3: Verify MR count...\"\nMR_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests;\")\necho \" MR count: $MR_COUNT\"\n[ \"$MR_COUNT\" -gt 0 ] || { echo \"FAIL: No MRs ingested\"; exit 1; }\n\n# 4. Verify list command\necho \"Step 4: Test list command...\"\ngi list mrs --limit=5\n\n# 5. Verify count command\necho \"Step 5: Test count command...\"\ngi count mrs\n\n# 6. Verify draft handling\necho \"Step 6: Check draft MRs...\"\nDRAFT_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE draft = 1;\")\necho \" Draft MR count: $DRAFT_COUNT\"\n\n# 7. Verify head_sha population\necho \"Step 7: Check head_sha...\"\nSHA_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE head_sha IS NOT NULL;\")\necho \" MRs with head_sha: $SHA_COUNT\"\n\n# 8. Verify references\necho \"Step 8: Check references...\"\nREF_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE references_short IS NOT NULL;\")\necho \" MRs with references: $REF_COUNT\"\n\n# 9. Verify cursor saved\necho \"Step 9: Check cursor...\"\nCURSOR=$(sqlite3 \"$DB_PATH\" \"SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\")\necho \" Cursor: $CURSOR\"\n[ -n \"$CURSOR\" ] || { echo \"FAIL: Cursor not saved\"; exit 1; }\n\n# 10. 
Re-run and verify minimal refetch\necho \"Step 10: Re-run ingest (should be minimal)...\"\ngi ingest --type=merge_requests\n# Output should show minimal or zero new MRs\n\necho \"\"\necho \"=== Gate A: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Run these in order:\ngi ingest --type=merge_requests\ngi list mrs --limit=10\ngi count mrs\n\n# Verify in DB:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n COUNT(*) as total,\n SUM(CASE WHEN draft = 1 THEN 1 ELSE 0 END) as drafts,\n SUM(CASE WHEN head_sha IS NOT NULL THEN 1 ELSE 0 END) as with_sha,\n SUM(CASE WHEN references_short IS NOT NULL THEN 1 ELSE 0 END) as with_refs\n FROM merge_requests;\n\"\n\n# Re-run (should be no-op):\ngi ingest --type=merge_requests\n```\n\n## Dependencies\nThis gate requires these beads to be complete:\n- bd-3ir (Database migration)\n- bd-5ta (GitLab MR types)\n- bd-34o (MR transformer)\n- bd-iba (GitLab client pagination)\n- bd-ser (MR ingestion module)\n\n## Edge Cases\n- `locked` state is transitional (merge in progress); may not exist in test data\n- Some older GitLab instances may not return `head_sha` for all MRs\n- `work_in_progress` is deprecated but should still work as fallback\n- Very large projects (10k+ MRs) may take significant time on first sync","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:00.966522Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.057298Z","closed_at":"2026-01-27T00:48:21.057225Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3ae","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:55.576626Z","created_by":"tayloreernisse"},{"issue_id":"bd-3ae","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.446814Z","created_by":"tayloreernisse"}]} {"id":"bd-3bo","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\nCommands:\n- gi 
count issues → 'Issues: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\nFiles: src/cli/commands/count.ts\nDone when: Counts match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:16.190875Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156293Z","deleted_at":"2026-01-25T15:21:35.156290Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-3hy","title":"[CP1] Test fixtures for mocked GitLab responses","description":"Create mock response files for integration tests using wiremock.\n\n## Fixtures to Create\n\n### tests/fixtures/gitlab_issue.json\nSingle issue with labels:\n- id, iid, project_id, title, description, state\n- author object\n- labels array (string names)\n- timestamps\n- web_url\n\n### tests/fixtures/gitlab_issues_page.json\nArray of issues simulating paginated response:\n- 3-5 issues with varying states\n- Mix of labels\n\n### tests/fixtures/gitlab_discussion.json\nSingle discussion:\n- id (string)\n- individual_note: false\n- notes array with 2+ notes\n- Include one system note\n\n### tests/fixtures/gitlab_discussions_page.json\nArray of discussions:\n- Mix of individual_note true/false\n- Include resolvable/resolved examples\n\n## Edge Cases to Cover\n- Issue with no labels (empty array)\n- Issue with labels_details (ignored in CP1)\n- Discussion with individual_note=true (single note)\n- System notes with system=true\n- Resolvable notes\n\nFiles: tests/fixtures/gitlab_issue.json, gitlab_issues_page.json, gitlab_discussion.json, gitlab_discussions_page.json\nDone when: wiremock handlers can use fixtures for deterministic 
tests","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:59:01.206436Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.991367Z","deleted_at":"2026-01-25T17:02:01.991362Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-3ir","title":"Add database migration 006_merge_requests.sql","description":"## Background\nFoundation for all CP2 MR features. This migration defines the schema that all other MR components depend on. Must complete BEFORE any other CP2 work can proceed.\n\n## Approach\nCreate migration file that adds:\n1. `merge_requests` table with all CP2 fields\n2. `mr_labels`, `mr_assignees`, `mr_reviewers` junction tables\n3. Indexes on discussions for MR queries\n4. DiffNote position columns on notes table\n\n## Files\n- `migrations/006_merge_requests.sql` - New migration file\n- `src/core/db.rs` - Update MIGRATIONS const to include version 6\n\n## Acceptance Criteria\n- [ ] Migration file exists at `migrations/006_merge_requests.sql`\n- [ ] `merge_requests` table has columns: id, gitlab_id, project_id, iid, title, description, state, draft, author_username, source_branch, target_branch, head_sha, references_short, references_full, detailed_merge_status, merge_user_username, created_at, updated_at, merged_at, closed_at, last_seen_at, discussions_synced_for_updated_at, discussions_sync_last_attempt_at, discussions_sync_attempts, discussions_sync_last_error, web_url, raw_payload_id\n- [ ] `mr_labels` junction table exists with (merge_request_id, label_id) PK\n- [ ] `mr_assignees` junction table exists with (merge_request_id, username) PK\n- [ ] `mr_reviewers` junction table exists with (merge_request_id, username) PK\n- [ ] `idx_discussions_mr_id` and `idx_discussions_mr_resolved` indexes exist\n- [ ] `notes` table has new columns: position_type, position_line_range_start, position_line_range_end, 
position_base_sha, position_start_sha, position_head_sha\n- [ ] `gi doctor` runs without migration errors\n- [ ] `cargo test` passes\n\n## TDD Loop\nRED: Cannot open DB with version 6 schema\nGREEN: Add migration file with full SQL\nVERIFY: `cargo run -- doctor` shows healthy DB\n\n## SQL Reference (from PRD)\n```sql\n-- Merge requests table\nCREATE TABLE merge_requests (\n id INTEGER PRIMARY KEY,\n gitlab_id INTEGER UNIQUE NOT NULL,\n project_id INTEGER NOT NULL REFERENCES projects(id),\n iid INTEGER NOT NULL,\n title TEXT,\n description TEXT,\n state TEXT, -- opened | merged | closed | locked\n draft INTEGER NOT NULL DEFAULT 0, -- SQLite boolean\n author_username TEXT,\n source_branch TEXT,\n target_branch TEXT,\n head_sha TEXT,\n references_short TEXT,\n references_full TEXT,\n detailed_merge_status TEXT,\n merge_user_username TEXT,\n created_at INTEGER, -- ms epoch UTC\n updated_at INTEGER,\n merged_at INTEGER,\n closed_at INTEGER,\n last_seen_at INTEGER NOT NULL,\n discussions_synced_for_updated_at INTEGER,\n discussions_sync_last_attempt_at INTEGER,\n discussions_sync_attempts INTEGER DEFAULT 0,\n discussions_sync_last_error TEXT,\n web_url TEXT,\n raw_payload_id INTEGER REFERENCES raw_payloads(id)\n);\nCREATE INDEX idx_mrs_project_updated ON merge_requests(project_id, updated_at);\nCREATE UNIQUE INDEX uq_mrs_project_iid ON merge_requests(project_id, iid);\n-- ... 
(see PRD for full index list)\n\n-- Junction tables\nCREATE TABLE mr_labels (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n label_id INTEGER REFERENCES labels(id) ON DELETE CASCADE,\n PRIMARY KEY(merge_request_id, label_id)\n);\n\nCREATE TABLE mr_assignees (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n username TEXT NOT NULL,\n PRIMARY KEY(merge_request_id, username)\n);\n\nCREATE TABLE mr_reviewers (\n merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE,\n username TEXT NOT NULL,\n PRIMARY KEY(merge_request_id, username)\n);\n\n-- DiffNote position columns (ALTER TABLE)\nALTER TABLE notes ADD COLUMN position_type TEXT;\nALTER TABLE notes ADD COLUMN position_line_range_start INTEGER;\nALTER TABLE notes ADD COLUMN position_line_range_end INTEGER;\nALTER TABLE notes ADD COLUMN position_base_sha TEXT;\nALTER TABLE notes ADD COLUMN position_start_sha TEXT;\nALTER TABLE notes ADD COLUMN position_head_sha TEXT;\n\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (6, strftime('%s', 'now') * 1000, 'Merge requests, MR labels, assignees, reviewers');\n```\n\n## Edge Cases\n- SQLite does not support ADD CONSTRAINT - FK defined as nullable in CP1\n- `locked` state is transitional (merge-in-progress) - store as first-class\n- discussions_synced_for_updated_at prevents redundant discussion refetch","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.101470Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:06:43.899079Z","closed_at":"2026-01-27T00:06:43.898875Z","close_reason":"Migration 006_merge_requests.sql created and verified. Schema v6 applied successfully with all tables, indexes, and position columns.","compaction_level":0,"original_size":0} +{"id":"bd-3j6","title":"Add transform_mr_discussion and transform_notes_with_diff_position","description":"## Background\nExtends discussion transformer for MR context. 
MR discussions can contain DiffNotes with file position metadata. This is critical for code review context in CP3 document generation.\n\n## Approach\nAdd two new functions to existing `src/gitlab/transformers/discussion.rs`:\n1. `transform_mr_discussion()` - Transform discussion with MR reference\n2. `transform_notes_with_diff_position()` - Extract DiffNote position metadata\n\nCP1 already has the polymorphic `NormalizedDiscussion` with `NoteableRef` enum - reuse that pattern.\n\n## Files\n- `src/gitlab/transformers/discussion.rs` - Add new functions\n- `tests/diffnote_tests.rs` - DiffNote position extraction tests\n- `tests/mr_discussion_tests.rs` - MR discussion transform tests\n\n## Acceptance Criteria\n- [ ] `transform_mr_discussion()` returns `NormalizedDiscussion` with `merge_request_id: Some(local_mr_id)`\n- [ ] `transform_notes_with_diff_position()` returns `Result, String>`\n- [ ] DiffNote position fields extracted: `position_old_path`, `position_new_path`, `position_old_line`, `position_new_line`\n- [ ] Extended position fields extracted: `position_type`, `position_line_range_start`, `position_line_range_end`\n- [ ] SHA triplet extracted: `position_base_sha`, `position_start_sha`, `position_head_sha`\n- [ ] Strict timestamp parsing - returns `Err` on invalid timestamps (no `unwrap_or(0)`)\n- [ ] `cargo test diffnote` passes\n- [ ] `cargo test mr_discussion` passes\n\n## TDD Loop\nRED: `cargo test diffnote_position` -> test fails\nGREEN: Add position extraction logic\nVERIFY: `cargo test diffnote`\n\n## Function Signatures\n```rust\n/// Transform GitLab discussion for MR context.\n/// Reuses existing transform_discussion logic, just with MR reference.\npub fn transform_mr_discussion(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n local_mr_id: i64,\n) -> NormalizedDiscussion {\n // Use existing transform_discussion with NoteableRef::MergeRequest(local_mr_id)\n transform_discussion(\n gitlab_discussion,\n local_project_id,\n 
NoteableRef::MergeRequest(local_mr_id),\n )\n}\n\n/// Transform notes with DiffNote position extraction.\n/// Returns Result to enforce strict timestamp parsing.\npub fn transform_notes_with_diff_position(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n) -> Result, String>\n```\n\n## DiffNote Position Extraction\n```rust\n// Extract position metadata if present\nlet (old_path, new_path, old_line, new_line, position_type, lr_start, lr_end, base_sha, start_sha, head_sha) = note\n .position\n .as_ref()\n .map(|pos| (\n pos.old_path.clone(),\n pos.new_path.clone(),\n pos.old_line,\n pos.new_line,\n pos.position_type.clone(), // \"text\" | \"image\" | \"file\"\n pos.line_range.as_ref().map(|r| r.start_line),\n pos.line_range.as_ref().map(|r| r.end_line),\n pos.base_sha.clone(),\n pos.start_sha.clone(),\n pos.head_sha.clone(),\n ))\n .unwrap_or((None, None, None, None, None, None, None, None, None, None));\n```\n\n## Strict Timestamp Parsing\n```rust\n// CRITICAL: Return error on invalid timestamps, never zero\nlet created_at = iso_to_ms(¬e.created_at)\n .ok_or_else(|| format\\!(\n \"Invalid note.created_at for note {}: {}\",\n note.id, note.created_at\n ))?;\n```\n\n## NormalizedNote Fields for DiffNotes\n```rust\nNormalizedNote {\n // ... 
existing fields ...\n // DiffNote position metadata\n position_old_path: old_path,\n position_new_path: new_path,\n position_old_line: old_line,\n position_new_line: new_line,\n // Extended position\n position_type,\n position_line_range_start: lr_start,\n position_line_range_end: lr_end,\n // SHA triplet\n position_base_sha: base_sha,\n position_start_sha: start_sha,\n position_head_sha: head_sha,\n}\n```\n\n## Edge Cases\n- Notes without position should have all position fields as None\n- Invalid timestamp should fail the entire discussion (no partial results)\n- File renames: `old_path \\!= new_path` indicates a renamed file\n- Multi-line comments: `line_range` present means comment spans lines 45-48","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:41.208380Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:20:13.473091Z","closed_at":"2026-01-27T00:20:13.473031Z","close_reason":"Implemented transform_mr_discussion() and transform_notes_with_diff_position() with full DiffNote position extraction:\n- Extended NormalizedNote with 10 DiffNote position fields (path, line, type, line_range, SHA triplet)\n- Added strict timestamp parsing that returns Err on invalid timestamps\n- Created 13 diffnote_position_tests covering all extraction paths and error cases\n- Created 6 mr_discussion_tests verifying MR reference handling\n- All 161 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3j6","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.207801Z","created_by":"tayloreernisse"},{"issue_id":"bd-3j6","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.244201Z","created_by":"tayloreernisse"}]} +{"id":"bd-3js","title":"Implement MR CLI commands (list, show, count)","description":"## Background\nCLI commands for viewing and filtering merge requests. 
Includes list, show, and count commands with MR-specific filters.\n\n## Approach\nUpdate existing CLI command files:\n1. `list.rs` - Add MR listing with filters\n2. `show.rs` - Add MR detail view with discussions\n3. `count.rs` - Add MR counting with state breakdown\n\n## Files\n- `src/cli/commands/list.rs` - Add MR subcommand\n- `src/cli/commands/show.rs` - Add MR detail view\n- `src/cli/commands/count.rs` - Add MR counting\n\n## Acceptance Criteria\n- [ ] `gi list mrs` shows MR table with iid, title, state, author, branches\n- [ ] `gi list mrs --state=merged` filters by state\n- [ ] `gi list mrs --state=locked` filters locally (not server-side)\n- [ ] `gi list mrs --draft` shows only draft MRs\n- [ ] `gi list mrs --no-draft` excludes draft MRs\n- [ ] `gi list mrs --reviewer=username` filters by reviewer\n- [ ] `gi list mrs --target-branch=main` filters by target branch\n- [ ] `gi list mrs --source-branch=feature/x` filters by source branch\n- [ ] Draft MRs show `[DRAFT]` prefix in title\n- [ ] `gi show mr ` displays full detail including discussions\n- [ ] DiffNote shows file context: `[src/file.ts:45]`\n- [ ] Multi-line DiffNote shows: `[src/file.ts:45-48]`\n- [ ] `gi show mr` shows `detailed_merge_status`\n- [ ] `gi count mrs` shows total with state breakdown\n- [ ] `gi sync-status` shows MR cursor positions\n- [ ] `cargo test cli_commands` passes\n\n## TDD Loop\nRED: `cargo test list_mrs` -> command not found\nGREEN: Add MR subcommand\nVERIFY: `gi list mrs --help`\n\n## gi list mrs Output\n```\nMerge Requests (showing 20 of 1,234)\n\n !847 Refactor auth to use JWT tokens merged @johndoe main <- feature/jwt 3 days ago\n !846 Fix memory leak in websocket handler opened @janedoe main <- fix/websocket 5 days ago\n !845 [DRAFT] Add dark mode CSS variables opened @bobsmith main <- ui/dark-mode 1 week ago\n```\n\n## SQL for MR Listing\n```sql\nSELECT \n m.iid, m.title, m.state, m.draft, m.author_username,\n m.target_branch, m.source_branch, m.updated_at\nFROM 
merge_requests m\nWHERE m.project_id = ?\n AND (? IS NULL OR m.state = ?) -- state filter\n AND (? IS NULL OR m.draft = ?) -- draft filter\n AND (? IS NULL OR m.author_username = ?) -- author filter\n AND (? IS NULL OR m.target_branch = ?) -- target-branch filter\n AND (? IS NULL OR m.source_branch = ?) -- source-branch filter\n AND (? IS NULL OR EXISTS ( -- reviewer filter\n SELECT 1 FROM mr_reviewers r \n WHERE r.merge_request_id = m.id AND r.username = ?\n ))\nORDER BY m.updated_at DESC\nLIMIT ?\n```\n\n## gi show mr Output\n```\nMerge Request !847: Refactor auth to use JWT tokens\n================================================================================\n\nProject: group/project-one\nState: merged\nDraft: No\nAuthor: @johndoe\nAssignees: @janedoe, @bobsmith\nReviewers: @alice, @charlie\nSource: feature/jwt\nTarget: main\nMerge Status: mergeable\nMerged By: @alice\nMerged At: 2024-03-20 14:30:00\nLabels: enhancement, auth, reviewed\n\nDescription:\n Moving away from session cookies to JWT-based authentication...\n\nDiscussions (8):\n\n @janedoe (2024-03-16) [src/auth/jwt.ts:45]:\n Should we use a separate signing key for refresh tokens?\n\n @johndoe (2024-03-16):\n Good point. I'll add a separate key with rotation support.\n\n @alice (2024-03-18) [RESOLVED]:\n Looks good! 
Just one nit about the token expiry constant.\n```\n\n## DiffNote File Context Display\n```rust\n// Build file context string\nlet file_context = match (note.position_new_path, note.position_new_line, note.position_line_range_end) {\n (Some(path), Some(line), Some(end_line)) if line != end_line => {\n format!(\"[{}:{}-{}]\", path, line, end_line)\n }\n (Some(path), Some(line), _) => {\n format!(\"[{}:{}]\", path, line)\n }\n _ => String::new(),\n};\n```\n\n## gi count mrs Output\n```\nMerge Requests: 1,234\n opened: 89\n merged: 1,045\n closed: 100\n```\n\n## Filter Arguments (clap)\n```rust\n#[derive(Parser)]\nstruct ListMrsArgs {\n #[arg(long)]\n state: Option, // opened|merged|closed|locked|all\n #[arg(long)]\n draft: bool,\n #[arg(long)]\n no_draft: bool,\n #[arg(long)]\n author: Option,\n #[arg(long)]\n assignee: Option,\n #[arg(long)]\n reviewer: Option,\n #[arg(long)]\n target_branch: Option,\n #[arg(long)]\n source_branch: Option,\n #[arg(long)]\n label: Vec,\n #[arg(long)]\n project: Option,\n #[arg(long, default_value = \"20\")]\n limit: u32,\n}\n```\n\n## Edge Cases\n- `--state=locked` must filter locally (GitLab API doesn't support it)\n- Ambiguous MR iid across projects: prompt for `--project`\n- Empty discussions: show \"No discussions\" message\n- Multi-line DiffNotes: show line range in context","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:43.354939Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:37:31.792569Z","closed_at":"2026-01-27T00:37:31.792504Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3js","depends_on_id":"bd-20h","type":"blocks","created_at":"2026-01-26T22:08:55.209249Z","created_by":"tayloreernisse"},{"issue_id":"bd-3js","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.117728Z","created_by":"tayloreernisse"}]} {"id":"bd-3kj","title":"[CP0] gi version, backup, reset, sync-status commands","description":"## 
Background\n\nThese are the remaining utility commands for CP0. version is trivial. backup creates safety copies before destructive operations. reset provides clean-slate capability. sync-status is a stub for CP0 that will be implemented in CP1.\n\nReference: docs/prd/checkpoint-0.md sections \"gi version\", \"gi backup\", \"gi reset\", \"gi sync-status\"\n\n## Approach\n\n**src/cli/commands/version.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { version } from '../../../package.json' with { type: 'json' };\n\nexport const versionCommand = new Command('version')\n .description('Show version information')\n .action(() => {\n console.log(\\`gi version \\${version}\\`);\n });\n```\n\n**src/cli/commands/backup.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { copyFileSync, mkdirSync } from 'node:fs';\nimport { loadConfig } from '../../core/config';\nimport { getDbPath, getBackupDir } from '../../core/paths';\n\nexport const backupCommand = new Command('backup')\n .description('Create timestamped database backup')\n .action(async (options, command) => {\n const globalOpts = command.optsWithGlobals();\n const config = loadConfig(globalOpts.config);\n \n const dbPath = getDbPath(config.storage?.dbPath);\n const backupDir = getBackupDir(config.storage?.backupDir);\n \n mkdirSync(backupDir, { recursive: true });\n \n // Format: data-2026-01-24T10-30-00.db (colons replaced for Windows compat)\n const timestamp = new Date().toISOString().replace(/:/g, '-').replace(/\\\\..*/, '');\n const backupPath = \\`\\${backupDir}/data-\\${timestamp}.db\\`;\n \n copyFileSync(dbPath, backupPath);\n console.log(\\`Created backup: \\${backupPath}\\`);\n });\n```\n\n**src/cli/commands/reset.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { unlinkSync, existsSync } from 'node:fs';\nimport { createInterface } from 'node:readline';\nimport { loadConfig } from '../../core/config';\nimport { getDbPath } from '../../core/paths';\n\nexport 
const resetCommand = new Command('reset')\n .description('Delete database and reset all state')\n .option('--confirm', 'Skip confirmation prompt')\n .action(async (options, command) => {\n const globalOpts = command.optsWithGlobals();\n const config = loadConfig(globalOpts.config);\n const dbPath = getDbPath(config.storage?.dbPath);\n \n if (!existsSync(dbPath)) {\n console.log('No database to reset.');\n return;\n }\n \n if (!options.confirm) {\n console.log(\\`This will delete:\\n - Database: \\${dbPath}\\n - All sync cursors\\n - All cached data\\n\\`);\n // Prompt for 'yes' confirmation\n // If not 'yes', exit 2\n }\n \n unlinkSync(dbPath);\n // Also delete WAL and SHM files if they exist\n if (existsSync(\\`\\${dbPath}-wal\\`)) unlinkSync(\\`\\${dbPath}-wal\\`);\n if (existsSync(\\`\\${dbPath}-shm\\`)) unlinkSync(\\`\\${dbPath}-shm\\`);\n \n console.log(\"Database reset. Run 'gi sync' to repopulate.\");\n });\n```\n\n**src/cli/commands/sync-status.ts:**\n```typescript\n// CP0 stub - full implementation in CP1\nexport const syncStatusCommand = new Command('sync-status')\n .description('Show sync state')\n .action(() => {\n console.log(\"No sync runs yet. 
Run 'gi sync' to start.\");\n });\n```\n\n## Acceptance Criteria\n\n- [ ] `gi version` outputs \"gi version X.Y.Z\"\n- [ ] `gi backup` creates timestamped copy of database\n- [ ] Backup filename is Windows-compatible (no colons)\n- [ ] Backup directory created if missing\n- [ ] `gi reset` prompts for 'yes' confirmation\n- [ ] `gi reset --confirm` skips prompt\n- [ ] Reset deletes .db, .db-wal, and .db-shm files\n- [ ] Reset exits 2 if user doesn't type 'yes'\n- [ ] `gi sync-status` outputs stub message\n\n## Files\n\nCREATE:\n- src/cli/commands/version.ts\n- src/cli/commands/backup.ts\n- src/cli/commands/reset.ts\n- src/cli/commands/sync-status.ts\n\n## TDD Loop\n\nN/A - simple commands, verify manually:\n\n```bash\ngi version\ngi backup\nls ~/.local/share/gi/backups/\ngi reset # type 'no'\ngi reset --confirm\nls ~/.local/share/gi/data.db # should not exist\ngi sync-status\n```\n\n## Edge Cases\n\n- Backup when database doesn't exist - show clear error\n- Reset when database doesn't exist - show \"No database to reset\"\n- WAL/SHM files may not exist - check before unlinking\n- Timestamp with milliseconds could cause very long filename\n- readline prompt in non-interactive terminal - handle SIGINT","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:51.774210Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:31:46.227285Z","closed_at":"2026-01-25T03:31:46.227220Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3kj","depends_on_id":"bd-13b","type":"blocks","created_at":"2026-01-24T16:13:10.810953Z","created_by":"tayloreernisse"},{"issue_id":"bd-3kj","depends_on_id":"bd-3ng","type":"blocks","created_at":"2026-01-24T16:13:10.827689Z","created_by":"tayloreernisse"}]} {"id":"bd-3mk","title":"[CP1] gi list issues command","description":"List issues from the database.\n\nFlags:\n- --limit=N (default: 20)\n- --project=PATH (filter by project)\n- --state=opened|closed|all (default: 
all)\n\nOutput: Table with iid, title, state, author, relative time\n\nFiles: src/cli/commands/list.ts\nDone when: List displays issues with proper filtering and formatting","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:10.400664Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.155211Z","deleted_at":"2026-01-25T15:21:35.155209Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-3n1","title":"[CP1] gi list issues command","description":"## Background\n\nThe `gi list issues` command displays a paginated list of issues from the local database. It supports filtering by project and state, with configurable limit. This provides quick access to synced issues without opening GitLab.\n\n## Approach\n\n### Module: src/cli/commands/list.rs\n\n### Clap Definition\n\n```rust\n#[derive(Args)]\npub struct ListArgs {\n /// Entity type to list\n #[arg(value_parser = [\"issues\", \"mrs\"])]\n pub entity: String,\n\n /// Maximum results\n #[arg(long, default_value = \"20\")]\n pub limit: usize,\n\n /// Filter by project path\n #[arg(long)]\n pub project: Option,\n\n /// Filter by state\n #[arg(long, value_parser = [\"opened\", \"closed\", \"all\"])]\n pub state: Option,\n}\n```\n\n### Handler Function\n\n```rust\npub async fn handle_list(args: ListArgs, conn: &Connection) -> Result<()>\n```\n\n### Query (for issues)\n\n```sql\nSELECT i.iid, i.title, i.state, i.author_username, i.updated_at, p.path\nFROM issues i\nJOIN projects p ON i.project_id = p.id\nWHERE (p.path = ? OR ? IS NULL)\n AND (i.state = ? OR ? IS NULL OR ? 
= 'all')\nORDER BY i.updated_at DESC\nLIMIT ?\n```\n\n### Output Format (matches PRD)\n\n```\nIssues (showing 20 of 3,801)\n\n #1234 Authentication redesign opened @johndoe 3 days ago\n #1233 Fix memory leak in cache closed @janedoe 5 days ago\n #1232 Add dark mode support opened @bobsmith 1 week ago\n ...\n```\n\n### Column Layout\n\n| Column | Width | Alignment |\n|--------|-------|-----------|\n| IID | 6 | right |\n| Title | 45 | left (truncate) |\n| State | 8 | left |\n| Author | 12 | left |\n| Updated | 12 | right (relative) |\n\n### Relative Time Formatting\n\n```rust\nfn format_relative_time(ms_epoch: i64) -> String {\n let now = now_ms();\n let diff = now - ms_epoch;\n match diff {\n d if d < 60_000 => \"just now\".to_string(),\n d if d < 3_600_000 => format!(\"{} min ago\", d / 60_000),\n d if d < 86_400_000 => format!(\"{} hours ago\", d / 3_600_000),\n d if d < 604_800_000 => format!(\"{} days ago\", d / 86_400_000),\n d if d < 2_592_000_000 => format!(\"{} weeks ago\", d / 604_800_000),\n _ => format!(\"{} months ago\", diff / 2_592_000_000),\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] Lists issues ordered by updated_at DESC\n- [ ] Shows \"showing X of Y\" with total count\n- [ ] Respects --limit parameter\n- [ ] --project filters to single project\n- [ ] --state filters to opened/closed/all\n- [ ] Title truncated if longer than column width\n- [ ] Updated time shown as relative (\"3 days ago\")\n\n## Files\n\n- src/cli/commands/mod.rs (add `pub mod list;`)\n- src/cli/commands/list.rs (create)\n- src/cli/mod.rs (add List variant to Commands enum)\n\n## TDD Loop\n\nRED:\n```rust\n#[tokio::test] async fn list_issues_shows_correct_columns()\n#[tokio::test] async fn list_issues_respects_limit()\n#[tokio::test] async fn list_issues_filters_by_project()\n#[tokio::test] async fn list_issues_filters_by_state()\n```\n\nGREEN: Implement handler with query and formatting\n\nVERIFY: `cargo test list_issues`\n\n## Edge Cases\n\n- No issues match filters - show 
\"No issues found\"\n- Title exactly 45 chars - no truncation\n- Title 46+ chars - truncate with \"...\"\n- --state=all shows both opened and closed\n- Default state filter is all (not just opened)","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-25T17:02:38.336352Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:58:56.619167Z","closed_at":"2026-01-25T22:58:56.619106Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3n1","depends_on_id":"bd-208","type":"blocks","created_at":"2026-01-25T17:04:05.653278Z","created_by":"tayloreernisse"}]} {"id":"bd-3nd","title":"[CP1] Issue transformer with label extraction","description":"## Background\n\nThe issue transformer converts GitLab API responses into our local schema format. It extracts core fields and, critically, the array of label names from each issue. This transformer is pure logic with no I/O, making it easy to test.\n\n## Approach\n\nCreate a transformer module with a function that:\n1. Takes a `GitLabIssue` and returns an `IssueRow` struct\n2. 
Extracts the `labels: Vec` directly from the issue (GitLab returns label names as strings)\n\n### Structs\n\n```rust\n// src/gitlab/transformers/issue.rs\n\npub struct IssueRow {\n pub gitlab_id: i64,\n pub iid: i64,\n pub project_id: i64,\n pub title: String,\n pub description: Option,\n pub state: String,\n pub author_username: String,\n pub created_at: i64, // ms epoch UTC\n pub updated_at: i64, // ms epoch UTC\n pub web_url: String,\n}\n\npub struct IssueWithLabels {\n pub issue: IssueRow,\n pub label_names: Vec,\n}\n```\n\n### Function\n\n```rust\npub fn transform_issue(issue: GitLabIssue) -> Result {\n // Parse ISO 8601 timestamps to ms epoch\n // Extract author.username\n // Return IssueWithLabels with label_names from issue.labels\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `IssueRow` struct exists with all fields from schema\n- [ ] `IssueWithLabels` struct bundles issue + label names\n- [ ] `transform_issue` parses ISO 8601 to ms epoch correctly\n- [ ] `transform_issue` handles missing description (None)\n- [ ] Label names are preserved exactly as received from GitLab\n- [ ] Unit tests cover all edge cases\n\n## Files\n\n- src/gitlab/transformers/mod.rs (create, add `pub mod issue;`)\n- src/gitlab/transformers/issue.rs (create)\n\n## TDD Loop\n\nRED: \n```rust\n// tests/unit/issue_transformer_test.rs\n#[test] fn transforms_issue_with_all_fields()\n#[test] fn handles_missing_description()\n#[test] fn extracts_label_names()\n#[test] fn parses_timestamps_to_ms_epoch()\n```\n\nGREEN: Implement transform_issue function\n\nVERIFY: `cargo test issue_transformer`\n\n## Edge Cases\n\n- GitLab timestamps are ISO 8601 with timezone - use chrono::DateTime::parse_from_rfc3339\n- Description can be null in GitLab API - map to Option\n- Empty labels array is valid - return empty Vec\n- Do NOT parse labels_details - it varies across GitLab 
versions","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.174071Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:27:11.430611Z","closed_at":"2026-01-25T22:27:11.430439Z","close_reason":"Implemented IssueRow, IssueWithLabels, transform_issue with 6 passing unit tests covering all edge cases","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3nd","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.314883Z","created_by":"tayloreernisse"}]} {"id":"bd-3ng","title":"[CP0] Database setup with migrations and app lock","description":"## Background\n\nThe database is the backbone of gitlab-inbox. SQLite with WAL mode for performance, foreign keys for integrity, and proper pragmas for reliability. Migrations allow schema evolution. App lock prevents concurrent sync corruption.\n\nReference: docs/prd/checkpoint-0.md sections \"Database Schema\", \"SQLite Runtime Pragmas\", \"App Lock Mechanism\"\n\n## Approach\n\n**src/core/db.ts:**\n```typescript\nimport Database from 'better-sqlite3';\nimport { join, dirname } from 'node:path';\nimport { mkdirSync, readdirSync, readFileSync } from 'node:fs';\nimport { getDbPath } from './paths';\nimport { dbLogger } from './logger';\n\nexport function createConnection(dbPath: string): Database.Database {\n mkdirSync(dirname(dbPath), { recursive: true });\n const db = new Database(dbPath);\n \n // Production-grade pragmas\n db.pragma('journal_mode = WAL');\n db.pragma('synchronous = NORMAL');\n db.pragma('foreign_keys = ON');\n db.pragma('busy_timeout = 5000');\n db.pragma('temp_store = MEMORY');\n \n return db;\n}\n\nexport function runMigrations(db: Database.Database, migrationsDir: string): void {\n // Create schema_version table if not exists\n // Read migration files sorted by version\n // Apply migrations not yet applied\n // Track in schema_version table\n}\n```\n\n**migrations/001_initial.sql:**\nFull schema with tables: schema_version, 
projects, sync_runs, app_locks, sync_cursors, raw_payloads\n\n**src/core/lock.ts:**\nAppLock class with:\n- acquire(force?): acquires lock or throws DatabaseLockError\n- release(): releases lock and stops heartbeat\n- Heartbeat timer that updates heartbeat_at every N seconds\n- Stale lock detection (heartbeat_at > staleLockMinutes ago)\n\n## Acceptance Criteria\n\n- [ ] createConnection() creates parent directories if missing\n- [ ] WAL mode verified: `db.pragma('journal_mode')` returns 'wal'\n- [ ] Foreign keys verified: `db.pragma('foreign_keys')` returns 1\n- [ ] busy_timeout verified: `db.pragma('busy_timeout')` returns 5000\n- [ ] 001_initial.sql creates all 6 tables\n- [ ] schema_version shows version 1 after migration\n- [ ] AppLock.acquire() succeeds for first caller\n- [ ] AppLock.acquire() throws DatabaseLockError for second concurrent caller\n- [ ] Stale lock (heartbeat > 10 min old) can be taken over\n- [ ] tests/unit/db.test.ts passes (8 tests)\n- [ ] tests/integration/app-lock.test.ts passes (6 tests)\n\n## Files\n\nCREATE:\n- src/core/db.ts\n- src/core/lock.ts\n- migrations/001_initial.sql\n- tests/unit/db.test.ts\n- tests/integration/app-lock.test.ts\n\n## TDD Loop\n\nRED:\n```typescript\n// tests/unit/db.test.ts\ndescribe('Database', () => {\n it('creates database file if not exists')\n it('applies migrations in order')\n it('sets WAL journal mode')\n it('enables foreign keys')\n it('sets busy_timeout=5000')\n it('sets synchronous=NORMAL')\n it('sets temp_store=MEMORY')\n it('tracks schema version')\n})\n\n// tests/integration/app-lock.test.ts\ndescribe('App Lock', () => {\n it('acquires lock successfully')\n it('updates heartbeat during operation')\n it('detects stale lock and recovers')\n it('refuses concurrent acquisition')\n it('allows force override')\n it('releases lock on completion')\n})\n```\n\nGREEN: Implement db.ts, lock.ts, 001_initial.sql\n\nVERIFY: \n```bash\nnpm run test -- tests/unit/db.test.ts\nnpm run test -- 
tests/integration/app-lock.test.ts\n```\n\n## Edge Cases\n\n- Migration file with syntax error should rollback and throw MigrationError\n- Lock heartbeat timer must be unref()'d to not block process exit\n- Database file permissions - fail clearly if not writable\n- Concurrent lock tests need separate database files","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:49.481012Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:08:38.612669Z","closed_at":"2026-01-25T03:08:38.612543Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3ng","depends_on_id":"bd-epj","type":"blocks","created_at":"2026-01-24T16:13:08.349356Z","created_by":"tayloreernisse"}]} {"id":"bd-3qm","title":"[CP1] Final validation - tests, smoke tests, integrity checks","description":"Run all tests and perform data integrity checks.\n\nValidation steps:\n1. Run all unit tests (vitest)\n2. Run all integration tests\n3. Run ESLint\n4. Run TypeScript strict check\n5. Manual smoke tests per PRD table\n6. Data integrity SQL checks:\n - Issue count matches GitLab\n - Every issue has raw_payload\n - Labels in junction exist in labels table\n - sync_cursors has entry per project\n - Re-run fetches 0 new items\n - Discussion count > 0\n - Every discussion has >= 1 note\n - individual_note=true has exactly 1 note\n\nFiles: All CP1 files\nDone when: All gate criteria from Definition of Done pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:20:51.994183Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.152852Z","deleted_at":"2026-01-25T15:21:35.152849Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-5ta","title":"Add GitLab MR types to types.rs","description":"## Background\nGitLab API types for merge requests. These structs define how we deserialize GitLab API responses. 
Must handle deprecated field aliases for backward compatibility with older GitLab instances.\n\n## Approach\nAdd new structs to `src/gitlab/types.rs`:\n- `GitLabMergeRequest` - Main MR struct with all fields\n- `GitLabReviewer` - Reviewer with optional approval state\n- `GitLabReferences` - Short and full reference strings\n\nUse serde `#[serde(alias = \"...\")]` for deprecated field fallbacks.\n\n## Files\n- `src/gitlab/types.rs` - Add new structs after existing GitLabIssue\n- `tests/fixtures/gitlab_merge_request.json` - Test fixture\n\n## Acceptance Criteria\n- [ ] `GitLabMergeRequest` struct exists with all fields from PRD\n- [ ] `detailed_merge_status` field exists (non-deprecated)\n- [ ] `#[serde(alias = \"merge_status\")]` on `merge_status_legacy` for fallback\n- [ ] `merge_user` field exists (non-deprecated)\n- [ ] `merged_by` field exists for fallback\n- [ ] `draft` and `work_in_progress` both exist (draft preferred, WIP fallback)\n- [ ] `sha` field maps to `head_sha` in transformer\n- [ ] `references: Option` for short/full refs\n- [ ] `state: String` supports \"opened\", \"merged\", \"closed\", \"locked\"\n- [ ] Fixture deserializes without error\n- [ ] `cargo test` passes\n\n## TDD Loop\nRED: Add test that deserializes fixture -> struct not found\nGREEN: Add GitLabMergeRequest, GitLabReviewer, GitLabReferences structs\nVERIFY: `cargo test gitlab_types`\n\n## Struct Definitions (from PRD)\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabMergeRequest {\n pub id: i64,\n pub iid: i64,\n pub project_id: i64,\n pub title: String,\n pub description: Option,\n pub state: String, // \"opened\" | \"merged\" | \"closed\" | \"locked\"\n #[serde(default)]\n pub draft: bool,\n #[serde(default)]\n pub work_in_progress: bool, // Deprecated fallback\n pub source_branch: String,\n pub target_branch: String,\n pub sha: Option, // head_sha\n pub references: Option,\n pub detailed_merge_status: Option,\n #[serde(alias = \"merge_status\")]\n pub 
merge_status_legacy: Option,\n pub created_at: String,\n pub updated_at: String,\n pub merged_at: Option,\n pub closed_at: Option,\n pub author: GitLabAuthor,\n pub merge_user: Option,\n pub merged_by: Option,\n #[serde(default)]\n pub labels: Vec,\n #[serde(default)]\n pub assignees: Vec,\n #[serde(default)]\n pub reviewers: Vec,\n pub web_url: String,\n}\n\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabReferences {\n pub short: String, // e.g. \"\\!123\"\n pub full: String, // e.g. \"group/project\\!123\"\n}\n\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabReviewer {\n pub id: i64,\n pub username: String,\n pub name: String,\n}\n```\n\n## Test Fixture (create tests/fixtures/gitlab_merge_request.json)\n```json\n{\n \"id\": 12345,\n \"iid\": 42,\n \"project_id\": 100,\n \"title\": \"Add user authentication\",\n \"description\": \"Implements JWT auth flow\",\n \"state\": \"merged\",\n \"draft\": false,\n \"work_in_progress\": false,\n \"source_branch\": \"feature/auth\",\n \"target_branch\": \"main\",\n \"sha\": \"abc123def456\",\n \"references\": { \"short\": \"\\!42\", \"full\": \"group/project\\!42\" },\n \"detailed_merge_status\": \"mergeable\",\n \"merge_status\": \"can_be_merged\",\n \"created_at\": \"2024-01-15T10:00:00Z\",\n \"updated_at\": \"2024-01-20T14:30:00Z\",\n \"merged_at\": \"2024-01-20T14:30:00Z\",\n \"closed_at\": null,\n \"author\": { \"id\": 1, \"username\": \"johndoe\", \"name\": \"John Doe\" },\n \"merge_user\": { \"id\": 2, \"username\": \"janedoe\", \"name\": \"Jane Doe\" },\n \"merged_by\": { \"id\": 2, \"username\": \"janedoe\", \"name\": \"Jane Doe\" },\n \"labels\": [\"enhancement\", \"auth\"],\n \"assignees\": [{ \"id\": 3, \"username\": \"bob\", \"name\": \"Bob Smith\" }],\n \"reviewers\": [{ \"id\": 4, \"username\": \"alice\", \"name\": \"Alice Wong\" }],\n \"web_url\": \"https://gitlab.example.com/group/project/-/merge_requests/42\"\n}\n```\n\n## Edge Cases\n- `locked` state is transitional (merge in progress) - 
rare but valid\n- Some older instances may not return `detailed_merge_status`\n- Some older instances may not return `merge_user` (use `merged_by` fallback)\n- `work_in_progress` is deprecated but still returned by some instances","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:40.498088Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:08:35.520229Z","closed_at":"2026-01-27T00:08:35.520167Z","close_reason":"Added GitLabMergeRequest, GitLabReviewer, GitLabReferences structs. Updated GitLabNotePosition with position_type, line_range, and SHA triplet fields. All 23 type tests passing.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-5ta","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:53.981911Z","created_by":"tayloreernisse"}]} {"id":"bd-88m","title":"[CP1] Issue ingestion module","description":"Fetch and store issues with cursor-based incremental sync.\n\n## Module\nsrc/ingestion/issues.rs\n\n## Key Structs\n\n### IngestIssuesResult\n- fetched: usize\n- upserted: usize\n- labels_created: usize\n- issues_needing_discussion_sync: Vec\n\n### IssueForDiscussionSync\n- local_issue_id: i64\n- iid: i64\n- updated_at: i64\n\n## Main Function\npub async fn ingest_issues(conn, client, config, project_id, gitlab_project_id) -> Result\n\n## Logic\n1. Get current cursor from sync_cursors (updated_at_cursor, tie_breaker_id)\n2. Paginate through issues updated after cursor with cursor_rewind_seconds\n3. Apply local filtering for tuple cursor semantics:\n - Skip if issue.updated_at < cursor_updated_at\n - Skip if issue.updated_at == cursor_updated_at AND issue.id <= cursor_gitlab_id\n4. 
For each issue passing filter:\n - Begin transaction\n - Store raw payload (compressed)\n - Transform and upsert issue\n - Clear existing label links (DELETE FROM issue_labels)\n - Extract and upsert labels\n - Link issue to labels via junction\n - Commit transaction\n - Track for discussion sync eligibility\n5. Incremental cursor update every 100 issues\n6. Final cursor update\n7. Determine issues needing discussion sync: where updated_at > discussions_synced_for_updated_at\n\n## Helper Functions\n- get_cursor(conn, project_id) -> (Option, Option)\n- get_discussions_synced_at(conn, issue_id) -> Option\n- upsert_issue(conn, issue, payload_id) -> usize\n- get_local_issue_id(conn, gitlab_id) -> i64\n- clear_issue_labels(conn, issue_id)\n- upsert_label(conn, label) -> bool\n- get_label_id(conn, project_id, name) -> i64\n- link_issue_label(conn, issue_id, label_id)\n- update_cursor(conn, project_id, resource_type, updated_at, gitlab_id)\n\nFiles: src/ingestion/mod.rs, src/ingestion/issues.rs\nTests: tests/issue_ingestion_tests.rs\nDone when: Issues, labels, issue_labels populated correctly with resumable cursor","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:35.655708Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.806982Z","deleted_at":"2026-01-25T17:02:01.806977Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-9av","title":"[CP1] gi sync-status enhancement","description":"Enhance sync-status from CP0 stub to show issue cursors.\n\n## Changes to src/cli/commands/sync_status.rs\n\nUpdate the existing stub to show:\n- Last run timestamp and duration\n- Cursor positions per project (issues resource_type)\n- Entity counts (issues, discussions, notes)\n\n## Output Format\nLast sync: 2026-01-25 10:30:00 (succeeded, 45s)\n\nCursors:\n group/project-one\n issues: 2026-01-25T10:25:00Z (gitlab_id: 12345678)\n\nCounts:\n 
Issues: 1,234\n Discussions: 5,678\n Notes: 23,456 (4,567 system)\n\nFiles: src/cli/commands/sync_status.rs\nDone when: Shows cursor positions and counts after ingestion","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:27.246825Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.968507Z","deleted_at":"2026-01-25T17:02:01.968503Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-cbo","title":"[CP1] Cargo.toml updates - async-stream and futures","description":"Add required dependencies for async pagination streams.\n\n## Changes\nAdd to Cargo.toml:\n- async-stream = \"0.3\"\n- futures = \"0.3\"\n\n## Why\nThe pagination methods use async generators which require async-stream crate.\nfutures crate provides StreamExt for consuming the streams.\n\n## Done When\n- cargo check passes with new deps\n- No unused dependency warnings\n\nFiles: Cargo.toml","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:31.143927Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.661666Z","deleted_at":"2026-01-25T17:02:01.661662Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} @@ -56,10 +68,14 @@ {"id":"bd-epj","title":"[CP0] Config loading with Zod validation","description":"## Background\n\nConfig loading is critical infrastructure - every CLI command needs the config. Uses Zod for schema validation with sensible defaults. 
Must handle missing files gracefully with typed errors.\n\nReference: docs/prd/checkpoint-0.md sections \"Configuration Schema\", \"Config Resolution Order\"\n\n## Approach\n\n**src/core/config.ts:**\n```typescript\nimport { z } from 'zod';\nimport { readFileSync } from 'node:fs';\nimport { ConfigNotFoundError, ConfigValidationError } from './errors';\nimport { getConfigPath } from './paths';\n\nexport const ConfigSchema = z.object({\n gitlab: z.object({\n baseUrl: z.string().url(),\n tokenEnvVar: z.string().default('GITLAB_TOKEN'),\n }),\n projects: z.array(z.object({\n path: z.string().min(1),\n })).min(1),\n sync: z.object({\n backfillDays: z.number().int().positive().default(14),\n staleLockMinutes: z.number().int().positive().default(10),\n heartbeatIntervalSeconds: z.number().int().positive().default(30),\n cursorRewindSeconds: z.number().int().nonnegative().default(2),\n primaryConcurrency: z.number().int().positive().default(4),\n dependentConcurrency: z.number().int().positive().default(2),\n }).default({}),\n storage: z.object({\n dbPath: z.string().optional(),\n backupDir: z.string().optional(),\n compressRawPayloads: z.boolean().default(true),\n }).default({}),\n embedding: z.object({\n provider: z.literal('ollama').default('ollama'),\n model: z.string().default('nomic-embed-text'),\n baseUrl: z.string().url().default('http://localhost:11434'),\n concurrency: z.number().int().positive().default(4),\n }).default({}),\n});\n\nexport type Config = z.infer;\n\nexport function loadConfig(cliOverride?: string): Config {\n const path = getConfigPath(cliOverride);\n // throws ConfigNotFoundError if missing\n // throws ConfigValidationError if invalid\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `loadConfig()` returns validated Config object\n- [ ] `loadConfig()` throws ConfigNotFoundError if file missing\n- [ ] `loadConfig()` throws ConfigValidationError with Zod errors if invalid\n- [ ] Empty optional fields get default values\n- [ ] projects array must have at 
least 1 item\n- [ ] gitlab.baseUrl must be valid URL\n- [ ] All number fields must be positive integers\n- [ ] tests/unit/config.test.ts passes (8 tests)\n\n## Files\n\nCREATE:\n- src/core/config.ts\n- tests/unit/config.test.ts\n- tests/fixtures/mock-responses/valid-config.json\n- tests/fixtures/mock-responses/invalid-config.json\n\n## TDD Loop\n\nRED:\n```typescript\n// tests/unit/config.test.ts\ndescribe('Config', () => {\n it('loads config from file path')\n it('throws ConfigNotFoundError if file missing')\n it('throws ConfigValidationError if required fields missing')\n it('validates project paths are non-empty strings')\n it('applies default values for optional fields')\n it('loads from XDG path by default')\n it('respects GI_CONFIG_PATH override')\n it('respects --config flag override')\n})\n```\n\nGREEN: Implement loadConfig() function\n\nVERIFY: `npm run test -- tests/unit/config.test.ts`\n\n## Edge Cases\n\n- JSON parse error should wrap in ConfigValidationError\n- Zod error messages should be human-readable\n- File exists but empty → ConfigValidationError\n- File has extra fields → should pass (Zod strips by default)","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:49.091078Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:04:32.592139Z","closed_at":"2026-01-25T03:04:32.592003Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-epj","depends_on_id":"bd-gg1","type":"blocks","created_at":"2026-01-24T16:13:07.835800Z","created_by":"tayloreernisse"}]} {"id":"bd-gg1","title":"[CP0] Core utilities - paths, time, errors, logger","description":"## Background\n\nCore utilities provide the foundation for all other modules. Path resolution enables XDG-compliant config/data locations. Time utilities ensure consistent timestamp handling (ms epoch for DB, ISO for API). Error classes provide typed exceptions for clean error handling. 
Logger provides structured logging to stderr.\n\nReference: docs/prd/checkpoint-0.md sections \"Config + Data Locations\", \"Timestamp Convention\", \"Error Classes\", \"Logging Configuration\"\n\n## Approach\n\n**src/core/paths.ts:**\n- `getConfigPath(cliOverride?)`: resolution order is CLI flag → GI_CONFIG_PATH env → XDG default → local fallback\n- `getDataDir()`: uses XDG_DATA_HOME or ~/.local/share/gi\n- `getDbPath(configOverride?)`: returns data dir + data.db\n- `getBackupDir(configOverride?)`: returns data dir + backups/\n\n**src/core/time.ts:**\n- `isoToMs(isoString)`: converts GitLab API ISO 8601 → ms epoch\n- `msToIso(ms)`: converts ms epoch → ISO 8601\n- `nowMs()`: returns Date.now() for DB storage\n\n**src/core/errors.ts:**\nError hierarchy (all extend GiError base class with code and cause):\n- ConfigNotFoundError, ConfigValidationError\n- GitLabAuthError, GitLabNotFoundError, GitLabRateLimitError, GitLabNetworkError\n- DatabaseLockError, MigrationError\n- TokenNotSetError\n\n**src/core/logger.ts:**\n- pino logger to stderr (fd 2) with pino-pretty in dev\n- Child loggers: dbLogger, gitlabLogger, configLogger\n- LOG_LEVEL env var support (default: info)\n\n## Acceptance Criteria\n\n- [ ] `getConfigPath()` returns ~/.config/gi/config.json when no overrides\n- [ ] `getConfigPath()` respects GI_CONFIG_PATH env var\n- [ ] `getConfigPath(\"./custom.json\")` returns \"./custom.json\"\n- [ ] `isoToMs(\"2024-01-27T00:00:00.000Z\")` returns 1706313600000\n- [ ] `msToIso(1706313600000)` returns \"2024-01-27T00:00:00.000Z\"\n- [ ] All error classes have correct code property\n- [ ] Logger outputs to stderr (not stdout)\n- [ ] tests/unit/paths.test.ts passes\n- [ ] tests/unit/errors.test.ts passes\n\n## Files\n\nCREATE:\n- src/core/paths.ts\n- src/core/time.ts\n- src/core/errors.ts\n- src/core/logger.ts\n- tests/unit/paths.test.ts\n- tests/unit/errors.test.ts\n\n## TDD Loop\n\nRED: Write tests first\n```typescript\n// 
tests/unit/paths.test.ts\ndescribe('getConfigPath', () => {\n it('uses XDG_CONFIG_HOME if set')\n it('falls back to ~/.config/gi if XDG not set')\n it('prefers --config flag over environment')\n it('prefers environment over XDG default')\n it('falls back to local gi.config.json in dev')\n})\n```\n\nGREEN: Implement paths.ts, errors.ts, time.ts, logger.ts\n\nVERIFY: `npm run test -- tests/unit/paths.test.ts tests/unit/errors.test.ts`\n\n## Edge Cases\n\n- XDG_CONFIG_HOME may not exist - don't create, just return path\n- existsSync() check for local fallback - only return if file exists\n- Time conversion must handle timezone edge cases - always use UTC\n- Logger must work even if pino-pretty not installed (production)","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:48.604382Z","created_by":"tayloreernisse","updated_at":"2026-01-25T02:53:26.527997Z","closed_at":"2026-01-25T02:53:26.527862Z","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-gg1","depends_on_id":"bd-327","type":"blocks","created_at":"2026-01-24T16:13:07.368187Z","created_by":"tayloreernisse"}]} {"id":"bd-hbo","title":"[CP1] Discussion ingestion module","description":"## Background\n\nDiscussion ingestion fetches all discussions and notes for a single issue. It is called as part of dependent sync - only for issues whose `updated_at` has advanced beyond `discussions_synced_for_updated_at`. 
After successful sync, it updates the watermark to prevent redundant refetches.\n\n## Approach\n\n### Module: src/ingestion/discussions.rs\n\n### Key Structs\n\n```rust\n#[derive(Debug, Default)]\npub struct IngestDiscussionsResult {\n pub discussions_fetched: usize,\n pub discussions_upserted: usize,\n pub notes_upserted: usize,\n pub system_notes_count: usize,\n}\n```\n\n### Main Function\n\n```rust\npub async fn ingest_issue_discussions(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64, // Local DB project ID\n gitlab_project_id: i64, // GitLab project ID\n issue_iid: i64,\n local_issue_id: i64,\n issue_updated_at: i64, // For watermark update\n) -> Result\n```\n\n### Logic\n\n1. Stream discussions via `client.paginate_issue_discussions()`\n2. For each discussion:\n - Begin transaction\n - Store raw payload (compressed based on config)\n - Transform to NormalizedDiscussion\n - Upsert discussion\n - Get local discussion ID\n - Transform notes via `transform_notes()`\n - For each note: store raw payload, upsert note\n - Track system_notes_count\n - Commit transaction\n3. After all discussions processed: `mark_discussions_synced(conn, local_issue_id, issue_updated_at)`\n\n### Helper Functions\n\n```rust\nfn upsert_discussion(conn, discussion, payload_id) -> Result<()>\nfn get_local_discussion_id(conn, project_id, gitlab_id) -> Result\nfn upsert_note(conn, discussion_id, note, payload_id) -> Result<()>\nfn mark_discussions_synced(conn, issue_id, issue_updated_at) -> Result<()>\n```\n\n### Critical Invariant\n\n`discussions_synced_for_updated_at` MUST be updated only AFTER all discussions are successfully synced. 
This watermark prevents redundant refetches on subsequent runs.\n\n## Acceptance Criteria\n\n- [ ] `ingest_issue_discussions` streams all discussions for an issue\n- [ ] Each discussion wrapped in transaction for atomicity\n- [ ] Raw payloads stored for discussions and notes\n- [ ] `discussions_synced_for_updated_at` updated after successful sync\n- [ ] System notes tracked in result.system_notes_count\n- [ ] Notes linked to correct discussion via local discussion ID\n\n## Files\n\n- src/ingestion/mod.rs (add `pub mod discussions;`)\n- src/ingestion/discussions.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/discussion_watermark_tests.rs\n#[tokio::test] async fn fetches_discussions_when_updated_at_advanced()\n#[tokio::test] async fn updates_watermark_after_successful_discussion_sync()\n#[tokio::test] async fn does_not_update_watermark_on_discussion_sync_failure()\n#[tokio::test] async fn stores_raw_payload_for_each_discussion()\n#[tokio::test] async fn stores_raw_payload_for_each_note()\n```\n\nGREEN: Implement ingest_issue_discussions with watermark logic\n\nVERIFY: `cargo test discussion_watermark`\n\n## Edge Cases\n\n- Issue with 0 discussions - mark synced anyway (empty is valid)\n- Discussion with 0 notes - should not happen per GitLab API (discussions always have >= 1 note)\n- Network failure mid-sync - watermark NOT updated, next run retries\n- individual_note=true discussions - have exactly 1 
note","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.267582Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:52:47.500700Z","closed_at":"2026-01-25T22:52:47.500644Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-hbo","depends_on_id":"bd-1qf","type":"blocks","created_at":"2026-01-25T17:04:05.534265Z","created_by":"tayloreernisse"},{"issue_id":"bd-hbo","depends_on_id":"bd-2iq","type":"blocks","created_at":"2026-01-25T17:04:05.499474Z","created_by":"tayloreernisse"},{"issue_id":"bd-hbo","depends_on_id":"bd-xhz","type":"blocks","created_at":"2026-01-25T17:04:05.559260Z","created_by":"tayloreernisse"}]} +{"id":"bd-iba","title":"Add GitLab client MR pagination methods","description":"## Background\nGitLab client pagination for merge requests and discussions. Must support robust pagination with fallback chain because some GitLab instances/proxies strip headers.\n\n## Approach\nAdd to existing `src/gitlab/client.rs`:\n1. `MergeRequestPage` struct - Items + pagination metadata\n2. `parse_link_header_next()` - RFC 8288 Link header parsing\n3. `fetch_merge_requests_page()` - Single page fetch with metadata\n4. `paginate_merge_requests()` - Async stream for all MRs\n5. 
`paginate_mr_discussions()` - Async stream for MR discussions\n\n## Files\n- `src/gitlab/client.rs` - Add pagination methods\n\n## Acceptance Criteria\n- [ ] `MergeRequestPage` struct exists with `items`, `next_page`, `is_last_page`\n- [ ] `parse_link_header_next()` extracts `rel=\"next\"` URL from Link header\n- [ ] Pagination fallback chain: Link header > x-next-page > full-page heuristic\n- [ ] `paginate_merge_requests()` returns `Pin>>>`\n- [ ] `paginate_mr_discussions()` returns `Pin>>>`\n- [ ] MR endpoint uses `scope=all&state=all` to include all MRs\n- [ ] `cargo test client` passes\n\n## TDD Loop\nRED: `cargo test fetch_merge_requests` -> method not found\nGREEN: Add pagination methods\nVERIFY: `cargo test client`\n\n## Struct Definitions\n```rust\n#[derive(Debug)]\npub struct MergeRequestPage {\n pub items: Vec,\n pub next_page: Option,\n pub is_last_page: bool,\n}\n```\n\n## Link Header Parsing (RFC 8288)\n```rust\n/// Parse Link header to extract rel=\"next\" URL.\nfn parse_link_header_next(headers: &reqwest::header::HeaderMap) -> Option {\n headers\n .get(\"link\")\n .and_then(|v| v.to_str().ok())\n .and_then(|link_str| {\n // Format: ; rel=\"next\", ; rel=\"last\"\n for part in link_str.split(',') {\n let part = part.trim();\n if part.contains(\"rel=\\\"next\\\"\") || part.contains(\"rel=next\") {\n if let Some(start) = part.find('<') {\n if let Some(end) = part.find('>') {\n return Some(part[start + 1..end].to_string());\n }\n }\n }\n }\n None\n })\n}\n```\n\n## Pagination Fallback Chain\n```rust\nlet next_page = match (link_next, x_next_page, items.len() as u32 == per_page) {\n (Some(_), _, _) => Some(page + 1), // Link header present: continue\n (None, Some(np), _) => Some(np), // x-next-page present: use it\n (None, None, true) => Some(page + 1), // Full page, no headers: try next\n (None, None, false) => None, // Partial page: we're done\n};\n```\n\n## Fetch Single Page\n```rust\npub async fn fetch_merge_requests_page(\n &self,\n 
gitlab_project_id: i64,\n updated_after: Option,\n cursor_rewind_seconds: u32,\n page: u32,\n per_page: u32,\n) -> Result {\n let mut params = vec![\n (\"scope\", \"all\".to_string()),\n (\"state\", \"all\".to_string()),\n (\"order_by\", \"updated_at\".to_string()),\n (\"sort\", \"asc\".to_string()),\n (\"per_page\", per_page.to_string()),\n (\"page\", page.to_string()),\n ];\n // Apply cursor rewind for safety\n // ...\n}\n```\n\n## Async Stream Pattern\n```rust\npub fn paginate_merge_requests(\n &self,\n gitlab_project_id: i64,\n updated_after: Option,\n cursor_rewind_seconds: u32,\n) -> Pin> + Send + '_>> {\n Box::pin(async_stream::try_stream! {\n let mut page = 1u32;\n let per_page = 100u32;\n loop {\n let page_result = self.fetch_merge_requests_page(...).await?;\n for mr in page_result.items {\n yield mr;\n }\n if page_result.is_last_page {\n break;\n }\n match page_result.next_page {\n Some(np) => page = np,\n None => break,\n }\n }\n })\n}\n```\n\n## Edge Cases\n- `scope=all` required to include all MRs (not just authored by current user)\n- `state=all` required to include merged/closed (GitLab defaults may exclude)\n- `locked` state cannot be filtered server-side (use local SQL filtering)\n- Cursor rewind should clamp to 0 to avoid negative timestamps","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:41.633065Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:13:05.613625Z","closed_at":"2026-01-27T00:13:05.613440Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-iba","depends_on_id":"bd-5ta","type":"blocks","created_at":"2026-01-26T22:08:54.364647Z","created_by":"tayloreernisse"}]} {"id":"bd-jov","title":"[CP1] Discussion and note transformers","description":"Transform GitLab discussion/note payloads to normalized database schema.\n\n## Module\nsrc/gitlab/transformers/discussion.rs\n\n## Structs\n\n### NormalizedDiscussion\n- gitlab_discussion_id: String\n- project_id: 
i64\n- issue_id: i64\n- noteable_type: String (\"Issue\")\n- individual_note: bool\n- first_note_at, last_note_at: Option\n- last_seen_at: i64\n- resolvable, resolved: bool\n\n### NormalizedNote\n- gitlab_id: i64\n- project_id: i64\n- note_type: Option\n- is_system: bool\n- author_username: String\n- body: String\n- created_at, updated_at, last_seen_at: i64\n- position: i32 (array index in notes[])\n- resolvable, resolved: bool\n- resolved_by: Option\n- resolved_at: Option\n\n## Functions\n\n### transform_discussion(gitlab_discussion, local_project_id, local_issue_id) -> NormalizedDiscussion\n- Compute first_note_at/last_note_at from notes array min/max created_at\n- Compute resolvable (any note resolvable)\n- Compute resolved (resolvable AND all resolvable notes resolved)\n\n### transform_notes(gitlab_discussion, local_project_id) -> Vec\n- Enumerate notes to get position (array index)\n- Set is_system from note.system\n- Convert timestamps to ms epoch\n\nFiles: src/gitlab/transformers/discussion.rs\nTests: tests/discussion_transformer_tests.rs\nDone when: Unit tests pass for discussion/note transformation with system note flagging","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:43:04.481361Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.759691Z","deleted_at":"2026-01-25T17:02:01.759684Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-k7b","title":"[CP1] gi show issue command","description":"Show issue details with discussions.\n\n## Module\nsrc/cli/commands/show.rs\n\n## Clap Definition\nShow {\n #[arg(value_parser = [\"issue\", \"mr\"])]\n entity: String,\n \n iid: i64,\n \n #[arg(long)]\n project: Option,\n}\n\n## Output Format\nIssue #1234: Authentication redesign\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\nProject: group/project-one\nState: opened\nAuthor: 
@johndoe\nCreated: 2024-01-15\nUpdated: 2024-03-20\nLabels: enhancement, auth\nURL: https://gitlab.example.com/group/project-one/-/issues/1234\n\nDescription:\n We need to redesign the authentication flow to support...\n\nDiscussions (5):\n\n @janedoe (2024-01-16):\n I agree we should move to JWT-based auth...\n\n @johndoe (2024-01-16):\n What about refresh token strategy?\n\n @bobsmith (2024-01-17):\n Have we considered OAuth2?\n\n## Ambiguity Handling\nIf multiple projects have same iid, either:\n- Prompt for --project flag\n- Show error listing which projects have that iid\n\nFiles: src/cli/commands/show.rs\nDone when: Issue detail view displays all fields including threaded discussions","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:26.904813Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.944183Z","deleted_at":"2026-01-25T17:02:01.944179Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} +{"id":"bd-lcb","title":"Epic: CP2 Gate E - CLI Complete","description":"## Background\nGate E validates all CLI commands are functional and user-friendly. 
This is the final usability gate - even if all data is correct, users need good CLI UX to access it.\n\n## Acceptance Criteria (Pass/Fail)\n\n### List Command\n- [ ] `gi list mrs` shows MR table with columns: iid, title, state, author, branches, updated\n- [ ] `gi list mrs --state=opened` filters to only opened MRs\n- [ ] `gi list mrs --state=merged` filters to only merged MRs\n- [ ] `gi list mrs --state=closed` filters to only closed MRs\n- [ ] `gi list mrs --state=locked` filters locally (not server-side filter)\n- [ ] `gi list mrs --draft` shows only draft MRs\n- [ ] `gi list mrs --no-draft` excludes draft MRs\n- [ ] Draft MRs show `[DRAFT]` prefix in title column\n- [ ] `gi list mrs --author=username` filters by author\n- [ ] `gi list mrs --assignee=username` filters by assignee\n- [ ] `gi list mrs --reviewer=username` filters by reviewer\n- [ ] `gi list mrs --target-branch=main` filters by target branch\n- [ ] `gi list mrs --source-branch=feature/x` filters by source branch\n- [ ] `gi list mrs --label=bugfix` filters by label\n- [ ] `gi list mrs --limit=N` limits output\n\n### Show Command\n- [ ] `gi show mr ` displays full MR detail\n- [ ] Show includes: title, description, state, draft status, author\n- [ ] Show includes: assignees, reviewers, labels\n- [ ] Show includes: source_branch, target_branch\n- [ ] Show includes: detailed_merge_status (e.g., \"mergeable\")\n- [ ] Show includes: merge_user and merged_at for merged MRs\n- [ ] Show includes: discussions with author and date\n- [ ] DiffNote shows file context: `[src/file.ts:45]`\n- [ ] Multi-line DiffNote shows range: `[src/file.ts:45-48]`\n- [ ] Resolved discussions show `[RESOLVED]` marker\n\n### Count Command\n- [ ] `gi count mrs` shows total count\n- [ ] Count shows state breakdown: opened, merged, closed\n\n### Sync Status\n- [ ] `gi sync-status` shows MR cursor position\n- [ ] Sync status shows last sync timestamp\n\n## Validation Script\n```bash\n#!/bin/bash\nset 
-e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate E: CLI Complete ===\"\n\n# 1. Test list command (basic)\necho \"Step 1: Basic list...\"\ngi list mrs --limit=5 || { echo \"FAIL: list mrs failed\"; exit 1; }\n\n# 2. Test state filters\necho \"Step 2: State filters...\"\nfor state in opened merged closed; do\n echo \" Testing --state=$state\"\n gi list mrs --state=$state --limit=3 || echo \" Warning: No $state MRs\"\ndone\n\n# 3. Test draft filters\necho \"Step 3: Draft filters...\"\ngi list mrs --draft --limit=3 || echo \" Note: No draft MRs found\"\ngi list mrs --no-draft --limit=3 || echo \" Note: All MRs are drafts?\"\n\n# 4. Check [DRAFT] prefix\necho \"Step 4: Check [DRAFT] prefix...\"\nDRAFT_IID=$(sqlite3 \"$DB_PATH\" \"SELECT iid FROM merge_requests WHERE draft = 1 LIMIT 1;\")\nif [ -n \"$DRAFT_IID\" ]; then\n if gi list mrs --limit=100 | grep -q \"\\[DRAFT\\]\"; then\n echo \" PASS: [DRAFT] prefix found\"\n else\n echo \" FAIL: Draft MR exists but no [DRAFT] prefix in output\"\n fi\nelse\n echo \" Skip: No draft MRs to test\"\nfi\n\n# 5. Test author/assignee/reviewer filters\necho \"Step 5: User filters...\"\nAUTHOR=$(sqlite3 \"$DB_PATH\" \"SELECT author_username FROM merge_requests LIMIT 1;\")\nif [ -n \"$AUTHOR\" ]; then\n echo \" Testing --author=$AUTHOR\"\n gi list mrs --author=\"$AUTHOR\" --limit=3\nfi\n\nREVIEWER=$(sqlite3 \"$DB_PATH\" \"SELECT username FROM mr_reviewers LIMIT 1;\")\nif [ -n \"$REVIEWER\" ]; then\n echo \" Testing --reviewer=$REVIEWER\"\n gi list mrs --reviewer=\"$REVIEWER\" --limit=3\nfi\n\n# 6. Test branch filters\necho \"Step 6: Branch filters...\"\nTARGET=$(sqlite3 \"$DB_PATH\" \"SELECT target_branch FROM merge_requests LIMIT 1;\")\nif [ -n \"$TARGET\" ]; then\n echo \" Testing --target-branch=$TARGET\"\n gi list mrs --target-branch=\"$TARGET\" --limit=3\nfi\n\n# 7. 
Test show command\necho \"Step 7: Show command...\"\nMR_IID=$(sqlite3 \"$DB_PATH\" \"SELECT iid FROM merge_requests LIMIT 1;\")\ngi show mr \"$MR_IID\" || { echo \"FAIL: show mr failed\"; exit 1; }\n\n# 8. Test show with DiffNote context\necho \"Step 8: Show with DiffNote...\"\nDIFFNOTE_MR=$(sqlite3 \"$DB_PATH\" \"\n SELECT DISTINCT m.iid\n FROM merge_requests m\n JOIN discussions d ON d.merge_request_id = m.id\n JOIN notes n ON n.discussion_id = d.id\n WHERE n.position_new_path IS NOT NULL\n LIMIT 1;\n\")\nif [ -n \"$DIFFNOTE_MR\" ]; then\n echo \" Testing MR with DiffNotes: !$DIFFNOTE_MR\"\n OUTPUT=$(gi show mr \"$DIFFNOTE_MR\")\n if echo \"$OUTPUT\" | grep -qE '\\[[^]]+:[0-9]+\\]'; then\n echo \" PASS: File context [path:line] found\"\n else\n echo \" FAIL: DiffNote should show [path:line] context\"\n fi\nelse\n echo \" Skip: No MRs with DiffNotes\"\nfi\n\n# 9. Test count command\necho \"Step 9: Count command...\"\ngi count mrs || { echo \"FAIL: count mrs failed\"; exit 1; }\n\n# 10. 
Test sync-status\necho \"Step 10: Sync status...\"\ngi sync-status || echo \" Note: sync-status may need implementation\"\n\necho \"\"\necho \"=== Gate E: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# List with all column types visible:\ngi list mrs --limit=10\n\n# Show a specific MR:\ngi show mr 42\n\n# Count with breakdown:\ngi count mrs\n\n# Complex filter:\ngi list mrs --state=opened --reviewer=alice --target-branch=main --limit=5\n```\n\n## Expected Output Formats\n\n### gi list mrs\n```\nMerge Requests (showing 5 of 1,234)\n\n !847 Refactor auth to use JWT tokens merged @johndoe main <- feature/jwt 3d ago\n !846 Fix memory leak in websocket handler opened @janedoe main <- fix/websocket 5d ago\n !845 [DRAFT] Add dark mode CSS variables opened @bobsmith main <- ui/dark-mode 1w ago\n !844 Update dependencies to latest versions closed @alice main <- chore/deps 2w ago\n```\n\n### gi show mr 847\n```\nMerge Request !847: Refactor auth to use JWT tokens\n================================================================================\n\nProject: group/project-one\nState: merged\nDraft: No\nAuthor: @johndoe\nAssignees: @janedoe, @bobsmith\nReviewers: @alice, @charlie\nLabels: enhancement, auth, reviewed\nSource: feature/jwt\nTarget: main\nMerge Status: merged\nMerged By: @alice\nMerged At: 2024-03-20 14:30:00\n\nDescription:\n Moving away from session cookies to JWT-based authentication...\n\nDiscussions (3):\n\n @janedoe (2024-03-16) [src/auth/jwt.ts:45]:\n Should we use a separate signing key for refresh tokens?\n\n @johndoe (2024-03-16):\n Good point. I'll add a separate key with rotation support.\n\n @alice (2024-03-18) [RESOLVED]:\n Looks good! 
Just one nit about the token expiry constant.\n```\n\n### gi count mrs\n```\nMerge Requests: 1,234\n opened: 89\n merged: 1,045\n closed: 100\n```\n\n## Dependencies\nThis gate requires:\n- bd-3js (CLI commands implementation)\n- All previous gates must pass first\n\n## Edge Cases\n- Ambiguous MR iid across projects: should prompt for `--project` or show error\n- Very long titles: should truncate with `...` in list view\n- Empty description: should show \"No description\" or empty section\n- No discussions: should show \"No discussions\" message\n- Unicode in titles/descriptions: should render correctly","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:02.411132Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.061166Z","closed_at":"2026-01-27T00:48:21.061125Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-lcb","depends_on_id":"bd-3js","type":"blocks","created_at":"2026-01-26T22:08:55.957747Z","created_by":"tayloreernisse"}]} +{"id":"bd-mk3","title":"Update ingest command for merge_requests type","description":"## Background\nCLI entry point for MR ingestion. Routes `--type=merge_requests` to the orchestrator. Must ensure `--full` resets both MR cursor AND discussion watermarks. This is the user-facing command that kicks off the entire MR sync pipeline.\n\n## Approach\nUpdate `src/cli/commands/ingest.rs` to handle `merge_requests` type:\n1. Add `merge_requests` branch to the resource type match statement\n2. Validate resource type early with helpful error message\n3. 
Pass `full` flag through to orchestrator (it handles the watermark reset internally)\n\n## Files\n- `src/cli/commands/ingest.rs` - Add merge_requests branch to `run_ingest`\n\n## Acceptance Criteria\n- [ ] `gi ingest --type=merge_requests` runs MR ingestion successfully\n- [ ] `gi ingest --type=merge_requests --full` resets cursor AND discussion watermarks\n- [ ] `gi ingest --type=invalid` returns helpful error listing valid types\n- [ ] Progress output shows MR counts, discussion counts, and skip counts\n- [ ] Default type remains `issues` for backward compatibility\n- [ ] `cargo test ingest_command` passes\n\n## TDD Loop\nRED: `gi ingest --type=merge_requests` -> \"invalid type: merge_requests\"\nGREEN: Add merge_requests to match statement in run_ingest\nVERIFY: `gi ingest --type=merge_requests --help` shows merge_requests as valid\n\n## Function Signature\n```rust\npub async fn run_ingest(\n config: &Config,\n args: &IngestArgs,\n) -> Result<(), GiError>\n```\n\n## IngestArgs Reference (existing)\n```rust\n#[derive(Parser, Debug)]\npub struct IngestArgs {\n /// Resource type to ingest\n #[arg(long, short = 't', default_value = \"issues\")]\n pub r#type: String,\n \n /// Filter to specific project (by path or ID)\n #[arg(long, short = 'p')]\n pub project: Option,\n \n /// Force run even if another ingest is in progress\n #[arg(long, short = 'f')]\n pub force: bool,\n \n /// Full sync - reset cursor and refetch all\n #[arg(long)]\n pub full: bool,\n}\n```\n\n## Code Change\n```rust\nuse crate::core::errors::GiError;\nuse crate::ingestion::orchestrator::Orchestrator;\n\npub async fn run_ingest(\n config: &Config,\n args: &IngestArgs,\n) -> Result<(), GiError> {\n let resource_type = args.r#type.as_str();\n \n // Validate resource type early\n match resource_type {\n \"issues\" | \"merge_requests\" => {}\n _ => {\n return Err(GiError::InvalidArgument {\n name: \"type\".to_string(),\n value: resource_type.to_string(),\n expected: \"issues or 
merge_requests\".to_string(),\n });\n }\n }\n \n // Acquire single-flight lock (unless --force)\n if !args.force {\n acquire_ingest_lock(config, resource_type)?;\n }\n \n // Get projects to ingest (filtered if --project specified)\n let projects = get_projects_to_ingest(config, args.project.as_deref())?;\n \n for project in projects {\n println!(\"Ingesting {} for {}...\", resource_type, project.path);\n \n let orchestrator = Orchestrator::new(\n &config,\n project.id,\n project.gitlab_id,\n )?;\n \n let result = orchestrator.run_ingestion(resource_type, args.full).await?;\n \n // Print results based on resource type\n match resource_type {\n \"issues\" => {\n println!(\" {}: {} issues fetched, {} upserted\",\n project.path, result.issues_fetched, result.issues_upserted);\n }\n \"merge_requests\" => {\n println!(\" {}: {} MRs fetched, {} new labels, {} assignees, {} reviewers\",\n project.path,\n result.mrs_fetched,\n result.labels_created,\n result.assignees_linked,\n result.reviewers_linked,\n );\n println!(\" Discussions: {} synced, {} notes ({} DiffNotes)\",\n result.discussions_synced,\n result.notes_synced,\n result.diffnotes_count,\n );\n if result.mrs_skipped_discussion_sync > 0 {\n println!(\" Skipped discussion sync for {} unchanged MRs\",\n result.mrs_skipped_discussion_sync);\n }\n if result.failed_discussion_syncs > 0 {\n eprintln!(\" Warning: {} MRs failed discussion sync (will retry next run)\",\n result.failed_discussion_syncs);\n }\n }\n _ => unreachable!(),\n }\n }\n \n // Release lock\n if !args.force {\n release_ingest_lock(config, resource_type)?;\n }\n \n Ok(())\n}\n```\n\n## Output Format\n```\nIngesting merge_requests for group/project-one...\n group/project-one: 567 MRs fetched, 12 new labels, 89 assignees, 45 reviewers\n Discussions: 456 synced, 1,234 notes (89 DiffNotes)\n Skipped discussion sync for 444 unchanged MRs\n\nTotal: 567 MRs, 456 discussions, 1,234 notes\n```\n\n## Full Sync Behavior\nWhen `--full` is passed:\n1. 
MR cursor reset to NULL (handled by `ingest_merge_requests` with `full_sync: true`)\n2. Discussion watermarks reset to NULL (handled by `reset_discussion_watermarks` called from ingestion)\n3. All MRs re-fetched from GitLab API\n4. All discussions re-fetched for every MR\n\n## Error Types (from GiError enum)\n```rust\n// In src/core/errors.rs\npub enum GiError {\n InvalidArgument {\n name: String,\n value: String,\n expected: String,\n },\n LockError {\n resource: String,\n message: String,\n },\n // ... other variants\n}\n```\n\n## Edge Cases\n- Default type is `issues` for backward compatibility with CP1\n- Project filter (`--project`) can limit to specific project by path or ID\n- Force flag (`--force`) bypasses single-flight lock for debugging\n- If no projects configured, return helpful error about running `gi project add` first\n- Empty project (no MRs): completes successfully with \"0 MRs fetched\"","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:43.034952Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:28:52.711235Z","closed_at":"2026-01-27T00:28:52.711166Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-mk3","depends_on_id":"bd-10f","type":"blocks","created_at":"2026-01-26T22:08:55.003544Z","created_by":"tayloreernisse"}]} {"id":"bd-o7b","title":"[CP1] gi show issue command","description":"## Background\n\nThe `gi show issue ` command displays detailed information about a single issue including metadata, description, labels, and all discussions with their notes. 
It provides a complete view similar to the GitLab web UI.\n\n## Approach\n\n### Module: src/cli/commands/show.rs\n\n### Clap Definition\n\n```rust\n#[derive(Args)]\npub struct ShowArgs {\n /// Entity type\n #[arg(value_parser = [\"issue\", \"mr\"])]\n pub entity: String,\n\n /// Entity IID\n pub iid: i64,\n\n /// Project path (required if ambiguous)\n #[arg(long)]\n pub project: Option,\n}\n```\n\n### Handler Function\n\n```rust\npub async fn handle_show(args: ShowArgs, conn: &Connection) -> Result<()>\n```\n\n### Logic (for entity=\"issue\")\n\n1. **Find issue**: Query by iid, optionally filtered by project\n - If multiple projects have same iid, require --project or error\n2. **Load metadata**: title, state, author, created_at, updated_at, web_url\n3. **Load labels**: JOIN through issue_labels to labels table\n4. **Load discussions**: All discussions for this issue\n5. **Load notes**: All notes for each discussion, ordered by position\n6. **Format output**: Rich display with sections\n\n### Output Format (matches PRD)\n\n```\nIssue #1234: Authentication redesign\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\nProject: group/project-one\nState: opened\nAuthor: @johndoe\nCreated: 2024-01-15\nUpdated: 2024-03-20\nLabels: enhancement, auth\nURL: https://gitlab.example.com/group/project-one/-/issues/1234\n\nDescription:\n We need to redesign the authentication flow to support...\n\nDiscussions (5):\n\n @janedoe (2024-01-16):\n I agree we should move to JWT-based auth...\n\n @johndoe (2024-01-16):\n What about refresh token strategy?\n\n @bobsmith (2024-01-17):\n Have we considered OAuth2?\n```\n\n### Queries\n\n```sql\n-- Find issue\nSELECT i.*, p.path as project_path\nFROM issues i\nJOIN projects p ON i.project_id = p.id\nWHERE i.iid = ? AND (p.path = ? OR ? 
IS NULL)\n\n-- Get labels\nSELECT l.name FROM labels l\nJOIN issue_labels il ON l.id = il.label_id\nWHERE il.issue_id = ?\n\n-- Get discussions with notes\nSELECT d.*, n.* FROM discussions d\nJOIN notes n ON d.id = n.discussion_id\nWHERE d.issue_id = ?\nORDER BY d.first_note_at, n.position\n```\n\n## Acceptance Criteria\n\n- [ ] Shows issue metadata (title, state, author, dates, URL)\n- [ ] Shows labels as comma-separated list\n- [ ] Shows description (truncated if very long)\n- [ ] Shows discussions grouped with notes indented\n- [ ] Handles --project filter correctly\n- [ ] Errors clearly if iid is ambiguous without --project\n\n## Files\n\n- src/cli/commands/mod.rs (add `pub mod show;`)\n- src/cli/commands/show.rs (create)\n- src/cli/mod.rs (add Show variant to Commands enum)\n\n## TDD Loop\n\nRED:\n```rust\n#[tokio::test] async fn show_issue_displays_metadata()\n#[tokio::test] async fn show_issue_displays_labels()\n#[tokio::test] async fn show_issue_displays_discussions()\n#[tokio::test] async fn show_issue_requires_project_when_ambiguous()\n```\n\nGREEN: Implement handler with queries and formatting\n\nVERIFY: `cargo test show_issue`\n\n## Edge Cases\n\n- Issue with no labels - show \"Labels: (none)\"\n- Issue with no discussions - show \"Discussions: (none)\"\n- Issue with very long description - truncate with \"...\"\n- System notes in discussions - filter out or show with [system] prefix\n- Individual notes (not threaded) - show without reply indentation","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-25T17:02:38.384702Z","created_by":"tayloreernisse","updated_at":"2026-01-25T23:05:25.688102Z","closed_at":"2026-01-25T23:05:25.688043Z","close_reason":"Implemented gi show issue command with metadata, labels, and discussions 
display","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-o7b","depends_on_id":"bd-208","type":"blocks","created_at":"2026-01-25T17:04:05.701560Z","created_by":"tayloreernisse"},{"issue_id":"bd-o7b","depends_on_id":"bd-hbo","type":"blocks","created_at":"2026-01-25T17:04:05.725767Z","created_by":"tayloreernisse"}]} {"id":"bd-ozy","title":"[CP1] Ingestion orchestrator","description":"## Background\n\nThe ingestion orchestrator coordinates issue sync followed by dependent discussion sync. It implements the CP1 canonical pattern: fetch issues, identify which need discussion sync (updated_at advanced), then execute discussion sync with bounded concurrency.\n\n## Approach\n\n### Module: src/ingestion/orchestrator.rs\n\n### Main Function\n\n```rust\npub async fn ingest_project_issues(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64, // Local DB project ID\n gitlab_project_id: i64,\n) -> Result\n\n#[derive(Debug, Default)]\npub struct IngestProjectResult {\n pub issues_fetched: usize,\n pub issues_upserted: usize,\n pub labels_created: usize,\n pub discussions_fetched: usize,\n pub notes_fetched: usize,\n pub system_notes_count: usize,\n pub issues_skipped_discussion_sync: usize,\n}\n```\n\n### Orchestration Steps\n\n1. **Call issue ingestion**: `ingest_issues(conn, client, config, project_id, gitlab_project_id)`\n2. **Get issues needing discussion sync**: From IngestIssuesResult.issues_needing_discussion_sync\n3. **Execute bounded discussion sync**:\n - Use `tokio::task::LocalSet` for single-threaded runtime\n - Respect `config.sync.dependent_concurrency` (default: 5)\n - For each IssueForDiscussionSync:\n - Call `ingest_issue_discussions(...)`\n - Aggregate results\n4. 
**Calculate skipped count**: total_issues - issues_needing_discussion_sync.len()\n\n### Bounded Concurrency Pattern\n\n```rust\nuse futures::stream::{self, StreamExt};\n\nlet local_set = LocalSet::new();\nlocal_set.run_until(async {\n stream::iter(issues_needing_sync)\n .map(|issue| async {\n ingest_issue_discussions(\n conn, client, config,\n project_id, gitlab_project_id,\n issue.iid, issue.local_issue_id, issue.updated_at,\n ).await\n })\n .buffer_unordered(config.sync.dependent_concurrency)\n .try_collect::>()\n .await\n}).await\n```\n\nNote: Single-threaded runtime means concurrency is I/O-bound, not parallel execution.\n\n## Acceptance Criteria\n\n- [ ] Orchestrator calls issue ingestion first\n- [ ] Only issues with updated_at > discussions_synced_for_updated_at get discussion sync\n- [ ] Bounded concurrency respects dependent_concurrency config\n- [ ] Results aggregated from both issue and discussion ingestion\n- [ ] issues_skipped_discussion_sync accurately reflects unchanged issues\n\n## Files\n\n- src/ingestion/mod.rs (add `pub mod orchestrator;`)\n- src/ingestion/orchestrator.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/orchestrator_tests.rs\n#[tokio::test] async fn orchestrates_issue_then_discussion_sync()\n#[tokio::test] async fn skips_discussion_sync_for_unchanged_issues()\n#[tokio::test] async fn respects_bounded_concurrency()\n#[tokio::test] async fn aggregates_results_correctly()\n```\n\nGREEN: Implement orchestrator with bounded concurrency\n\nVERIFY: `cargo test orchestrator`\n\n## Edge Cases\n\n- All issues unchanged - no discussion sync calls\n- All issues new - all get discussion sync\n- dependent_concurrency=1 - sequential discussion fetches\n- Issue ingestion fails - orchestrator returns error, no discussion 
sync","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.289941Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:54:07.447647Z","closed_at":"2026-01-25T22:54:07.447577Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-ozy","depends_on_id":"bd-208","type":"blocks","created_at":"2026-01-25T17:04:05.583955Z","created_by":"tayloreernisse"},{"issue_id":"bd-ozy","depends_on_id":"bd-hbo","type":"blocks","created_at":"2026-01-25T17:04:05.605851Z","created_by":"tayloreernisse"}]} +{"id":"bd-ser","title":"Implement MR ingestion module","description":"## Background\nMR ingestion module with cursor-based sync. Follows the same pattern as issue ingestion from CP1. Discussion sync eligibility is determined via DB query AFTER ingestion (not in-memory collection) to avoid memory growth on large projects.\n\n## Approach\nCreate `src/ingestion/merge_requests.rs` with:\n1. `IngestMergeRequestsResult` - Aggregated stats\n2. `ingest_merge_requests()` - Main ingestion function\n3. `upsert_merge_request()` - Single MR upsert\n4. 
Helper functions for labels, assignees, reviewers, cursor management\n\n## Files\n- `src/ingestion/merge_requests.rs` - New module\n- `src/ingestion/mod.rs` - Export new module\n- `tests/mr_ingestion_tests.rs` - Integration tests\n\n## Acceptance Criteria\n- [ ] `IngestMergeRequestsResult` has: fetched, upserted, labels_created, assignees_linked, reviewers_linked\n- [ ] `ingest_merge_requests()` returns `Result`\n- [ ] Page-boundary cursor updates (not item-count modulo)\n- [ ] Tuple-based cursor filtering: `(updated_at, gitlab_id)`\n- [ ] Transaction per MR for atomicity\n- [ ] Raw payload stored for each MR\n- [ ] Labels: clear-and-relink pattern (removes stale)\n- [ ] Assignees: clear-and-relink pattern\n- [ ] Reviewers: clear-and-relink pattern\n- [ ] `reset_discussion_watermarks()` for --full sync\n- [ ] `cargo test mr_ingestion` passes\n\n## TDD Loop\nRED: `cargo test ingest_mr` -> module not found\nGREEN: Add ingestion module with full logic\nVERIFY: `cargo test mr_ingestion`\n\n## Main Function Signature\n```rust\npub async fn ingest_merge_requests(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64, // Local DB project ID\n gitlab_project_id: i64, // GitLab project ID\n full_sync: bool, // Reset cursor if true\n) -> Result\n```\n\n## Ingestion Loop (page-based)\n```rust\nlet mut page = 1u32;\nloop {\n let page_result = client.fetch_merge_requests_page(...).await?;\n \n for mr in &page_result.items {\n // Tuple cursor filtering\n if let (Some(cursor_ts), Some(cursor_id)) = (cursor_updated_at, cursor_gitlab_id) {\n if mr_updated_at < cursor_ts { continue; }\n if mr_updated_at == cursor_ts && mr.id <= cursor_id { continue; }\n }\n \n // Begin transaction\n let tx = conn.unchecked_transaction()?;\n \n // Store raw payload\n let payload_id = store_payload(&tx, ...)?;\n \n // Transform and upsert\n let transformed = transform_merge_request(&mr, project_id)?;\n let upsert_result = upsert_merge_request(&tx, 
&transformed.merge_request, payload_id)?;\n \n // Clear-and-relink labels\n clear_mr_labels(&tx, local_mr_id)?;\n for label in &labels { ... }\n \n // Clear-and-relink assignees\n clear_mr_assignees(&tx, local_mr_id)?;\n for username in &transformed.assignee_usernames { ... }\n \n // Clear-and-relink reviewers\n clear_mr_reviewers(&tx, local_mr_id)?;\n for username in &transformed.reviewer_usernames { ... }\n \n tx.commit()?;\n \n // Track for cursor\n last_updated_at = Some(mr_updated_at);\n last_gitlab_id = Some(mr.id);\n }\n \n // Page-boundary cursor flush\n if let (Some(updated_at), Some(gitlab_id)) = (last_updated_at, last_gitlab_id) {\n update_cursor(conn, project_id, \"merge_requests\", updated_at, gitlab_id)?;\n }\n \n if page_result.is_last_page { break; }\n page = page_result.next_page.unwrap_or(page + 1);\n}\n```\n\n## Full Sync Watermark Reset\n```rust\nfn reset_discussion_watermarks(conn: &Connection, project_id: i64) -> Result<()> {\n conn.execute(\n \"UPDATE merge_requests\n SET discussions_synced_for_updated_at = NULL,\n discussions_sync_attempts = 0,\n discussions_sync_last_error = NULL\n WHERE project_id = ?\",\n [project_id],\n )?;\n Ok(())\n}\n```\n\n## DB Helper Functions\n- `get_cursor(conn, project_id) -> (Option, Option)` - Get (updated_at, gitlab_id)\n- `update_cursor(conn, project_id, resource_type, updated_at, gitlab_id)`\n- `reset_cursor(conn, project_id, resource_type)`\n- `upsert_merge_request(conn, mr, payload_id) -> Result`\n- `clear_mr_labels(conn, mr_id)`\n- `link_mr_label(conn, mr_id, label_id)`\n- `clear_mr_assignees(conn, mr_id)`\n- `upsert_mr_assignee(conn, mr_id, username)`\n- `clear_mr_reviewers(conn, mr_id)`\n- `upsert_mr_reviewer(conn, mr_id, username)`\n\n## Edge Cases\n- Cursor rewind may cause refetch of already-seen MRs (tuple filtering handles this)\n- Large projects: 10k+ MRs - page-based cursor prevents massive refetch on crash\n- Labels/assignees/reviewers may change - clear-and-relink ensures 
correctness","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:41.967459Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:15:24.526208Z","closed_at":"2026-01-27T00:15:24.526142Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-ser","depends_on_id":"bd-34o","type":"blocks","created_at":"2026-01-26T22:08:54.519486Z","created_by":"tayloreernisse"},{"issue_id":"bd-ser","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.440174Z","created_by":"tayloreernisse"},{"issue_id":"bd-ser","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:54.593550Z","created_by":"tayloreernisse"}]} {"id":"bd-v6i","title":"[CP1] gi ingest --type=issues command","description":"## Background\n\nThe `gi ingest --type=issues` command is the main entry point for issue ingestion. It acquires a single-flight lock, calls the orchestrator for each configured project, and outputs progress/summary to the user.\n\n## Approach\n\n### Module: src/cli/commands/ingest.rs\n\n### Clap Definition\n\n```rust\n#[derive(Args)]\npub struct IngestArgs {\n /// Resource type to ingest\n #[arg(long, value_parser = [\"issues\", \"merge_requests\"])]\n pub r#type: String,\n\n /// Filter to single project\n #[arg(long)]\n pub project: Option,\n\n /// Override stale sync lock\n #[arg(long)]\n pub force: bool,\n}\n```\n\n### Handler Function\n\n```rust\npub async fn handle_ingest(args: IngestArgs, config: &Config) -> Result<()>\n```\n\n### Logic\n\n1. **Acquire single-flight lock**: `acquire_sync_lock(conn, args.force)?`\n2. **Get projects to sync**:\n - If `args.project` specified, filter to that one\n - Otherwise, get all configured projects from DB\n3. **For each project**:\n - Print \"Ingesting issues for {project_path}...\"\n - Call `ingest_project_issues(conn, client, config, project_id, gitlab_project_id)`\n - Print \"{N} issues fetched, {M} new labels\"\n4. 
**Print discussion sync summary**:\n - \"Fetching discussions ({N} issues with updates)...\"\n - \"{N} discussions, {M} notes (excluding {K} system notes)\"\n - \"Skipped discussion sync for {N} unchanged issues.\"\n5. **Release lock**: Lock auto-released when handler returns\n\n### Output Format (matches PRD)\n\n```\nIngesting issues...\n\n group/project-one: 1,234 issues fetched, 45 new labels\n\nFetching discussions (312 issues with updates)...\n\n group/project-one: 312 issues → 1,234 discussions, 5,678 notes\n\nTotal: 1,234 issues, 1,234 discussions, 5,678 notes (excluding 1,234 system notes)\nSkipped discussion sync for 922 unchanged issues.\n```\n\n## Acceptance Criteria\n\n- [ ] Clap args parse --type, --project, --force correctly\n- [ ] Single-flight lock acquired before sync starts\n- [ ] Lock error message is clear if concurrent run attempted\n- [ ] Progress output shows per-project counts\n- [ ] Summary includes unchanged issues skipped count\n- [ ] --force flag allows overriding stale lock\n\n## Files\n\n- src/cli/commands/mod.rs (add `pub mod ingest;`)\n- src/cli/commands/ingest.rs (create)\n- src/cli/mod.rs (add Ingest variant to Commands enum)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/cli_ingest_tests.rs\n#[tokio::test] async fn ingest_issues_acquires_lock()\n#[tokio::test] async fn ingest_issues_fails_on_concurrent_run()\n#[tokio::test] async fn ingest_issues_respects_project_filter()\n#[tokio::test] async fn ingest_issues_force_overrides_stale_lock()\n```\n\nGREEN: Implement handler with lock and orchestrator calls\n\nVERIFY: `cargo test cli_ingest`\n\n## Edge Cases\n\n- No projects configured - return early with helpful message\n- Project filter matches nothing - error with \"project not found\"\n- Lock already held - clear error \"Sync already in progress\"\n- Ctrl-C during sync - lock should be released (via Drop or SIGINT 
handler)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.312565Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:56:44.090142Z","closed_at":"2026-01-25T22:56:44.090086Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-v6i","depends_on_id":"bd-ozy","type":"blocks","created_at":"2026-01-25T17:04:05.629772Z","created_by":"tayloreernisse"}]} {"id":"bd-xhz","title":"[CP1] GitLab client pagination methods","description":"## Background\n\nGitLab pagination methods enable fetching large result sets (issues, discussions) as async streams. The client uses `x-next-page` headers to determine continuation and applies cursor rewind for tuple-based incremental sync.\n\n## Approach\n\nAdd pagination methods to GitLabClient using `async-stream` crate:\n\n### Methods to Add\n\n```rust\nimpl GitLabClient {\n /// Paginate through issues for a project.\n pub fn paginate_issues(\n &self,\n gitlab_project_id: i64,\n updated_after: Option, // ms epoch cursor\n cursor_rewind_seconds: u32,\n ) -> Pin> + Send + '_>>\n\n /// Paginate through discussions for an issue.\n pub fn paginate_issue_discussions(\n &self,\n gitlab_project_id: i64,\n issue_iid: i64,\n ) -> Pin> + Send + '_>>\n\n /// Make request and return response with headers for pagination.\n async fn request_with_headers(\n &self,\n path: &str,\n params: &[(&str, String)],\n ) -> Result<(T, HeaderMap)>\n}\n```\n\n### Pagination Logic\n\n1. Start at page 1, per_page=100\n2. For issues: add scope=all, state=all, order_by=updated_at, sort=asc\n3. Apply cursor rewind: `updated_after = cursor - rewind_seconds` (clamped to 0)\n4. Yield each item from response\n5. Check `x-next-page` header for continuation\n6. 
Stop when header is empty/absent OR response is empty\n\n### Cursor Rewind\n\n```rust\nif let Some(ts) = updated_after {\n let rewind_ms = (cursor_rewind_seconds as i64) * 1000;\n let rewound = (ts - rewind_ms).max(0); // Clamp to avoid underflow\n // Convert to ISO 8601 for updated_after param\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `paginate_issues` returns Stream of GitLabIssue\n- [ ] `paginate_issues` adds scope=all, state=all, order_by=updated_at, sort=asc\n- [ ] `paginate_issues` applies cursor rewind with max(0) clamping\n- [ ] `paginate_issue_discussions` returns Stream of GitLabDiscussion\n- [ ] Both methods follow x-next-page header until empty\n- [ ] Both methods stop on empty response (fallback)\n- [ ] `request_with_headers` returns (T, HeaderMap) tuple\n\n## Files\n\n- src/gitlab/client.rs (edit - add methods)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/pagination_tests.rs\n#[tokio::test] async fn fetches_all_pages_when_multiple_exist()\n#[tokio::test] async fn respects_per_page_parameter()\n#[tokio::test] async fn follows_x_next_page_header_until_empty()\n#[tokio::test] async fn falls_back_to_empty_page_stop_if_headers_missing()\n#[tokio::test] async fn applies_cursor_rewind_for_tuple_semantics()\n#[tokio::test] async fn clamps_negative_rewind_to_zero()\n```\n\nGREEN: Implement pagination methods with async-stream\n\nVERIFY: `cargo test pagination`\n\n## Edge Cases\n\n- cursor_updated_at near zero - rewind must not underflow (use max(0))\n- GitLab returns empty x-next-page - treat as end of pages\n- GitLab omits pagination headers entirely - use empty response as stop condition\n- DateTime conversion fails - omit updated_after and fetch all (safe fallback)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.222168Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:28:39.192876Z","closed_at":"2026-01-25T22:28:39.192815Z","close_reason":"Implemented paginate_issues and paginate_issue_discussions with 
async-stream, cursor rewind with max(0) clamping, x-next-page header following, 4 unit tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-xhz","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.398212Z","created_by":"tayloreernisse"},{"issue_id":"bd-xhz","depends_on_id":"bd-2ys","type":"blocks","created_at":"2026-01-25T17:04:05.371440Z","created_by":"tayloreernisse"}]} {"id":"bd-ymd","title":"[CP1] Final validation - Gate A through D","description":"Run all tests and verify all internal gates pass.\n\n## Gate A: Issues Only (Must Pass First)\n- [ ] gi ingest --type=issues fetches all issues from configured projects\n- [ ] Issues stored with correct schema, including last_seen_at\n- [ ] Cursor-based sync is resumable (re-run fetches only new/updated)\n- [ ] Incremental cursor updates every 100 issues\n- [ ] Raw payloads stored for each issue\n- [ ] gi list issues and gi count issues work\n\n## Gate B: Labels Correct (Must Pass)\n- [ ] Labels extracted and stored (name-only)\n- [ ] Label links created correctly\n- [ ] Stale label links removed on re-sync (verified with test)\n- [ ] Label count per issue matches GitLab\n\n## Gate C: Dependent Discussion Sync (Must Pass)\n- [ ] Discussions fetched for issues with updated_at advancement\n- [ ] Notes stored with is_system flag correctly set\n- [ ] Raw payloads stored for discussions and notes\n- [ ] discussions_synced_for_updated_at watermark updated after sync\n- [ ] Unchanged issues skip discussion refetch (verified with test)\n- [ ] Bounded concurrency (dependent_concurrency respected)\n\n## Gate D: Resumability Proof (Must Pass)\n- [ ] Kill mid-run, rerun; bounded redo (cursor progress preserved)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] Single-flight lock prevents concurrent runs\n\n## Final Gate (Must Pass)\n- [ ] All unit tests pass (cargo test)\n- [ ] All integration tests pass (mocked with wiremock)\n- [ ] cargo clippy passes 
with no warnings\n- [ ] cargo fmt --check passes\n- [ ] Compiles with --release\n\n## Validation Commands\ncargo test\ncargo clippy -- -D warnings\ncargo fmt --check\ncargo build --release\n\nFiles: All CP1 files\nDone when: All gate criteria pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:59:26.795633Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.132613Z","deleted_at":"2026-01-25T17:02:02.132608Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} diff --git a/.beads/last-touched b/.beads/last-touched index 5a44361..d668d5f 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-2um +bd-lcb diff --git a/AGENTS.md b/AGENTS.md index 2b5b266..c7af02a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -17,153 +17,11 @@ Build extensible pieces of logic that can easily be integrated with other pieces DRY principles should be loosely held. Architecture MUST be clear and well thought-out. Ask the user for clarification whenever ambiguity is discovered around architecture, or you think a better approach than planned exists. -## Beads Rust Workflow Integration - -This project uses [beads_viewer](https://github.com/Dicklesworthstone/beads_viewer) for issue tracking. Issues are stored in `.beads/` and tracked in git. - -### Essential Commands - -```bash -# View issues (launches TUI - NOT FOR AGENT USE, human only) -bv - -# CLI commands for agents (use --json for machine-readable output) -br ready --json # Show issues ready to work (no blockers) -br list --status=open --json # All open issues -br show --json # Full issue details with dependencies -br create --title="..." 
--type=task --priority=2 -br update --status=in_progress -br close --reason="Completed" -br close # Close multiple issues at once -br sync # Commit and push changes -``` - -### Robot Mode (Agent-Optimized bv Commands) - -Use `bv --robot-*` flags for structured JSON output optimized for AI agents: - -```bash -# Essential robot commands -bv --robot-triage # THE MEGA-COMMAND: unified analysis, recommendations, health -bv --robot-next # Single top recommendation (minimal output) -bv --robot-plan # Dependency-respecting execution plan -bv --robot-priority # Priority recommendations with reasoning -bv --robot-insights # Deep graph analysis (PageRank, bottlenecks, etc.) - -# File impact analysis (check before editing) -bv --robot-impact # Risk assessment for modifying files -bv --robot-file-beads # What beads have touched this file? -bv --robot-file-hotspots # High-churn files (conflict zones) -bv --robot-related # Find related beads - -# Filtering options (work with most robot commands) -bv --robot-triage --robot-by-label=backend -bv --robot-priority --robot-min-confidence=0.7 -bv --robot-insights --label=api # Scope to label subgraph -``` - -Run `bv -robot-help` for complete robot mode documentation. - -### Workflow Pattern - -1. **Start**: Run `br ready` to find actionable work -2. **Claim**: Use `br update --status=in_progress` -3. **Work**: Implement the task -4. **Complete**: Use `br close ` -5. **Sync**: Always run `br sync` at session end - -### Key Concepts - -- **Dependencies**: Issues can block other issues. `br ready` shows only unblocked work. -- **Priority**: P0=critical, P1=high, P2=medium, P3=low, P4=backlog (use numbers, not words) -- **Types**: task, bug, feature, epic, question, docs -- **Blocking**: `br dep add ` to add dependencies - -### Session Protocol - -**Before ending any session, run this checklist:** - -```bash -git status # Check what changed -git add # Stage code changes -br sync # Commit beads changes -git commit -m "..." 
# Commit code -br sync # Commit any new beads changes -git push # Push to remote -``` - -### Best Practices - -- Check `br ready` at session start to find available work -- Update status as you work (in_progress → closed) -- Create new issues with `br create` when you discover tasks -- Use descriptive titles and set appropriate priority/type -- Always `br sync` before ending session - - - - - --- -## Beads Workflow Integration +## Third-Party Library Usage -This project uses [beads_viewer](https://github.com/Dicklesworthstone/beads_viewer) for issue tracking. Issues are stored in `.beads/` and tracked in git. - -### Essential Commands - -```bash -# View issues (launches TUI - avoid in automated sessions) -bv - -# CLI commands for agents (use these instead) -bd ready # Show issues ready to work (no blockers) -bd list --status=open # All open issues -bd show # Full issue details with dependencies -bd create --title="..." --type=task --priority=2 -bd update --status=in_progress -bd close --reason="Completed" -bd close # Close multiple issues at once -bd sync # Commit and push changes -``` - -### Workflow Pattern - -1. **Start**: Run `bd ready` to find actionable work -2. **Claim**: Use `bd update --status=in_progress` -3. **Work**: Implement the task -4. **Complete**: Use `bd close ` -5. **Sync**: Always run `bd sync` at session end - -### Key Concepts - -- **Dependencies**: Issues can block other issues. `bd ready` shows only unblocked work. -- **Priority**: P0=critical, P1=high, P2=medium, P3=low, P4=backlog (use numbers, not words) -- **Types**: task, bug, feature, epic, question, docs -- **Blocking**: `bd dep add ` to add dependencies - -### Session Protocol - -**Before ending any session, run this checklist:** - -```bash -git status # Check what changed -git add # Stage code changes -bd sync # Commit beads changes -git commit -m "..." 
# Commit code -bd sync # Commit any new beads changes -git push # Push to remote -``` - -### Best Practices - -- Check `bd ready` at session start to find available work -- Update status as you work (in_progress → closed) -- Create new issues with `bd create` when you discover tasks -- Use descriptive titles and set appropriate priority/type -- Always `bd sync` before ending session - - +If you aren't 100% sure how to use a third-party library, **SEARCH ONLINE** to find the latest documentation and mid-2025 best practices. --- diff --git a/RUST_CLI_TOOLS_BEST_PRACTICES_GUIDE.md b/RUST_CLI_TOOLS_BEST_PRACTICES_GUIDE.md new file mode 100644 index 0000000..0515689 --- /dev/null +++ b/RUST_CLI_TOOLS_BEST_PRACTICES_GUIDE.md @@ -0,0 +1,2729 @@ +# The Definitive Guide to High-Performance CLI and Automation Tools with Rust (mid-2025 Edition) + +This guide synthesizes modern best practices for building blazingly fast, user-friendly, and production-ready CLI tools and automation systems with the **latest Rust nightly**, clap, anyhow, and tokio. It moves beyond basic argument parsing to provide battle-tested patterns for real-world command-line applications. + +## Prerequisites & Toolchain Configuration + +Ensure your environment uses the **current Rust nightly** (pinned via `rust-toolchain.toml`) plus the latest crate releases (wildcard constraints) for **clap**, **anyhow**, and **tokio**. The 2024 edition provides better async ergonomics and should be your default. 
+ +```toml +# Cargo.toml - Base configuration for CLI tools (nightly + latest crates) +[package] +name = "myctl" +version = "0.1.0" +edition = "2024" +authors = ["Your Name "] +description = "A blazingly fast CLI tool" +license = "MIT OR Apache-2.0" +repository = "https://github.com/yourusername/myctl" +keywords = ["cli", "automation", "tool"] +categories = ["command-line-utilities"] + +[[bin]] +name = "myctl" +path = "src/main.rs" + +[dependencies] +# Core CLI framework +clap = { version = "*", features = ["derive", "cargo", "env", "unicode", "wrap_help"] } +clap_complete = "*" +clap_mangen = "*" + +# Error handling +anyhow = "*" +thiserror = "*" + +# Async runtime +tokio = { version = "*", features = ["rt-multi-thread", "macros", "fs", "process", "io-util", "time", "signal"] } + +# Serialization +serde = { version = "*", features = ["derive"] } +serde_json = "*" +toml = "*" + +# User interaction +dialoguer = "*" +indicatif = "*" +console = "*" +colored = "*" + +# System interaction +directories = "*" +which = "*" +shell-words = "*" + +# HTTP client for API interactions +reqwest = { version = "*", features = ["json", "rustls-tls"], default-features = false } + +# Logging +tracing = "*" +tracing-subscriber = { version = "*", features = ["env-filter", "json"] } + +[dev-dependencies] +assert_cmd = "*" +predicates = "*" +tempfile = "*" +insta = { version = "*", features = ["yaml", "json"] } + +[profile.release] +lto = true +codegen-units = 1 +strip = true +panic = "abort" +opt-level = "z" # Optimize for binary size +``` + +### Essential Development Tools + +```bash +# Install development tools +cargo install cargo-binstall # Install binaries faster +cargo install cargo-dist # Cross-platform binary distribution +cargo install cargo-insta # Snapshot testing +cargo install hyperfine # CLI benchmarking +cargo install cargo-bloat # Analyze binary size + +# Platform-specific tools +cargo binstall cargo-zigbuild # Better cross-compilation +cargo binstall cross # Docker-based 
cross-compilation +``` + +--- + +## 1. Project Structure & Architecture + +CLI tools require a different structure than libraries or web services. Prioritize modularity and testability. + +### ✅ DO: Use a Scalable Project Layout + +``` +myctl/ +├── Cargo.toml +├── build.rs # Build script for completions +├── src/ +│ ├── main.rs # Entry point - minimal logic +│ ├── cli.rs # CLI structure and parsing +│ ├── commands/ # Command implementations +│ │ ├── mod.rs +│ │ ├── init.rs +│ │ ├── deploy.rs +│ │ └── status.rs +│ ├── config/ # Configuration management +│ │ ├── mod.rs +│ │ └── schema.rs +│ ├── client/ # API/service clients +│ │ └── mod.rs +│ └── utils/ # Shared utilities +│ ├── mod.rs +│ ├── progress.rs +│ └── terminal.rs +├── tests/ # Integration tests +│ └── integration/ +└── completions/ # Generated shell completions +``` + +### ✅ DO: Keep `main.rs` Minimal + +```rust +// src/main.rs +use anyhow::Result; +use myctl::cli::Cli; +use tracing_subscriber::EnvFilter; + +#[tokio::main] +async fn main() -> Result<()> { + // Initialize tracing early + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env()) + .with_writer(std::io::stderr) + .init(); + + // Run the actual CLI + myctl::run().await +} + +// src/lib.rs +use anyhow::Result; +use clap::Parser; + +pub mod cli; +pub mod commands; +pub mod config; +pub mod utils; + +pub async fn run() -> Result<()> { + let cli = cli::Cli::parse(); + commands::execute(cli).await +} +``` + +--- + +## 2. Clap v4 Patterns: Beyond Basic Parsing + +Clap 4.5 introduces improved derive macros and better async support. Master both the derive and builder APIs for maximum flexibility. 
+ +### ✅ DO: Use Derive API with Advanced Features + +```rust +// src/cli.rs +use clap::{Parser, Subcommand, Args, ValueEnum}; +use std::path::PathBuf; + +#[derive(Parser)] +#[command( + name = "myctl", + about = "A powerful automation tool", + version, + author, + long_about = None, + // Enable colored help automatically + color = clap::ColorChoice::Auto, + // Custom help template + help_template = "{before-help}{name} {version}\n{author}\n{about}\n\n{usage-heading} {usage}\n\n{all-args}{after-help}", +)] +pub struct Cli { + /// Global configuration file + #[arg(short, long, global = true, env = "MYCTL_CONFIG")] + pub config: Option, + + /// Output format + #[arg( + short, + long, + global = true, + value_enum, + default_value = "auto", + env = "MYCTL_OUTPUT" + )] + pub output: OutputFormat, + + /// Increase logging verbosity + #[arg(short, long, action = clap::ArgAction::Count, global = true)] + pub verbose: u8, + + /// Suppress all output + #[arg(short, long, global = true, conflicts_with = "verbose")] + pub quiet: bool, + + #[command(subcommand)] + pub command: Commands, +} + +#[derive(ValueEnum, Clone, Copy, Debug)] +pub enum OutputFormat { + /// Human-readable output with colors + Auto, + /// Plain text without formatting + Plain, + /// JSON output for scripting + Json, + /// YAML output + Yaml, + /// Table format + Table, +} + +#[derive(Subcommand)] +pub enum Commands { + /// Initialize a new project + Init(InitArgs), + + /// Deploy resources + Deploy { + #[command(flatten)] + common: DeployCommonArgs, + + #[command(subcommand)] + target: DeployTarget, + }, + + /// Show status of resources + Status { + /// Filter by resource name pattern + #[arg(short, long)] + filter: Option, + + /// Watch for changes + #[arg(short, long)] + watch: bool, + }, + + /// Manage configurations + Config(ConfigArgs), +} + +#[derive(Args)] +pub struct InitArgs { + /// Project name + #[arg(value_name = "NAME")] + pub name: String, + + /// Project template + #[arg(short, long, 
default_value = "default")] + pub template: String, + + /// Skip interactive prompts + #[arg(long)] + pub non_interactive: bool, +} + +#[derive(Args)] +pub struct DeployCommonArgs { + /// Dry run - show what would be deployed + #[arg(long)] + pub dry_run: bool, + + /// Force deployment without confirmation + #[arg(short, long)] + pub force: bool, + + /// Parallel deployment count + #[arg(short, long, default_value = "4", value_parser = clap::value_parser!(u8).range(1..=32))] + pub parallel: u8, +} + +#[derive(Subcommand)] +pub enum DeployTarget { + /// Deploy to production + Production { + /// Production environment name + env: String, + }, + /// Deploy to staging + Staging, + /// Deploy to local development + Local { + /// Local port to use + #[arg(short, long, default_value = "8080")] + port: u16, + }, +} + +// Advanced: Custom type with validation +#[derive(Clone, Debug)] +pub struct ResourcePattern(String); + +impl std::str::FromStr for ResourcePattern { + type Err = String; + + fn from_str(s: &str) -> Result { + if s.is_empty() { + return Err("Resource pattern cannot be empty".to_string()); + } + + // Validate pattern syntax + if s.contains("**") && s.contains("?") { + return Err("Cannot mix ** and ? 
in patterns".to_string()); + } + + Ok(ResourcePattern(s.to_string())) + } +} +``` + +### ✅ DO: Implement Shell Completions + +```rust +// build.rs +use clap::CommandFactory; +use clap_complete::{generate_to, shells::*}; +use std::env; +use std::io::Error; + +include!("src/cli.rs"); + +fn main() -> Result<(), Error> { + let outdir = match env::var_os("OUT_DIR") { + None => return Ok(()), + Some(outdir) => outdir, + }; + + let mut cmd = Cli::command(); + let name = cmd.get_name().to_string(); + + // Generate completions for all shells + generate_to(Bash, &mut cmd, &name, &outdir)?; + generate_to(Zsh, &mut cmd, &name, &outdir)?; + generate_to(Fish, &mut cmd, &name, &outdir)?; + generate_to(PowerShell, &mut cmd, &name, &outdir)?; + generate_to(Elvish, &mut cmd, &name, &outdir)?; + + println!("cargo:rerun-if-changed=src/cli.rs"); + Ok(()) +} +``` + +### ✅ DO: Implement Dynamic Completions + +```rust +use clap::{ArgMatches, Command}; +use clap_complete::dynamic::CompletionCandidate; + +// Provide dynamic completions for resource names +fn complete_resource_name(current: &str) -> Vec { + // In real app, this would query your data source + let resources = vec!["web-server", "database", "cache", "queue"]; + + resources + .into_iter() + .filter(|r| r.starts_with(current)) + .map(|r| CompletionCandidate::new(r)) + .collect() +} + +// Register dynamic completion +pub fn augment_args(cmd: Command) -> Command { + cmd.arg( + clap::Arg::new("resource") + .value_parser(clap::builder::NonEmptyStringValueParser::new()) + .add(clap_complete::dynamic::ValueHint::Unknown) + .value_hint(clap::ValueHint::Other) + ) +} +``` + +--- + +## 3. Error Handling with Anyhow + +CLI tools need excellent error messages. Anyhow provides the perfect balance of ergonomics and informativeness. 
+ +### ✅ DO: Use Context for Better Error Messages + +```rust +use anyhow::{anyhow, bail, Context, Result}; +use std::fs; +use std::path::Path; + +pub async fn load_config(path: &Path) -> Result { + // Add context to filesystem operations + let contents = fs::read_to_string(path) + .with_context(|| format!("Failed to read config file at {}", path.display()))?; + + // Add context to parsing operations + let config: Config = toml::from_str(&contents) + .with_context(|| format!("Invalid TOML in config file {}", path.display()))?; + + // Validate with custom errors + validate_config(&config) + .with_context(|| "Configuration validation failed")?; + + Ok(config) +} + +fn validate_config(config: &Config) -> Result<()> { + if config.timeout_seconds == 0 { + bail!("Timeout must be greater than 0"); + } + + if config.endpoints.is_empty() { + return Err(anyhow!("At least one endpoint must be configured")); + } + + for (name, endpoint) in &config.endpoints { + if endpoint.url.scheme() != "https" && !config.allow_insecure { + bail!( + "Endpoint '{}' uses insecure protocol '{}'. 
\ + Use HTTPS or set 'allow_insecure = true'", + name, + endpoint.url.scheme() + ); + } + } + + Ok(()) +} +``` + +### ✅ DO: Create Helpful Error Displays + +```rust +use console::style; +use std::fmt::Write; + +pub fn display_error(err: &anyhow::Error) -> String { + let mut output = String::new(); + + // Primary error + writeln!( + &mut output, + "{} {}", + style("Error:").red().bold(), + err + ).unwrap(); + + // Chain of causes + let mut source = err.source(); + while let Some(cause) = source { + writeln!( + &mut output, + " {} {}", + style("Caused by:").yellow(), + cause + ).unwrap(); + source = cause.source(); + } + + // Add helpful suggestions based on error type + if let Some(suggestion) = suggest_fix(err) { + writeln!( + &mut output, + "\n{} {}", + style("Suggestion:").green(), + suggestion + ).unwrap(); + } + + output +} + +fn suggest_fix(err: &anyhow::Error) -> Option<&'static str> { + let msg = err.to_string(); + + if msg.contains("EACCES") || msg.contains("Permission denied") { + Some("Try running with elevated permissions (sudo on Unix)") + } else if msg.contains("ENOENT") || msg.contains("No such file") { + Some("Check if the file path is correct and the file exists") + } else if msg.contains("EADDRINUSE") || msg.contains("Address already in use") { + Some("Another process is using this port. Try a different port or stop the other process") + } else if msg.contains("certificate") || msg.contains("SSL") { + Some("This might be a certificate issue. 
Try --insecure to bypass (not recommended for production)") + } else { + None + } +} +``` + +### ✅ DO: Use Custom Error Types When Needed + +```rust +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ConfigError { + #[error("Configuration file not found at {path}")] + NotFound { path: PathBuf }, + + #[error("Invalid configuration: {message}")] + Invalid { message: String }, + + #[error("Missing required field: {field}")] + MissingField { field: &'static str }, + + #[error("Environment variable {var} not set")] + MissingEnv { var: String }, +} + +// Convert to anyhow::Error when needed +impl From for anyhow::Error { + fn from(err: ConfigError) -> Self { + anyhow::Error::new(err) + } +} +``` + +--- + +## 4. Async CLI Patterns with Tokio + +Modern CLI tools often need concurrent operations. Tokio provides the foundation for high-performance async CLIs. + +### ✅ DO: Structure Async Commands Properly + +```rust +// src/commands/mod.rs +use anyhow::Result; +use tokio::task::JoinSet; +use std::time::Duration; + +pub async fn execute(cli: Cli) -> Result<()> { + match cli.command { + Commands::Deploy { common, target } => { + deploy::execute(common, target, &cli).await + } + Commands::Status { filter, watch } => { + if watch { + status::watch(filter, &cli).await + } else { + status::show(filter, &cli).await + } + } + // ... other commands + } +} + +// src/commands/deploy.rs +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use tokio::sync::Semaphore; +use std::sync::Arc; + +pub async fn execute( + args: DeployCommonArgs, + target: DeployTarget, + cli: &Cli, +) -> Result<()> { + let resources = discover_resources(&target).await?; + + if args.dry_run { + return show_deployment_plan(&resources, cli); + } + + if !args.force && !confirm_deployment(&resources).await? 
{ + bail!("Deployment cancelled by user"); + } + + // Deploy with parallelism control + let semaphore = Arc::new(Semaphore::new(args.parallel as usize)); + let multi_progress = MultiProgress::new(); + let mut tasks = JoinSet::new(); + + for resource in resources { + let sem = semaphore.clone(); + let pb = create_progress_bar(&multi_progress, &resource); + + tasks.spawn(async move { + let _permit = sem.acquire().await?; + deploy_resource(resource, pb).await + }); + } + + // Collect results + let mut failed = Vec::new(); + while let Some(result) = tasks.join_next().await { + match result { + Ok(Ok(())) => {}, + Ok(Err(e)) => failed.push(e), + Err(e) => failed.push(anyhow!("Task panicked: {}", e)), + } + } + + if failed.is_empty() { + success!("All resources deployed successfully"); + Ok(()) + } else { + error!("{} resources failed to deploy", failed.len()); + for (i, err) in failed.iter().enumerate() { + eprintln!(" {}. {}", i + 1, err); + } + bail!("Deployment failed") + } +} + +async fn deploy_resource( + resource: Resource, + progress: ProgressBar, +) -> Result<()> { + progress.set_message("Validating..."); + validate_resource(&resource).await?; + + progress.set_message("Uploading..."); + progress.set_position(25); + upload_resource(&resource).await?; + + progress.set_message("Configuring..."); + progress.set_position(50); + configure_resource(&resource).await?; + + progress.set_message("Starting..."); + progress.set_position(75); + start_resource(&resource).await?; + + progress.set_message("Verifying..."); + progress.set_position(90); + verify_resource(&resource).await?; + + progress.finish_with_message("✓ Deployed"); + Ok(()) +} +``` + +### ✅ DO: Handle Signals Gracefully + +```rust +use tokio::signal; +use tokio::sync::broadcast; + +pub struct SignalHandler { + shutdown_tx: broadcast::Sender<()>, +} + +impl SignalHandler { + pub fn new() -> (Self, broadcast::Receiver<()>) { + let (shutdown_tx, shutdown_rx) = broadcast::channel(1); + + let handler = Self { 
shutdown_tx }; + + // Spawn signal handling task + tokio::spawn(async move { + handler.handle_signals().await; + }); + + (handler, shutdown_rx) + } + + async fn handle_signals(self) { + let ctrl_c = async { + signal::ctrl_c() + .await + .expect("Failed to install Ctrl+C handler"); + }; + + #[cfg(unix)] + let terminate = async { + signal::unix::signal(signal::unix::SignalKind::terminate()) + .expect("Failed to install signal handler") + .recv() + .await; + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + _ = ctrl_c => { + info!("Received Ctrl+C, initiating graceful shutdown..."); + }, + _ = terminate => { + info!("Received terminate signal, initiating graceful shutdown..."); + }, + } + + let _ = self.shutdown_tx.send(()); + } +} + +// Usage in long-running command +pub async fn watch_resources(filter: Option) -> Result<()> { + let (_handler, mut shutdown_rx) = SignalHandler::new(); + let mut interval = tokio::time::interval(Duration::from_secs(2)); + + loop { + tokio::select! 
{ + _ = shutdown_rx.recv() => { + info!("Stopping watch..."); + break; + } + _ = interval.tick() => { + clear_screen(); + display_resources(&filter).await?; + } + } + } + + Ok(()) +} +``` + +### ✅ DO: Implement Timeouts and Retries + +```rust +use anyhow::Result; +use tokio::time::{timeout, sleep}; +use std::time::Duration; + +pub struct RetryConfig { + pub max_attempts: u32, + pub initial_delay: Duration, + pub max_delay: Duration, + pub exponential_base: f64, +} + +impl Default for RetryConfig { + fn default() -> Self { + Self { + max_attempts: 3, + initial_delay: Duration::from_millis(100), + max_delay: Duration::from_secs(10), + exponential_base: 2.0, + } + } +} + +pub async fn with_retry( + operation: F, + config: RetryConfig, +) -> Result +where + F: Fn() -> Fut, + Fut: std::future::Future>, +{ + let mut delay = config.initial_delay; + + for attempt in 1..=config.max_attempts { + match timeout(Duration::from_secs(30), operation()).await { + Ok(Ok(value)) => return Ok(value), + Ok(Err(e)) if attempt == config.max_attempts => { + return Err(e).context(format!( + "Operation failed after {} attempts", + config.max_attempts + )); + } + Ok(Err(e)) => { + warn!("Attempt {} failed: {}. Retrying in {:?}...", + attempt, e, delay); + sleep(delay).await; + + // Exponential backoff with jitter + delay = std::cmp::min( + config.max_delay, + Duration::from_secs_f64( + delay.as_secs_f64() * config.exponential_base + * (0.5 + rand::random::() * 0.5) + ), + ); + } + Err(_) => { + if attempt == config.max_attempts { + bail!("Operation timed out after {} attempts", config.max_attempts); + } + warn!("Attempt {} timed out. Retrying...", attempt); + } + } + } + + unreachable!() +} + +// Usage +pub async fn fetch_with_retry(url: &str) -> Result { + with_retry( + || async { + let response = reqwest::get(url).await?; + response.error_for_status()?.text().await + .context("Failed to read response body") + }, + RetryConfig::default(), + ).await +} +``` + +--- + +## 5. 
Configuration Management + +CLI tools need flexible configuration systems that support files, environment variables, and command-line overrides. + +### ✅ DO: Implement Layered Configuration + +```rust +// src/config/mod.rs +use anyhow::{Context, Result}; +use directories::ProjectDirs; +use serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(deny_unknown_fields)] +pub struct Config { + #[serde(default)] + pub api: ApiConfig, + + #[serde(default)] + pub ui: UiConfig, + + #[serde(default)] + pub defaults: DefaultsConfig, + + // Allow custom extensions + #[serde(flatten)] + pub extra: toml::Table, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ApiConfig { + #[serde(default = "default_endpoint")] + pub endpoint: String, + + #[serde(default = "default_timeout")] + pub timeout_seconds: u64, + + pub api_key: Option, + + #[serde(default)] + pub verify_tls: bool, +} + +fn default_endpoint() -> String { + "https://api.example.com".to_string() +} + +fn default_timeout() -> u64 { + 30 +} + +impl Config { + /// Load configuration from multiple sources with proper precedence + pub async fn load(cli_path: Option<&Path>) -> Result { + let mut config = Config::default(); + + // 1. Load from default locations + for path in Self::default_paths() { + if path.exists() { + config.merge_file(&path)?; + } + } + + // 2. Load from CLI-specified path + if let Some(path) = cli_path { + config.merge_file(path) + .with_context(|| format!("Failed to load config from {}", path.display()))?; + } + + // 3. Apply environment variables + config.merge_env()?; + + // 4. 
Validate final configuration + config.validate()?; + + Ok(config) + } + + fn default_paths() -> Vec { + let mut paths = Vec::new(); + + // System-wide config + paths.push(PathBuf::from("/etc/myctl/config.toml")); + + // User config + if let Some(proj_dirs) = ProjectDirs::from("com", "example", "myctl") { + paths.push(proj_dirs.config_dir().join("config.toml")); + } + + // Project-local config + paths.push(PathBuf::from(".myctl.toml")); + + paths + } + + fn merge_file(&mut self, path: &Path) -> Result<()> { + let contents = std::fs::read_to_string(path)?; + let file_config: Config = toml::from_str(&contents) + .with_context(|| format!("Invalid TOML in {}", path.display()))?; + + // Merge with existing config + self.merge(file_config); + Ok(()) + } + + fn merge_env(&mut self) -> Result<()> { + // Override with environment variables + if let Ok(endpoint) = std::env::var("MYCTL_API_ENDPOINT") { + self.api.endpoint = endpoint; + } + + if let Ok(key) = std::env::var("MYCTL_API_KEY") { + self.api.api_key = Some(key); + } + + if let Ok(timeout) = std::env::var("MYCTL_API_TIMEOUT") { + self.api.timeout_seconds = timeout.parse() + .context("MYCTL_API_TIMEOUT must be a number")?; + } + + Ok(()) + } + + fn validate(&self) -> Result<()> { + if self.api.timeout_seconds == 0 { + bail!("API timeout must be greater than 0"); + } + + if let Some(key) = &self.api.api_key { + if key.is_empty() { + bail!("API key cannot be empty"); + } + } + + Ok(()) + } +} + +// Create a config subcommand +pub fn config_command() -> Command { + Command::new("config") + .about("Manage configuration") + .subcommand( + Command::new("show") + .about("Show current configuration") + ) + .subcommand( + Command::new("edit") + .about("Edit configuration in your editor") + ) + .subcommand( + Command::new("validate") + .about("Validate configuration files") + ) + .subcommand( + Command::new("path") + .about("Show configuration file paths") + ) +} +``` + +### ✅ DO: Support Multiple Configuration Formats + 
+```rust +use serde::de::DeserializeOwned; + +pub enum ConfigFormat { + Toml, + Json, + Yaml, +} + +impl ConfigFormat { + pub fn from_path(path: &Path) -> Option { + match path.extension()?.to_str()? { + "toml" => Some(Self::Toml), + "json" => Some(Self::Json), + "yaml" | "yml" => Some(Self::Yaml), + _ => None, + } + } + + pub fn parse(&self, contents: &str) -> Result { + match self { + Self::Toml => toml::from_str(contents) + .context("Invalid TOML"), + Self::Json => serde_json::from_str(contents) + .context("Invalid JSON"), + Self::Yaml => serde_yaml::from_str(contents) + .context("Invalid YAML"), + } + } +} +``` + +--- + +## 6. Interactive CLI Features + +Modern CLI tools should provide rich interactive experiences when appropriate. + +### ✅ DO: Use Dialoguer for User Interaction + +```rust +use dialoguer::{theme::ColorfulTheme, Confirm, Input, Select, MultiSelect, Password}; +use console::style; + +pub async fn interactive_init() -> Result { + println!("{}", style("Welcome to MyCtl Setup!").bold().cyan()); + println!("This wizard will help you create a new project.\n"); + + // Text input with validation + let name: String = Input::with_theme(&ColorfulTheme::default()) + .with_prompt("Project name") + .validate_with(|input: &String| -> Result<(), &str> { + if input.is_empty() { + Err("Project name cannot be empty") + } else if !is_valid_project_name(input) { + Err("Project name can only contain letters, numbers, and hyphens") + } else { + Ok(()) + } + }) + .interact_text()?; + + // Selection from list + let template = Select::with_theme(&ColorfulTheme::default()) + .with_prompt("Select a project template") + .items(&["Web API", "CLI Tool", "Library", "Custom"]) + .default(0) + .interact()?; + + // Multi-select for features + let features = MultiSelect::with_theme(&ColorfulTheme::default()) + .with_prompt("Select features to enable") + .items(&[ + "Authentication", + "Database", + "Caching", + "Monitoring", + "CI/CD Pipeline", + ]) + .defaults(&[false, true, 
false, true, true]) + .interact()?; + + // Password input + let api_key = if Confirm::new() + .with_prompt("Do you want to configure API access now?") + .default(true) + .interact()? + { + Some(Password::new() + .with_prompt("API Key") + .with_confirmation("Confirm API Key", "Keys do not match") + .interact()?) + } else { + None + }; + + // Confirmation + println!("\n{}", style("Summary:").bold()); + println!(" Project: {}", style(&name).green()); + println!(" Template: {}", style(&template).green()); + println!(" Features: {} selected", style(features.len()).green()); + + if !Confirm::new() + .with_prompt("Create project with these settings?") + .default(true) + .interact()? + { + bail!("Project creation cancelled"); + } + + Ok(ProjectConfig { + name, + template, + features, + api_key, + }) +} +``` + +### ✅ DO: Implement Progress Indicators + +```rust +use indicatif::{ProgressBar, ProgressStyle, MultiProgress, ProgressIterator}; +use std::time::Duration; + +pub struct ProgressReporter { + multi: MultiProgress, + main_bar: ProgressBar, +} + +impl ProgressReporter { + pub fn new(total_steps: u64) -> Self { + let multi = MultiProgress::new(); + + let main_bar = multi.add(ProgressBar::new(total_steps)); + main_bar.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} {msg:<40} [{bar:40.cyan/blue}] {pos}/{len}") + .unwrap() + .progress_chars("#>-") + ); + + Self { multi, main_bar } + } + + pub fn add_subtask(&self, name: &str, total: u64) -> ProgressBar { + let bar = self.multi.add(ProgressBar::new(total)); + bar.set_style( + ProgressStyle::default_bar() + .template(" {msg:<38} [{bar:40.cyan/blue}] {pos}/{len}") + .unwrap() + .progress_chars("=>-") + ); + bar.set_message(name.to_string()); + bar + } + + pub fn finish_main(&self, message: &str) { + self.main_bar.finish_with_message(format!("✓ {}", message)); + } +} + +// Usage example +pub async fn process_files(files: Vec) -> Result<()> { + let progress = ProgressReporter::new(files.len() as u64); + + 
for (i, file) in files.iter().enumerate() { + progress.main_bar.set_message(format!("Processing {}", file.display())); + + // Create subtask progress + let file_size = file.metadata()?.len(); + let subtask = progress.add_subtask("Reading file", file_size); + + // Process with progress updates + process_file_with_progress(file, &subtask).await?; + + subtask.finish_with_message("✓ Complete"); + progress.main_bar.inc(1); + } + + progress.finish_main("All files processed"); + Ok(()) +} + +// Spinner for indeterminate progress +pub async fn long_operation(message: &str, operation: F) -> Result +where + F: FnOnce() -> Fut, + Fut: std::future::Future>, +{ + let spinner = ProgressBar::new_spinner(); + spinner.set_style( + ProgressStyle::default_spinner() + .template("{spinner:.green} {msg}") + .unwrap() + ); + spinner.set_message(message.to_string()); + spinner.enable_steady_tick(Duration::from_millis(80)); + + let result = operation().await; + + match &result { + Ok(_) => spinner.finish_with_message(format!("✓ {}", message)), + Err(_) => spinner.finish_with_message(format!("✗ {}", message)), + } + + result +} +``` + +### ✅ DO: Support Both Interactive and Non-Interactive Modes + +```rust +pub struct InteractionMode { + interactive: bool, + assume_yes: bool, + output_format: OutputFormat, +} + +impl InteractionMode { + pub fn from_cli(cli: &Cli) -> Self { + Self { + interactive: atty::is(atty::Stream::Stdin) && !cli.quiet, + assume_yes: cli.assume_yes, + output_format: cli.output, + } + } + + pub async fn confirm(&self, message: &str) -> Result { + if self.assume_yes { + return Ok(true); + } + + if !self.interactive { + bail!("Cannot prompt for confirmation in non-interactive mode. Use --yes to proceed."); + } + + Ok(Confirm::new() + .with_prompt(message) + .default(false) + .interact()?) 
+ } + + pub async fn select_one( + &self, + prompt: &str, + options: &[T], + default: Option, + ) -> Result { + if !self.interactive { + if let Some(idx) = default { + return Ok(idx); + } + bail!("Cannot prompt for selection in non-interactive mode"); + } + + let mut select = Select::with_theme(&ColorfulTheme::default()) + .with_prompt(prompt); + + for option in options { + select = select.item(option.to_string()); + } + + if let Some(idx) = default { + select = select.default(idx); + } + + Ok(select.interact()?) + } +} +``` + +--- + +## 7. Output Formatting & Display + +CLI tools need to present information clearly across different output formats. + +### ✅ DO: Implement Structured Output + +```rust +use serde::Serialize; +use colored::Colorize; +use comfy_table::{Table, presets::UTF8_FULL}; + +pub trait Displayable: Serialize { + fn display_human(&self) -> String; + fn display_json(&self) -> Result; + fn display_yaml(&self) -> Result; + fn display_table(&self) -> String; +} + +#[derive(Serialize)] +pub struct Resource { + pub id: String, + pub name: String, + pub status: Status, + pub created_at: chrono::DateTime, +} + +#[derive(Serialize, Clone, Copy)] +pub enum Status { + Running, + Stopped, + Failed, + Unknown, +} + +impl Status { + fn colored(&self) -> String { + match self { + Status::Running => "Running".green().to_string(), + Status::Stopped => "Stopped".yellow().to_string(), + Status::Failed => "Failed".red().to_string(), + Status::Unknown => "Unknown".dimmed().to_string(), + } + } +} + +impl Displayable for Vec { + fn display_human(&self) -> String { + if self.is_empty() { + return "No resources found".dimmed().to_string(); + } + + let mut output = String::new(); + for resource in self { + output.push_str(&format!( + "{} {} ({})\n", + resource.id.bright_blue(), + resource.name, + resource.status.colored() + )); + } + output + } + + fn display_json(&self) -> Result { + Ok(serde_json::to_string_pretty(self)?) 
+ } + + fn display_yaml(&self) -> Result { + Ok(serde_yaml::to_string(self)?) + } + + fn display_table(&self) -> String { + let mut table = Table::new(); + table.load_preset(UTF8_FULL); + table.set_header(vec!["ID", "Name", "Status", "Created"]); + + for resource in self { + table.add_row(vec![ + &resource.id, + &resource.name, + &resource.status.colored(), + &resource.created_at.format("%Y-%m-%d %H:%M").to_string(), + ]); + } + + table.to_string() + } +} + +// Generic output function +pub fn output(data: T, format: OutputFormat) -> Result<()> { + let output = match format { + OutputFormat::Auto | OutputFormat::Plain => data.display_human(), + OutputFormat::Json => data.display_json()?, + OutputFormat::Yaml => data.display_yaml()?, + OutputFormat::Table => data.display_table(), + }; + + println!("{}", output); + Ok(()) +} +``` + +### ✅ DO: Use Colors and Formatting Wisely + +```rust +use colored::*; +use console::{style, Emoji}; + +// Define consistent color scheme +pub struct Theme; + +impl Theme { + pub fn success(msg: S) -> String { + format!("{} {}", style("✓").green(), msg.to_string()) + } + + pub fn error(msg: S) -> String { + format!("{} {}", style("✗").red(), msg.to_string()) + } + + pub fn warning(msg: S) -> String { + format!("{} {}", style("⚠").yellow(), msg.to_string()) + } + + pub fn info(msg: S) -> String { + format!("{} {}", style("ℹ").blue(), msg.to_string()) + } + + pub fn highlight(text: S) -> String { + style(text.to_string()).bold().to_string() + } +} + +// Respect NO_COLOR environment variable +pub fn should_use_color() -> bool { + std::env::var("NO_COLOR").is_err() + && atty::is(atty::Stream::Stdout) + && !cfg!(windows) // Or check Windows terminal capabilities +} + +// Helper macros +#[macro_export] +macro_rules! success { + ($($arg:tt)*) => { + println!("{}", $crate::utils::Theme::success(format!($($arg)*))); + }; +} + +#[macro_export] +macro_rules! 
error { + ($($arg:tt)*) => { + eprintln!("{}", $crate::utils::Theme::error(format!($($arg)*))); + }; +} + +#[macro_export] +macro_rules! warning { + ($($arg:tt)*) => { + eprintln!("{}", $crate::utils::Theme::warning(format!($($arg)*))); + }; +} + +#[macro_export] +macro_rules! info { + ($($arg:tt)*) => { + println!("{}", $crate::utils::Theme::info(format!($($arg)*))); + }; +} +``` + +--- + +## 8. Testing CLI Applications + +Testing CLI tools requires special patterns to capture output and simulate user input. + +### ✅ DO: Use Integration Tests with assert_cmd + +```rust +// tests/integration/basic.rs +use assert_cmd::Command; +use predicates::prelude::*; +use tempfile::TempDir; + +#[test] +fn test_init_command() { + let temp = TempDir::new().unwrap(); + + Command::cargo_bin("myctl") + .unwrap() + .arg("init") + .arg("test-project") + .arg("--non-interactive") + .current_dir(&temp) + .assert() + .success() + .stdout(predicate::str::contains("Project created successfully")); + + // Verify files were created + assert!(temp.path().join("test-project").exists()); + assert!(temp.path().join("test-project/config.toml").exists()); +} + +#[test] +fn test_invalid_config() { + let temp = TempDir::new().unwrap(); + let config_path = temp.path().join("invalid.toml"); + std::fs::write(&config_path, "invalid = [toml").unwrap(); + + Command::cargo_bin("myctl") + .unwrap() + .arg("--config") + .arg(&config_path) + .arg("status") + .assert() + .failure() + .stderr(predicate::str::contains("Invalid TOML")); +} + +#[test] +fn test_json_output() { + Command::cargo_bin("myctl") + .unwrap() + .args(&["status", "--output", "json"]) + .assert() + .success() + .stdout(predicate::str::is_json()); +} + +// Test with timeout +#[tokio::test] +async fn test_long_running_command() { + use tokio::time::{timeout, Duration}; + + let mut cmd = Command::cargo_bin("myctl") + .unwrap() + .args(&["deploy", "local", "--port", "9999"]) + .spawn() + .unwrap(); + + // Should respond within 5 seconds + let 
result = timeout(Duration::from_secs(5), cmd.wait()).await; + + assert!(result.is_ok(), "Command timed out"); + assert!(result.unwrap().unwrap().success()); +} +``` + +### ✅ DO: Use Snapshot Testing with Insta + +```rust +// tests/snapshots.rs +use insta::assert_snapshot; +use assert_cmd::Command; + +#[test] +fn test_help_output() { + let output = Command::cargo_bin("myctl") + .unwrap() + .arg("--help") + .output() + .unwrap(); + + assert_snapshot!(String::from_utf8_lossy(&output.stdout)); +} + +#[test] +fn test_error_messages() { + let output = Command::cargo_bin("myctl") + .unwrap() + .arg("deploy") + .arg("nonexistent") + .output() + .unwrap(); + + assert!(!output.status.success()); + assert_snapshot!( + "deploy_error", + String::from_utf8_lossy(&output.stderr) + ); +} + +// Test with settings +#[test] +fn test_formatted_output() { + let output = get_status_output(); + + insta::with_settings!({ + filters => vec![ + // Replace timestamps with placeholder + (r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", "[TIMESTAMP]"), + // Replace UUIDs + (r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "[UUID]"), + ] + }, { + assert_snapshot!(output); + }); +} +``` + +### ✅ DO: Mock External Dependencies + +```rust +// tests/mocks.rs +use mockito::{mock, Mock}; +use std::env; + +pub struct ApiMock { + server_url: String, + mocks: Vec, +} + +impl ApiMock { + pub fn new() -> Self { + Self { + server_url: mockito::server_url(), + mocks: Vec::new(), + } + } + + pub fn mock_success(mut self) -> Self { + let m = mock("GET", "/api/status") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"status": "healthy", "version": "1.0.0"}"#) + .create(); + + self.mocks.push(m); + self + } + + pub fn mock_auth_failure(mut self) -> Self { + let m = mock("GET", mockito::Matcher::Any) + .with_status(401) + .with_body(r#"{"error": "Unauthorized"}"#) + .create(); + + self.mocks.push(m); + self + } + + pub fn run_test(self, test: F) + where + F: FnOnce() 
+ { + // Override API endpoint + env::set_var("MYCTL_API_ENDPOINT", &self.server_url); + + test(); + + // Verify all mocks were called + for mock in self.mocks { + mock.assert(); + } + } +} + +#[test] +fn test_with_mock_api() { + ApiMock::new() + .mock_success() + .run_test(|| { + Command::cargo_bin("myctl") + .unwrap() + .arg("status") + .assert() + .success() + .stdout(predicate::str::contains("healthy")); + }); +} +``` + +### ✅ DO: Benchmark CLI Performance + +```rust +// benches/performance.rs +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::process::Command; +use tempfile::TempDir; + +fn benchmark_startup(c: &mut Criterion) { + c.bench_function("cli startup", |b| { + b.iter(|| { + Command::new("target/release/myctl") + .arg("--version") + .output() + .unwrap(); + }); + }); +} + +fn benchmark_config_parsing(c: &mut Criterion) { + let temp = TempDir::new().unwrap(); + let config_path = temp.path().join("config.toml"); + std::fs::write(&config_path, include_str!("../fixtures/large_config.toml")).unwrap(); + + c.bench_function("parse large config", |b| { + b.iter(|| { + Command::new("target/release/myctl") + .arg("--config") + .arg(&config_path) + .arg("config") + .arg("validate") + .output() + .unwrap(); + }); + }); +} + +// Benchmark with hyperfine in CI +#[test] +fn hyperfine_benchmarks() { + if std::env::var("CI").is_ok() { + let output = Command::new("hyperfine") + .args(&[ + "--warmup", "3", + "--min-runs", "10", + "--export-json", "bench-results.json", + "'target/release/myctl --version'", + "'target/release/myctl status --output json'", + ]) + .output() + .expect("Failed to run hyperfine"); + + assert!(output.status.success()); + } +} + +criterion_group!(benches, benchmark_startup, benchmark_config_parsing); +criterion_main!(benches); +``` + +--- + +## 9. Distribution & Installation + +Getting your CLI tool into users' hands requires careful consideration of packaging and distribution. 
+ +### ✅ DO: Use cargo-dist for Cross-Platform Distribution + +```toml +# Cargo.toml +[package.metadata.dist] +# Automatically create GitHub releases with binaries +targets = ["x86_64-pc-windows-msvc", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "aarch64-apple-darwin"] +ci = ["github"] +installers = ["shell", "powershell", "homebrew", "msi"] +tap = "myorg/homebrew-tap" +``` + +```yaml +# .github/workflows/release.yml +name: Release + +on: + push: + tags: + - 'v*' + +jobs: + dist: + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + - os: windows-latest + target: x86_64-pc-windows-msvc + - os: macos-latest + target: x86_64-apple-darwin + - os: macos-latest + target: aarch64-apple-darwin + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust (nightly) + uses: dtolnay/rust-toolchain@nightly + with: + targets: ${{ matrix.target }} + + - name: Build + run: cargo build --release --target ${{ matrix.target }} + + - name: Create archive + shell: bash + run: | + if [ "${{ matrix.os }}" = "windows-latest" ]; then + 7z a myctl-${{ matrix.target }}.zip ./target/${{ matrix.target }}/release/myctl.exe + else + tar czf myctl-${{ matrix.target }}.tar.gz -C target/${{ matrix.target }}/release myctl + fi + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: binaries + path: myctl-* +``` + +### ✅ DO: Create Install Scripts + +```bash +#!/bin/sh +# install.sh - Universal installer script + +set -e + +REPO="myorg/myctl" +BINARY="myctl" + +# Detect OS and architecture +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +ARCH=$(uname -m) + +case "$OS" in + linux*) + case "$ARCH" in + x86_64) TARGET="x86_64-unknown-linux-gnu" ;; + aarch64) TARGET="aarch64-unknown-linux-gnu" ;; + *) echo "Unsupported architecture: $ARCH"; exit 1 ;; + esac + ;; + darwin*) + case "$ARCH" in + x86_64) TARGET="x86_64-apple-darwin" ;; + arm64) 
TARGET="aarch64-apple-darwin" ;; + *) echo "Unsupported architecture: $ARCH"; exit 1 ;; + esac + ;; + *) echo "Unsupported OS: $OS"; exit 1 ;; +esac + +# Get latest release +LATEST=$(curl -s https://api.github.com/repos/$REPO/releases/latest | grep tag_name | cut -d '"' -f 4) +URL="https://github.com/$REPO/releases/download/$LATEST/$BINARY-$TARGET.tar.gz" + +# Download and install +echo "Downloading $BINARY $LATEST for $TARGET..." +curl -sL "$URL" | tar xz + +# Install to user's bin directory +INSTALL_DIR="${HOME}/.local/bin" +mkdir -p "$INSTALL_DIR" +mv "$BINARY" "$INSTALL_DIR/" + +echo "Installed $BINARY to $INSTALL_DIR" +echo "Make sure $INSTALL_DIR is in your PATH" +``` + +### ✅ DO: Support Package Managers + +```ruby +# Homebrew formula (homebrew-tap/Formula/myctl.rb) +class Myctl < Formula + desc "Powerful automation tool" + homepage "https://github.com/myorg/myctl" + version "0.1.0" + + on_macos do + if Hardware::CPU.arm? + url "https://github.com/myorg/myctl/releases/download/v#{version}/myctl-aarch64-apple-darwin.tar.gz" + sha256 "..." + else + url "https://github.com/myorg/myctl/releases/download/v#{version}/myctl-x86_64-apple-darwin.tar.gz" + sha256 "..." + end + end + + on_linux do + if Hardware::CPU.arm? + url "https://github.com/myorg/myctl/releases/download/v#{version}/myctl-aarch64-unknown-linux-gnu.tar.gz" + sha256 "..." + else + url "https://github.com/myorg/myctl/releases/download/v#{version}/myctl-x86_64-unknown-linux-gnu.tar.gz" + sha256 "..." 
 + end + end + + def install + bin.install "myctl" + + # Generate completions + generate_completions_from_executable(bin/"myctl", "completions") + end + + test do + assert_match "myctl #{version}", shell_output("#{bin}/myctl --version") + end +end +``` + +### ✅ DO: Minimize Binary Size + +```toml +# Cargo.toml - Size optimizations +[profile.release-min] +inherits = "release" +opt-level = "z" # Optimize for size +lto = true # Link-time optimization +codegen-units = 1 # Single codegen unit +strip = true # Strip symbols +panic = "abort" # No unwinding + +# Use alternative allocator +[dependencies] +mimalloc = { version = "0.1", default-features = false } + +# Reduce regex size +regex = { version = "1.10", default-features = false, features = ["std", "perf"] } +``` + +```rust +// src/main.rs - Use mimalloc +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +``` + +--- + +## 10. Real-World Patterns + +### ✅ DO: Implement Plugins/Extensions + +```rust +// Plugin system using dynamic loading +use libloading::{Library, Symbol}; +use std::path::Path; + +pub trait Plugin: Send + Sync { + fn name(&self) -> &str; + fn version(&self) -> &str; + fn execute(&self, args: &[String]) -> Result<()>; +} + +pub struct PluginManager { + plugins: Vec<Box<dyn Plugin>>, +} + +impl PluginManager { + pub fn load_from_dir(dir: &Path) -> Result<Self> { + let mut plugins = Vec::new(); + + for entry in std::fs::read_dir(dir)? 
{ + let path = entry?.path(); + + if path.extension() == Some(std::ffi::OsStr::new("so")) + || path.extension() == Some(std::ffi::OsStr::new("dll")) + || path.extension() == Some(std::ffi::OsStr::new("dylib")) + { + match Self::load_plugin(&path) { + Ok(plugin) => { + info!("Loaded plugin: {}", plugin.name()); + plugins.push(plugin); + } + Err(e) => { + warning!("Failed to load plugin {}: {}", path.display(), e); + } + } + } + } + + Ok(Self { plugins }) + } + + unsafe fn load_plugin(path: &Path) -> Result> { + type PluginCreate = unsafe fn() -> *mut dyn Plugin; + + let lib = Library::new(path)?; + let constructor: Symbol = lib.get(b"_plugin_create")?; + let plugin = Box::from_raw(constructor()); + + std::mem::forget(lib); // Keep library loaded + Ok(plugin) + } +} + +// In external plugin crate +#[no_mangle] +pub extern "C" fn _plugin_create() -> *mut dyn Plugin { + Box::into_raw(Box::new(MyPlugin::new())) +} +``` + +### ✅ DO: Support Shell Integration + +```rust +// Generate shell functions for enhanced integration +pub fn generate_shell_integration(shell: Shell) -> String { + match shell { + Shell::Bash => r#" +# myctl bash integration +_myctl_cd() { + local dir=$(myctl workspace path "$1" 2>/dev/null) + if [ -n "$dir" ]; then + cd "$dir" + else + echo "Workspace not found: $1" >&2 + return 1 + fi +} + +alias mcd='_myctl_cd' + +# Auto-activate environment +_myctl_auto_env() { + if [ -f ".myctl.toml" ]; then + eval $(myctl env shell) + fi +} + +PROMPT_COMMAND="_myctl_auto_env;$PROMPT_COMMAND" +"#.to_string(), + + Shell::Zsh => r#" +# myctl zsh integration +myctl_cd() { + local dir=$(myctl workspace path "$1" 2>/dev/null) + if [ -n "$dir" ]; then + cd "$dir" + else + echo "Workspace not found: $1" >&2 + return 1 + fi +} + +alias mcd='myctl_cd' + +# Hook for auto-env +add-zsh-hook chpwd myctl_auto_env +myctl_auto_env() { + if [ -f ".myctl.toml" ]; then + eval $(myctl env shell) + fi +} +"#.to_string(), + + _ => String::new(), + } +} +``` + +### ✅ DO: Implement 
Update Checking + +```rust +use semver::Version; + +pub struct UpdateChecker { + current_version: Version, + check_url: String, +} + +impl UpdateChecker { + pub async fn check_for_updates(&self) -> Result<Option<Release>> { + // Check only once per day + if !self.should_check()? { + return Ok(None); + } + + let response = reqwest::Client::new() + .get(&self.check_url) + .timeout(Duration::from_secs(5)) + .send() + .await?; + + let latest: Release = response.json().await?; + let latest_version = Version::parse(&latest.version)?; + + if latest_version > self.current_version { + self.record_check()?; + Ok(Some(latest)) + } else { + Ok(None) + } + } + + fn should_check(&self) -> Result<bool> { + let config_dir = directories::ProjectDirs::from("com", "example", "myctl") + .context("Failed to get config directory")?; + + let check_file = config_dir.data_dir().join("last-update-check"); + + if !check_file.exists() { + return Ok(true); + } + + let metadata = std::fs::metadata(&check_file)?; + let modified = metadata.modified()?; + let elapsed = modified.elapsed().unwrap_or(Duration::MAX); + + Ok(elapsed > Duration::from_secs(86400)) // 24 hours + } + + fn record_check(&self) -> Result<()> { + let config_dir = directories::ProjectDirs::from("com", "example", "myctl") + .context("Failed to get config directory")?; + + std::fs::create_dir_all(config_dir.data_dir())?; + let check_file = config_dir.data_dir().join("last-update-check"); + std::fs::write(check_file, "")?; + + Ok(()) + } +} + +// Check on startup (non-blocking) +pub fn spawn_update_check() { + tokio::spawn(async { + let checker = UpdateChecker::new(); + + match checker.check_for_updates().await { + Ok(Some(release)) => { + eprintln!( + "\n{} {} → {} available", + style("Update:").green().bold(), + env!("CARGO_PKG_VERSION"), + style(&release.version).green() + ); + eprintln!( + "Install with: {}\n", + style("myctl self-update").cyan() + ); + } + Ok(None) => { + // No update available + } + Err(e) => { + debug!("Update check failed: {}", 
e); + } + } + }); +} +``` + +### ✅ DO: Handle Long-Running Operations + +```rust +use tokio::process::Command as TokioCommand; +use tokio::io::{AsyncBufReadExt, BufReader}; + +pub async fn run_subprocess_with_output( + cmd: &str, + args: &[&str], + on_line: impl Fn(&str), +) -> Result<()> { + let mut child = TokioCommand::new(cmd) + .args(args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .context("Failed to spawn subprocess")?; + + let stdout = child.stdout.take().unwrap(); + let stderr = child.stderr.take().unwrap(); + + let stdout_reader = BufReader::new(stdout); + let stderr_reader = BufReader::new(stderr); + + let mut stdout_lines = stdout_reader.lines(); + let mut stderr_lines = stderr_reader.lines(); + + loop { + tokio::select! { + line = stdout_lines.next_line() => { + match line? { + Some(line) => on_line(&line), + None => break, + } + } + line = stderr_lines.next_line() => { + match line? { + Some(line) => on_line(&line), + None => break, + } + } + } + } + + let status = child.wait().await?; + + if !status.success() { + bail!("Command failed with status: {}", status); + } + + Ok(()) +} + +// Usage +pub async fn build_project(path: &Path) -> Result<()> { + let spinner = ProgressBar::new_spinner(); + spinner.set_message("Building project..."); + + run_subprocess_with_output( + "cargo", + &["build", "--release"], + |line| { + // Update spinner with build progress + if line.contains("Compiling") { + spinner.set_message(line); + } + } + ).await?; + + spinner.finish_with_message("✓ Build complete"); + Ok(()) +} +``` + +--- + +## 11. 
Advanced Automation Patterns + +### ✅ DO: Implement Task Automation DSL + +```rust +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Workflow { + pub name: String, + pub description: Option, + pub tasks: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Task { + pub name: String, + pub run: RunConfig, + #[serde(default)] + pub when: Condition, + #[serde(default)] + pub retry: RetryConfig, + #[serde(default)] + pub depends_on: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum RunConfig { + Command(String), + Script { script: String, shell: Option }, + Function { function: String, args: toml::Table }, +} + +#[derive(Debug, Serialize, Deserialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum Condition { + #[default] + Always, + OnSuccess, + OnFailure, + Expression(String), +} + +pub struct WorkflowEngine { + functions: HashMap>, +} + +#[async_trait] +pub trait TaskFunction: Send + Sync { + async fn execute(&self, args: &toml::Table) -> Result; +} + +impl WorkflowEngine { + pub async fn execute_workflow(&self, workflow: Workflow) -> Result<()> { + let mut completed = HashSet::new(); + let mut results = HashMap::new(); + + while completed.len() < workflow.tasks.len() { + let mut progress = false; + + for task in &workflow.tasks { + if completed.contains(&task.name) { + continue; + } + + // Check dependencies + if task.depends_on.iter().all(|dep| completed.contains(dep)) { + info!("Executing task: {}", task.name); + + let result = self.execute_task(task, &results).await; + + match result { + Ok(value) => { + results.insert(task.name.clone(), value); + completed.insert(task.name.clone()); + progress = true; + } + Err(e) => { + error!("Task {} failed: {}", task.name, e); + return Err(e); + } + } + } + } + + if !progress { + bail!("Circular dependency detected in workflow"); + } + } + + Ok(()) + } + + async fn execute_task( + &self, + task: &Task, + context: &HashMap, + ) 
-> Result { + match &task.run { + RunConfig::Command(cmd) => { + let output = shell_words::split(cmd)?; + let result = TokioCommand::new(&output[0]) + .args(&output[1..]) + .output() + .await?; + + if !result.status.success() { + bail!("Command failed: {}", cmd); + } + + Ok(json!({ + "stdout": String::from_utf8_lossy(&result.stdout), + "stderr": String::from_utf8_lossy(&result.stderr), + })) + } + + RunConfig::Script { script, shell } => { + let shell = shell.as_deref().unwrap_or("sh"); + let result = TokioCommand::new(shell) + .arg("-c") + .arg(script) + .output() + .await?; + + Ok(json!({ + "stdout": String::from_utf8_lossy(&result.stdout), + "stderr": String::from_utf8_lossy(&result.stderr), + })) + } + + RunConfig::Function { function, args } => { + let func = self.functions.get(function) + .ok_or_else(|| anyhow!("Unknown function: {}", function))?; + + func.execute(args).await + } + } + } +} +``` + +### ✅ DO: Create Smart File Watchers + +```rust +use notify::{Config, RecommendedWatcher, RecursiveMode, Watcher}; +use tokio::sync::mpsc; + +pub struct FileWatcher { + watcher: RecommendedWatcher, + rx: mpsc::Receiver, +} + +#[derive(Debug, Clone)] +pub enum WatchEvent { + Changed(PathBuf), + Created(PathBuf), + Removed(PathBuf), +} + +impl FileWatcher { + pub fn new(paths: Vec, ignore_patterns: Vec) -> Result { + let (tx, rx) = mpsc::channel(100); + let ignore = GlobSet::from_patterns(&ignore_patterns)?; + + let mut watcher = RecommendedWatcher::new( + move |res: notify::Result| { + if let Ok(event) = res { + let path = &event.paths[0]; + + // Apply ignore patterns + if ignore.is_match(path) { + return; + } + + let watch_event = match event.kind { + notify::EventKind::Create(_) => WatchEvent::Created(path.clone()), + notify::EventKind::Modify(_) => WatchEvent::Changed(path.clone()), + notify::EventKind::Remove(_) => WatchEvent::Removed(path.clone()), + _ => return, + }; + + let _ = tx.blocking_send(watch_event); + } + }, + Config::default(), + )?; + + // Watch 
all paths + for path in paths { + watcher.watch(&path, RecursiveMode::Recursive)?; + } + + Ok(Self { watcher, rx }) + } + + pub async fn watch( + mut self, + mut on_change: F, + ) -> Result<()> + where + F: FnMut(WatchEvent) -> Fut, + Fut: Future>, + { + let mut debounce = HashMap::new(); + let debounce_duration = Duration::from_millis(100); + + while let Some(event) = self.rx.recv().await { + let path = match &event { + WatchEvent::Changed(p) | WatchEvent::Created(p) | WatchEvent::Removed(p) => p, + }; + + // Debounce rapid changes + let now = Instant::now(); + if let Some(last) = debounce.get(path) { + if now.duration_since(*last) < debounce_duration { + continue; + } + } + debounce.insert(path.clone(), now); + + if let Err(e) = on_change(event).await { + error!("Handler error: {}", e); + } + } + + Ok(()) + } +} + +// Usage +pub async fn watch_and_rebuild(project_dir: PathBuf) -> Result<()> { + let watcher = FileWatcher::new( + vec![project_dir.join("src")], + vec!["*.tmp".to_string(), "target/*".to_string()], + )?; + + info!("Watching for changes..."); + + watcher.watch(|event| async move { + match event { + WatchEvent::Changed(path) | WatchEvent::Created(path) => { + info!("Detected change in {}", path.display()); + + // Rebuild project + long_operation("Rebuilding", || async { + run_build().await + }).await?; + + success!("Build complete"); + } + WatchEvent::Removed(_) => { + // Ignore removals + } + } + + Ok(()) + }).await +} +``` + +--- + +## 12. 
Performance Optimization + +### ✅ DO: Optimize Startup Time + +```rust +// Use lazy initialization for expensive operations +use once_cell::sync::Lazy; + +static CONFIG: Lazy = Lazy::new(|| { + Config::load_from_default_location() + .expect("Failed to load config") +}); + +// Defer imports until needed +pub async fn handle_rare_command() -> Result<()> { + // Only load heavy dependency when this command runs + use heavy_dependency::ComplexProcessor; + + let processor = ComplexProcessor::new(); + processor.run().await +} + +// Use compile-time includes for static data +static HELP_TEXT: &str = include_str!("../help.txt"); +static DEFAULT_CONFIG: &[u8] = include_bytes!("../default-config.toml"); + +// Profile startup time +#[cfg(feature = "profiling")] +fn main() { + let start = std::time::Instant::now(); + + let result = actual_main(); + + eprintln!("Startup time: {:?}", start.elapsed()); + + std::process::exit(match result { + Ok(()) => 0, + Err(_) => 1, + }); +} +``` + +### ✅ DO: Use Zero-Copy Parsing + +```rust +use nom::{ + IResult, + bytes::complete::{tag, take_until}, + character::complete::{line_ending, not_line_ending}, + multi::many0, + sequence::{delimited, pair}, +}; + +// Parse without allocations +pub fn parse_config_line(input: &str) -> IResult<&str, (&str, &str)> { + pair( + take_until("="), + delimited(tag("="), not_line_ending, line_ending), + )(input) +} + +// Use memory-mapped files for large inputs +use memmap2::Mmap; + +pub fn process_large_file(path: &Path) -> Result<()> { + let file = std::fs::File::open(path)?; + let mmap = unsafe { Mmap::map(&file)? }; + + // Process directly from memory-mapped data + let text = std::str::from_utf8(&mmap)?; + + for line in text.lines() { + // Zero-copy line processing + process_line(line)?; + } + + Ok(()) +} +``` + +--- + +## 13. 
Security Best Practices + +### ✅ DO: Validate All External Input + +```rust +use validator::{Validate, ValidationError}; + +#[derive(Debug, Validate)] +pub struct DeploymentConfig { + #[validate(length(min = 1, max = 64), regex = "IDENTIFIER_REGEX")] + pub name: String, + + #[validate(url)] + pub endpoint: String, + + #[validate(range(min = 1, max = 65535))] + pub port: u16, + + #[validate(custom = "validate_path")] + pub working_dir: PathBuf, +} + +static IDENTIFIER_REGEX: Lazy = Lazy::new(|| { + regex::Regex::new(r"^[a-zA-Z][a-zA-Z0-9_-]*$").unwrap() +}); + +fn validate_path(path: &PathBuf) -> Result<(), ValidationError> { + // Prevent directory traversal + if path.components().any(|c| matches!(c, std::path::Component::ParentDir)) { + return Err(ValidationError::new("invalid_path")); + } + + // Must be within working directory + if !path.starts_with("/home/user/projects") { + return Err(ValidationError::new("outside_working_directory")); + } + + Ok(()) +} + +// Sanitize shell commands +pub fn run_user_command(cmd: &str) -> Result<()> { + // Never pass user input directly to shell + let parts = shell_words::split(cmd)?; + + if parts.is_empty() { + bail!("Empty command"); + } + + // Whitelist allowed commands + let allowed_commands = ["ls", "cat", "grep", "find"]; + if !allowed_commands.contains(&parts[0].as_str()) { + bail!("Command not allowed: {}", parts[0]); + } + + let output = std::process::Command::new(&parts[0]) + .args(&parts[1..]) + .output()?; + + if !output.status.success() { + bail!("Command failed"); + } + + Ok(()) +} +``` + +### ✅ DO: Store Secrets Securely + +```rust +use keyring::Entry; +use aes_gcm::{ + aead::{Aead, KeyInit, OsRng}, + Aes256Gcm, Nonce, Key, +}; + +pub struct SecretStore { + app_name: String, +} + +impl SecretStore { + pub fn new(app_name: &str) -> Self { + Self { + app_name: app_name.to_string(), + } + } + + // Store in OS keychain + pub fn store_token(&self, name: &str, token: &str) -> Result<()> { + let entry = 
Entry::new(&self.app_name, name)?; + entry.set_password(token)?; + Ok(()) + } + + pub fn get_token(&self, name: &str) -> Result> { + let entry = Entry::new(&self.app_name, name)?; + match entry.get_password() { + Ok(token) => Ok(Some(token)), + Err(keyring::Error::NoEntry) => Ok(None), + Err(e) => Err(e.into()), + } + } + + // Encrypt sensitive files + pub fn encrypt_file(&self, path: &Path, key: &[u8; 32]) -> Result<()> { + let cipher = Aes256Gcm::new(Key::::from_slice(key)); + let nonce = Aes256Gcm::generate_nonce(&mut OsRng); + + let plaintext = std::fs::read(path)?; + let ciphertext = cipher.encrypt(&nonce, plaintext.as_ref()) + .map_err(|e| anyhow!("Encryption failed: {}", e))?; + + // Write nonce + ciphertext + let mut output = nonce.to_vec(); + output.extend_from_slice(&ciphertext); + + let encrypted_path = path.with_extension("enc"); + std::fs::write(encrypted_path, output)?; + + // Securely delete original + std::fs::remove_file(path)?; + + Ok(()) + } +} +``` + +--- + +## 14. Debugging and Diagnostics + +### ✅ DO: Implement Comprehensive Debug Mode + +```rust +pub struct DebugMode { + enabled: bool, + trace_file: Option, +} + +impl DebugMode { + pub fn from_env() -> Self { + let enabled = std::env::var("MYCTL_DEBUG").is_ok(); + + let trace_file = if enabled { + std::env::var("MYCTL_TRACE_FILE") + .ok() + .and_then(|path| File::create(path).ok()) + } else { + None + }; + + Self { enabled, trace_file } + } + + pub fn trace(&mut self, f: F) + where + F: FnOnce() -> String, + { + if self.enabled { + let msg = f(); + eprintln!("{} {}", style("[TRACE]").dim(), msg); + + if let Some(file) = &mut self.trace_file { + writeln!(file, "[{}] {}", chrono::Local::now(), msg).ok(); + } + } + } +} + +// Debug command implementation +pub async fn debug_info() -> Result<()> { + println!("{}", style("System Information").bold().underline()); + println!("Version: {}", env!("CARGO_PKG_VERSION")); + println!("Commit: {}", env!("VERGEN_GIT_SHA")); + println!("Built: {}", 
env!("VERGEN_BUILD_TIMESTAMP")); + println!("Rust: {}", env!("VERGEN_RUSTC_SEMVER")); + + println!("\n{}", style("Environment").bold().underline()); + for (key, value) in std::env::vars() { + if key.starts_with("MYCTL_") { + println!("{}: {}", key, value); + } + } + + println!("\n{}", style("Configuration").bold().underline()); + let config = Config::load(None).await?; + println!("{:#?}", config); + + println!("\n{}", style("Paths").bold().underline()); + if let Some(dirs) = directories::ProjectDirs::from("com", "example", "myctl") { + println!("Config: {}", dirs.config_dir().display()); + println!("Data: {}", dirs.data_dir().display()); + println!("Cache: {}", dirs.cache_dir().display()); + } + + Ok(()) +} + +// Performance tracing +#[instrument(level = "debug", skip(client))] +pub async fn api_call(client: &Client, endpoint: &str) -> Result { + let start = Instant::now(); + + let response = client.get(endpoint).send().await?; + + debug!( + elapsed = ?start.elapsed(), + status = response.status().as_u16(), + "API call completed" + ); + + Ok(response) +} +``` + +--- + +## Conclusion + +This guide provides a comprehensive foundation for building professional CLI tools with Rust. The key principles to remember: + +1. **User Experience First** - Fast startup, helpful errors, beautiful output +2. **Robustness** - Handle errors gracefully, validate inputs, test thoroughly +3. **Performance** - Profile before optimizing, use async wisely, minimize allocations +4. **Flexibility** - Support multiple platforms, output formats, and use cases +5. **Maintainability** - Structure code well, document thoroughly, automate releases + +The Rust ecosystem for CLI tools continues to evolve rapidly. Stay updated with the latest crate versions and patterns, but always prioritize user experience and reliability over using the newest features. 
+ +For more examples and the latest updates to this guide, visit the companion repository at [github.com/rust-cli/definitive-guide](https://github.com/rust-cli/definitive-guide). diff --git a/docs/prd/checkpoint-3.md b/docs/prd/checkpoint-3.md new file mode 100644 index 0000000..826edc1 --- /dev/null +++ b/docs/prd/checkpoint-3.md @@ -0,0 +1,2659 @@ +# Checkpoint 3: Search & Sync MVP + +> **Status:** Planning +> **Prerequisite:** Checkpoints 0, 1, 2 complete (issues, MRs, discussions ingested) +> **Goal:** Deliver working semantic + lexical hybrid search with efficient incremental sync + +This checkpoint consolidates SPEC.md checkpoints 3A, 3B, 4, and 5 into a unified implementation plan. The work is structured for parallel agent execution where dependencies allow. + +All code integrates with existing `gitlab-inbox` infrastructure: +- Error handling via `GiError` and `ErrorCode` in `src/core/error.rs` +- CLI patterns matching `src/cli/commands/*.rs` (run functions, JSON/human output) +- Database via `rusqlite::Connection` with migrations in `migrations/` +- Config via `src/core/config.rs` (EmbeddingConfig already defined) +- Robot mode JSON with `{"ok": true, "data": {...}}` pattern + +--- + +## Executive Summary + +**Deliverables:** +1. Document generation from issues/MRs/discussions with FTS5 indexing +2. Ollama-powered embedding pipeline with sqlite-vec storage +3. Hybrid search (RRF-ranked vector + lexical) with rich filtering +4. 
Orchestrated `gi sync` command with incremental re-embedding + +**Key Design Decisions:** +- Documents are the search unit (not raw entities) +- FTS5 works standalone when Ollama unavailable (graceful degradation) +- sqlite-vec `rowid = documents.id` for simple joins +- RRF ranking avoids score normalization complexity +- Queue-based discussion fetching isolates failures + +--- + +## Phase 1: Schema Foundation + +### 1.1 Documents Schema (Migration 007) + +**File:** `migrations/007_documents.sql` + +```sql +-- Unified searchable documents (derived from issues/MRs/discussions) +CREATE TABLE documents ( + id INTEGER PRIMARY KEY, + source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')), + source_id INTEGER NOT NULL, -- local DB id in the source table + project_id INTEGER NOT NULL REFERENCES projects(id), + author_username TEXT, -- for discussions: first note author + label_names TEXT, -- JSON array (display/debug only) + created_at INTEGER, -- ms epoch UTC + updated_at INTEGER, -- ms epoch UTC + url TEXT, + title TEXT, -- null for discussions + content_text TEXT NOT NULL, -- canonical text for embedding/search + content_hash TEXT NOT NULL, -- SHA-256 for change detection + is_truncated INTEGER NOT NULL DEFAULT 0, + truncated_reason TEXT CHECK ( + truncated_reason IN ('token_limit_middle_drop','single_note_oversized','first_last_oversized') + OR truncated_reason IS NULL + ), + UNIQUE(source_type, source_id) +); + +CREATE INDEX idx_documents_project_updated ON documents(project_id, updated_at); +CREATE INDEX idx_documents_author ON documents(author_username); +CREATE INDEX idx_documents_source ON documents(source_type, source_id); +CREATE INDEX idx_documents_hash ON documents(content_hash); + +-- Fast label filtering (indexed exact-match) +CREATE TABLE document_labels ( + document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE, + label_name TEXT NOT NULL, + PRIMARY KEY(document_id, label_name) +) WITHOUT ROWID; +CREATE 
INDEX idx_document_labels_label ON document_labels(label_name); + +-- Fast path filtering (DiffNote file paths) +CREATE TABLE document_paths ( + document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE, + path TEXT NOT NULL, + PRIMARY KEY(document_id, path) +) WITHOUT ROWID; +CREATE INDEX idx_document_paths_path ON document_paths(path); + +-- Queue for incremental document regeneration (with retry tracking) +CREATE TABLE dirty_sources ( + source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')), + source_id INTEGER NOT NULL, + queued_at INTEGER NOT NULL, -- ms epoch UTC + attempt_count INTEGER NOT NULL DEFAULT 0, + last_attempt_at INTEGER, + last_error TEXT, + PRIMARY KEY(source_type, source_id) +); +CREATE INDEX idx_dirty_sources_retry + ON dirty_sources(attempt_count, last_attempt_at) + WHERE last_error IS NOT NULL; + +-- Resumable queue for dependent discussion fetching +CREATE TABLE pending_discussion_fetches ( + project_id INTEGER NOT NULL REFERENCES projects(id), + noteable_type TEXT NOT NULL, -- 'Issue' | 'MergeRequest' + noteable_iid INTEGER NOT NULL, + queued_at INTEGER NOT NULL, -- ms epoch UTC + attempt_count INTEGER NOT NULL DEFAULT 0, + last_attempt_at INTEGER, + last_error TEXT, + PRIMARY KEY(project_id, noteable_type, noteable_iid) +); +CREATE INDEX idx_pending_discussions_retry + ON pending_discussion_fetches(attempt_count, last_attempt_at) + WHERE last_error IS NOT NULL; +``` + +**Acceptance Criteria:** +- [ ] Migration applies cleanly on fresh DB +- [ ] Migration applies cleanly after CP2 schema +- [ ] All foreign keys enforced +- [ ] Indexes created + +--- + +### 1.2 FTS5 Index (Migration 008) + +**File:** `migrations/008_fts5.sql` + +```sql +-- Full-text search with porter stemmer and prefix indexes for type-ahead +CREATE VIRTUAL TABLE documents_fts USING fts5( + title, + content_text, + content='documents', + content_rowid='id', + tokenize='porter unicode61', + prefix='2 3 4' +); + +-- Keep FTS in 
sync via triggers +CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN + INSERT INTO documents_fts(rowid, title, content_text) + VALUES (new.id, new.title, new.content_text); +END; + +CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN + INSERT INTO documents_fts(documents_fts, rowid, title, content_text) + VALUES('delete', old.id, old.title, old.content_text); +END; + +-- Only rebuild FTS when searchable text actually changes (not metadata-only updates) +CREATE TRIGGER documents_au AFTER UPDATE ON documents +WHEN old.title IS NOT new.title OR old.content_text != new.content_text +BEGIN + INSERT INTO documents_fts(documents_fts, rowid, title, content_text) + VALUES('delete', old.id, old.title, old.content_text); + INSERT INTO documents_fts(rowid, title, content_text) + VALUES (new.id, new.title, new.content_text); +END; +``` + +**Acceptance Criteria:** +- [ ] `documents_fts` created as virtual table +- [ ] Triggers fire on insert/update/delete +- [ ] Update trigger only fires when title or content_text changes (not metadata-only updates) +- [ ] FTS row count matches documents count after bulk insert +- [ ] Prefix search works for type-ahead UX + +--- + +### 1.3 Embeddings Schema (Migration 009) + +**File:** `migrations/009_embeddings.sql` + +```sql +-- NOTE: sqlite-vec vec0 virtual tables cannot participate in FK cascades. +-- We must use an explicit trigger to delete orphan embeddings when documents +-- are deleted. See documents_embeddings_ad trigger below. 
+ +-- sqlite-vec virtual table for vector search +-- Storage rule: embeddings.rowid = documents.id +CREATE VIRTUAL TABLE embeddings USING vec0( + embedding float[768] +); + +-- Embedding provenance + change detection +CREATE TABLE embedding_metadata ( + document_id INTEGER PRIMARY KEY REFERENCES documents(id) ON DELETE CASCADE, + model TEXT NOT NULL, -- 'nomic-embed-text' + dims INTEGER NOT NULL, -- 768 + content_hash TEXT NOT NULL, -- copied from documents.content_hash + created_at INTEGER NOT NULL, -- ms epoch UTC + last_error TEXT, -- error message from last failed attempt + attempt_count INTEGER NOT NULL DEFAULT 0, + last_attempt_at INTEGER -- ms epoch UTC +); + +CREATE INDEX idx_embedding_metadata_errors + ON embedding_metadata(last_error) WHERE last_error IS NOT NULL; +CREATE INDEX idx_embedding_metadata_hash ON embedding_metadata(content_hash); + +-- CRITICAL: Delete orphan embeddings when documents are deleted. +-- vec0 virtual tables don't support FK ON DELETE CASCADE, so we need this trigger. 
+-- embedding_metadata has ON DELETE CASCADE, so only vec0 needs explicit cleanup +CREATE TRIGGER documents_embeddings_ad AFTER DELETE ON documents BEGIN + DELETE FROM embeddings WHERE rowid = old.id; +END; +``` + +**Acceptance Criteria:** +- [ ] `embeddings` vec0 table created +- [ ] `embedding_metadata` tracks provenance +- [ ] Error tracking fields present for retry logic +- [ ] Orphan cleanup trigger fires on document deletion + +**Dependencies:** +- Requires sqlite-vec extension loaded at runtime +- Extension loading already happens in `src/core/db.rs` +- [ ] Migration runner must load sqlite-vec *before* applying migrations (including on fresh DB) + +--- + +## Phase 2: Document Generation + +### 2.1 Document Module Structure + +**New module:** `src/documents/` + +``` +src/documents/ +├── mod.rs # Module exports +├── extractor.rs # Document extraction from entities +├── truncation.rs # Note-boundary aware truncation +└── regenerator.rs # Dirty source processing +``` + +**File:** `src/documents/mod.rs` + +```rust +//! Document generation and management. +//! +//! Extracts searchable documents from issues, MRs, and discussions. + +mod extractor; +mod regenerator; +mod truncation; + +pub use extractor::{ + extract_discussion_document, extract_issue_document, extract_mr_document, + DocumentData, SourceType, +}; +// Note: extract_*_document() return Result> +// None means the source entity was deleted from the database +pub use regenerator::regenerate_dirty_documents; +pub use truncation::{truncate_content, TruncationResult}; +``` + +**Update `src/lib.rs`:** +```rust +pub mod documents; // Add to existing modules +``` + +--- + +### 2.2 Document Types + +**File:** `src/documents/extractor.rs` + +```rust +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +/// Source type for documents. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SourceType { + Issue, + MergeRequest, + Discussion, +} + +impl SourceType { + pub fn as_str(&self) -> &'static str { + match self { + Self::Issue => "issue", + Self::MergeRequest => "merge_request", + Self::Discussion => "discussion", + } + } +} + +impl std::fmt::Display for SourceType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +/// Generated document ready for storage. +#[derive(Debug, Clone)] +pub struct DocumentData { + pub source_type: SourceType, + pub source_id: i64, + pub project_id: i64, + pub author_username: Option, + pub labels: Vec, + pub paths: Vec, // DiffNote file paths + pub created_at: i64, + pub updated_at: i64, + pub url: Option, + pub title: Option, + pub content_text: String, + pub content_hash: String, + pub is_truncated: bool, + pub truncated_reason: Option, +} + +/// Compute SHA-256 hash of content. +pub fn compute_content_hash(content: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(content.as_bytes()); + format!("{:x}", hasher.finalize()) +} +``` + +**Document Formats:** + +| Source | content_text | +|--------|-------------| +| Issue | `{title}\n\n{description}` | +| MR | `{title}\n\n{description}` | +| Discussion | Full thread with header (see below) | + +**Discussion Document Format:** +``` +[[Discussion]] Issue #234: Authentication redesign +Project: group/project-one +URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345 +Labels: ["bug", "auth"] +Files: ["src/auth/login.ts"] + +--- Thread --- + +@johndoe (2024-03-15): +I think we should move to JWT-based auth... + +@janedoe (2024-03-15): +Agreed. What about refresh token strategy? 
+``` + +**Acceptance Criteria:** +- [ ] Issue document: title + description concatenated +- [ ] MR document: title + description concatenated +- [ ] Discussion document: includes parent title, project, URL, labels, files, thread +- [ ] System notes (is_system=1) excluded from discussion content +- [ ] DiffNote file paths extracted to paths vector +- [ ] Labels extracted to labels vector +- [ ] SHA-256 hash computed from content_text + +--- + +### 2.3 Truncation Logic + +**File:** `src/documents/truncation.rs` + +```rust +/// Maximum content length (~8,000 tokens at 4 chars/token estimate). +pub const MAX_CONTENT_CHARS: usize = 32_000; + +/// Truncation result with metadata. +#[derive(Debug, Clone)] +pub struct TruncationResult { + pub content: String, + pub is_truncated: bool, + pub reason: Option, +} + +/// Reason for truncation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TruncationReason { + TokenLimitMiddleDrop, + SingleNoteOversized, + FirstLastOversized, +} + +impl TruncationReason { + pub fn as_str(&self) -> &'static str { + match self { + Self::TokenLimitMiddleDrop => "token_limit_middle_drop", + Self::SingleNoteOversized => "single_note_oversized", + Self::FirstLastOversized => "first_last_oversized", + } + } +} + +/// Truncate content at note boundaries. +/// +/// Rules: +/// - Max content: 32,000 characters +/// - Truncate at NOTE boundaries (never mid-note) +/// - Preserve first N notes and last M notes +/// - Drop from middle, insert marker +pub fn truncate_content(notes: &[NoteContent], max_chars: usize) -> TruncationResult { + // Implementation handles edge cases per table below + todo!() +} + +/// Note content for truncation. 
+pub struct NoteContent { + pub author: String, + pub date: String, + pub body: String, +} +``` + +**Edge Cases:** +| Scenario | Handling | +|----------|----------| +| Single note > 32000 chars | Truncate at char boundary, append `[truncated]`, reason = `single_note_oversized` | +| First + last note > 32000 | Keep only first note (truncated if needed), reason = `first_last_oversized` | +| Only one note | Truncate at char boundary if needed | + +**Acceptance Criteria:** +- [ ] Notes never cut mid-content +- [ ] First and last notes preserved when possible +- [ ] Truncation marker `\n\n[... N notes omitted for length ...]\n\n` inserted +- [ ] Metadata fields set correctly +- [ ] Edge cases handled per table above + +--- + +### 2.4 CLI: `gi generate-docs` (Incremental by Default) + +**File:** `src/cli/commands/generate_docs.rs` + +```rust +//! Generate documents command - create searchable documents from entities. +//! +//! By default, runs incrementally (processes only dirty_sources queue). +//! Use --full to regenerate all documents from scratch. + +use rusqlite::Connection; +use serde::Serialize; + +use crate::core::error::Result; +use crate::documents::{DocumentData, SourceType}; +use crate::Config; + +/// Result of document generation. +#[derive(Debug, Serialize)] +pub struct GenerateDocsResult { + pub issues: usize, + pub mrs: usize, + pub discussions: usize, + pub total: usize, + pub truncated: usize, + pub skipped: usize, // Unchanged documents +} + +/// Run document generation (incremental by default). +/// +/// Incremental mode (default): +/// - Processes only items in dirty_sources queue +/// - Fast for routine syncs +/// +/// Full mode (--full): +/// - Regenerates ALL documents from scratch +/// - Use when schema changes or after migration +pub fn run_generate_docs( + config: &Config, + full: bool, + project_filter: Option<&str>, +) -> Result { + if full { + // Full mode: regenerate everything inside a single transaction + // 1. 
BEGIN IMMEDIATE transaction + // 2. Query all issues, MRs, discussions + // 3. For each: generate document, compute hash + // 4. Upsert into `documents` table (FTS triggers auto-fire) + // 5. Populate `document_labels` and `document_paths` + // 6. Rebuild FTS: INSERT INTO documents_fts(documents_fts) VALUES('rebuild') + // 7. COMMIT + // 8. Return counts + // + // The FTS rebuild at step 6 ensures the index is consistent + // after bulk operations. Wrapping in a transaction avoids + // partial state if the process is interrupted. + } else { + // Incremental mode: process dirty_sources only + // 1. Query dirty_sources (bounded by LIMIT) + // 2. Regenerate only those documents + // 3. Clear from dirty_sources after processing + } + todo!() +} + +/// Print human-readable output. +pub fn print_generate_docs(result: &GenerateDocsResult) { + println!("Document generation complete:"); + println!(" Issues: {:>6} documents", result.issues); + println!(" MRs: {:>6} documents", result.mrs); + println!(" Discussions: {:>6} documents", result.discussions); + println!(" ─────────────────────"); + println!(" Total: {:>6} documents", result.total); + if result.truncated > 0 { + println!(" Truncated: {:>6}", result.truncated); + } + if result.skipped > 0 { + println!(" Skipped: {:>6} (unchanged)", result.skipped); + } +} + +/// Print JSON output for robot mode. +pub fn print_generate_docs_json(result: &GenerateDocsResult) { + let output = serde_json::json!({ + "ok": true, + "data": result + }); + println!("{}", serde_json::to_string_pretty(&output).unwrap()); +} +``` + +**CLI integration in `src/cli/mod.rs`:** +```rust +/// Generate-docs subcommand arguments. 
+#[derive(Args)] +pub struct GenerateDocsArgs { + /// Regenerate ALL documents (not just dirty queue) + #[arg(long)] + full: bool, + + /// Only generate for specific project + #[arg(long)] + project: Option, +} +``` + +**Acceptance Criteria:** +- [ ] Creates document for each issue +- [ ] Creates document for each MR +- [ ] Creates document for each discussion +- [ ] Default mode processes dirty_sources queue only (incremental) +- [ ] `--full` regenerates all documents from scratch +- [ ] Progress bar in human mode (via `indicatif`) +- [ ] JSON output in robot mode + +--- + +## Phase 3: Lexical Search + +### 3.1 Search Module Structure + +**New module:** `src/search/` + +``` +src/search/ +├── mod.rs # Module exports +├── fts.rs # FTS5 search +├── vector.rs # Vector search (sqlite-vec) +├── hybrid.rs # Combined hybrid search +├── rrf.rs # RRF ranking algorithm +└── filters.rs # Filter parsing and application +``` + +**File:** `src/search/mod.rs` + +```rust +//! Search functionality for documents. +//! +//! Supports lexical (FTS5), semantic (vector), and hybrid search. + +mod filters; +mod fts; +mod hybrid; +mod rrf; +mod vector; + +pub use filters::{SearchFilters, apply_filters}; +pub use fts::{search_fts, FtsResult}; +pub use hybrid::{search_hybrid, HybridResult, SearchMode}; +pub use rrf::{rank_rrf, RrfResult}; +pub use vector::{search_vector, VectorResult}; +``` + +--- + +### 3.2 FTS5 Search Function + +**File:** `src/search/fts.rs` + +```rust +use rusqlite::Connection; +use crate::core::error::Result; + +/// FTS search result. +#[derive(Debug, Clone)] +pub struct FtsResult { + pub document_id: i64, + pub rank: f64, // BM25 score (lower = better match) + pub snippet: String, // Context snippet around match +} + +/// Search documents using FTS5. +/// +/// Returns matching document IDs with BM25 rank scores and snippets. +/// Lower rank values indicate better matches. +/// Uses bm25() explicitly (not the `rank` alias) and snippet() for context. 
+pub fn search_fts(
+    conn: &Connection,
+    query: &str,
+    limit: usize,
+) -> Result<Vec<FtsResult>> {
+    if query.trim().is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let mut stmt = conn.prepare(
+        "SELECT rowid,
+                bm25(documents_fts),
+                snippet(documents_fts, 1, '', '', '...', 64)
+         FROM documents_fts
+         WHERE documents_fts MATCH ?
+         ORDER BY bm25(documents_fts)
+         LIMIT ?"
+    )?;
+
+    let results = stmt
+        .query_map([query, &limit.to_string()], |row| {
+            Ok(FtsResult {
+                document_id: row.get(0)?,
+                rank: row.get(1)?,
+                snippet: row.get(2)?,
+            })
+        })?
+        .collect::<Result<Vec<_>, _>>()?;
+
+    Ok(results)
+}
+```
+
+**Acceptance Criteria:**
+- [ ] Returns matching document IDs with BM25 rank
+- [ ] Porter stemming works (search/searching match)
+- [ ] Prefix search works (type-ahead UX)
+- [ ] Empty query returns empty results
+- [ ] Nonsense query returns empty results
+
+---
+
+### 3.3 Search Filters
+
+**File:** `src/search/filters.rs`
+
+```rust
+use rusqlite::Connection;
+use crate::core::error::Result;
+use crate::documents::SourceType;
+
+/// Search filters applied post-retrieval.
+#[derive(Debug, Clone, Default)]
+pub struct SearchFilters {
+    pub source_type: Option<SourceType>,
+    pub author: Option<String>,
+    pub project_id: Option<i64>,
+    pub after: Option<i64>,      // ms epoch
+    pub labels: Vec<String>,     // AND logic
+    pub path: Option<PathFilter>,
+    pub limit: usize,            // Default 20, max 100
+}
+
+/// Path filter with prefix or exact match.
+#[derive(Debug, Clone)]
+pub enum PathFilter {
+    Prefix(String), // Trailing `/` -> LIKE 'path/%'
+    Exact(String),  // No trailing `/` -> = 'path'
+}
+
+impl PathFilter {
+    pub fn from_str(s: &str) -> Self {
+        if s.ends_with('/') {
+            Self::Prefix(s.to_string())
+        } else {
+            Self::Exact(s.to_string())
+        }
+    }
+}
+
+/// Apply filters to document IDs, returning filtered set.
+///
+/// IMPORTANT: Preserves ranking order from input document_ids.
+/// Filters must not reorder results - maintain the RRF/search ranking.
+pub fn apply_filters( + conn: &Connection, + document_ids: &[i64], + filters: &SearchFilters, +) -> Result> { + // Build dynamic WHERE clause based on filters + // Multiple --label flags use AND logic + // Path prefix vs exact match per PathFilter variant + // + // Implementation strategy to preserve ranking order: + // 1. Accept document_ids as ordered list + // 2. Build CTE with position + // 3. JOIN with filters + // 4. ORDER BY original position + // + // Example SQL pattern: + // ```sql + // WITH ranked_docs(doc_id, pos) AS ( + // SELECT column1, ROW_NUMBER() OVER() as pos + // FROM (VALUES (?),(?),(?),...) + // ) + // SELECT d.id + // FROM documents d + // JOIN ranked_docs rd ON d.id = rd.doc_id + // WHERE d.source_type = ? + // AND EXISTS ( + // SELECT 1 FROM document_labels dl + // WHERE dl.document_id = d.id AND dl.label_name = ? + // ) + // ORDER BY rd.pos + // LIMIT ? + // ``` + todo!() +} +``` + +**Supported filters:** +| Filter | SQL Column | Notes | +|--------|-----------|-------| +| `--type` | `source_type` | `issue`, `mr`, `discussion` | +| `--author` | `author_username` | Exact match | +| `--project` | `project_id` | Resolve path to ID | +| `--after` | `created_at` | `>= date` (ms epoch) | +| `--label` | `document_labels` | JOIN, multiple = AND | +| `--path` | `document_paths` | JOIN, trailing `/` = prefix | +| `--limit` | N/A | Default 20, max 100 | + +**Acceptance Criteria:** +- [ ] Each filter correctly restricts results +- [ ] Multiple `--label` flags use AND logic +- [ ] Path prefix vs exact match works correctly +- [ ] Filters compose (all applied together) +- [ ] Ranking order preserved after filtering + +--- + +### 3.4 CLI: `gi search --mode=lexical` + +**File:** `src/cli/commands/search.rs` + +```rust +//! Search command - find documents using lexical, semantic, or hybrid search. 
+ +use console::style; +use serde::Serialize; + +use crate::core::error::Result; +use crate::core::time::ms_to_iso; +use crate::search::{SearchFilters, SearchMode, search_hybrid, HybridResult}; +use crate::Config; + +/// Search result for display. +#[derive(Debug, Serialize)] +pub struct SearchResultDisplay { + pub document_id: i64, + pub source_type: String, + pub title: Option, + pub url: Option, + pub project_path: String, + pub author: Option, + pub created_at: String, // ISO format + pub updated_at: String, // ISO format + pub score: f64, // Normalized 0-1 + pub snippet: String, // Context around match + pub labels: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub explain: Option, +} + +/// Ranking explanation for --explain flag. +#[derive(Debug, Serialize)] +pub struct ExplainData { + pub vector_rank: Option, + pub fts_rank: Option, + pub rrf_score: f64, +} + +/// Search results response. +#[derive(Debug, Serialize)] +pub struct SearchResponse { + pub query: String, + pub mode: String, + pub total_results: usize, + pub results: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub warnings: Vec, +} + +/// Run search command. +pub fn run_search( + config: &Config, + query: &str, + mode: SearchMode, + filters: SearchFilters, + explain: bool, +) -> Result { + // 1. Parse query and filters + // 2. Execute search based on mode + // 3. Apply post-retrieval filters + // 4. Format and return results + todo!() +} + +/// Print human-readable search results. 
+pub fn print_search_results(response: &SearchResponse, explain: bool) { + println!( + "Found {} results ({} search)\n", + response.total_results, + response.mode + ); + + for (i, result) in response.results.iter().enumerate() { + let type_prefix = match result.source_type.as_str() { + "merge_request" => "MR", + "issue" => "Issue", + "discussion" => "Discussion", + _ => &result.source_type, + }; + + let title = result.title.as_deref().unwrap_or("(untitled)"); + println!( + "[{}] {} - {} ({})", + i + 1, + style(type_prefix).cyan(), + title, + format!("{:.2}", result.score) + ); + + if explain { + if let Some(exp) = &result.explain { + let vec_str = exp.vector_rank.map(|r| format!("#{}", r)).unwrap_or_else(|| "-".into()); + let fts_str = exp.fts_rank.map(|r| format!("#{}", r)).unwrap_or_else(|| "-".into()); + println!( + " Vector: {}, FTS: {}, RRF: {:.4}", + vec_str, fts_str, exp.rrf_score + ); + } + } + + if let Some(author) = &result.author { + println!( + " @{} · {} · {}", + author, &result.created_at[..10], result.project_path + ); + } + + println!(" \"{}...\"", &result.snippet); + + if let Some(url) = &result.url { + println!(" {}", style(url).dim()); + } + println!(); + } +} + +/// Print JSON search results for robot mode. +pub fn print_search_results_json(response: &SearchResponse, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": response, + "meta": { + "elapsed_ms": elapsed_ms + } + }); + println!("{}", serde_json::to_string_pretty(&output).unwrap()); +} +``` + +**CLI integration in `src/cli/mod.rs`:** +```rust +/// Search subcommand arguments. 
+#[derive(Args)] +pub struct SearchArgs { + /// Search query + query: String, + + /// Search mode + #[arg(long, default_value = "hybrid")] + mode: String, // "hybrid" | "lexical" | "semantic" + + /// Filter by source type + #[arg(long, value_name = "TYPE")] + r#type: Option, + + /// Filter by author username + #[arg(long)] + author: Option, + + /// Filter by project path + #[arg(long)] + project: Option, + + /// Filter by creation date (after) + #[arg(long)] + after: Option, + + /// Filter by label (can specify multiple) + #[arg(long, action = clap::ArgAction::Append)] + label: Vec, + + /// Filter by file path + #[arg(long)] + path: Option, + + /// Maximum results + #[arg(long, default_value = "20")] + limit: usize, + + /// Show ranking breakdown + #[arg(long)] + explain: bool, +} +``` + +**Acceptance Criteria:** +- [ ] Works without Ollama running +- [ ] All filters functional +- [ ] Human-readable output with snippets +- [ ] JSON output matches schema +- [ ] Empty results show helpful message +- [ ] "No data indexed" message if documents table empty + +--- + +## Phase 4: Embedding Pipeline + +### 4.1 Embedding Module Structure + +**New module:** `src/embedding/` + +``` +src/embedding/ +├── mod.rs # Module exports +├── ollama.rs # Ollama API client +├── pipeline.rs # Batch embedding orchestration +└── change_detector.rs # Detect documents needing re-embedding +``` + +**File:** `src/embedding/mod.rs` + +```rust +//! Embedding generation and storage. +//! +//! Uses Ollama for embedding generation and sqlite-vec for storage. 
+
+mod change_detector;
+mod ollama;
+mod pipeline;
+
+pub use change_detector::detect_embedding_changes;
+pub use ollama::{OllamaClient, OllamaConfig, check_ollama_health};
+pub use pipeline::{embed_documents, EmbedResult};
+```
+
+---
+
+### 4.2 Ollama Client
+
+**File:** `src/embedding/ollama.rs`
+
+```rust
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+
+use crate::core::error::{GiError, Result};
+
+/// Ollama client configuration.
+#[derive(Debug, Clone)]
+pub struct OllamaConfig {
+    pub base_url: String,  // "http://localhost:11434"
+    pub model: String,     // "nomic-embed-text"
+    pub timeout_secs: u64, // Request timeout
+}
+
+impl Default for OllamaConfig {
+    fn default() -> Self {
+        Self {
+            base_url: "http://localhost:11434".into(),
+            model: "nomic-embed-text".into(),
+            timeout_secs: 60,
+        }
+    }
+}
+
+/// Ollama API client.
+pub struct OllamaClient {
+    client: Client,
+    config: OllamaConfig,
+}
+
+/// Batch embed request.
+#[derive(Serialize)]
+struct EmbedRequest {
+    model: String,
+    input: Vec<String>,
+}
+
+/// Batch embed response.
+#[derive(Deserialize)]
+struct EmbedResponse {
+    model: String,
+    embeddings: Vec<Vec<f32>>,
+}
+
+/// Model info from /api/tags.
+#[derive(Deserialize)]
+struct TagsResponse {
+    models: Vec<ModelInfo>,
+}
+
+#[derive(Deserialize)]
+struct ModelInfo {
+    name: String,
+}
+
+impl OllamaClient {
+    pub fn new(config: OllamaConfig) -> Self {
+        let client = Client::builder()
+            .timeout(std::time::Duration::from_secs(config.timeout_secs))
+            .build()
+            .expect("Failed to create HTTP client");
+
+        Self { client, config }
+    }
+
+    /// Check if Ollama is available and model is loaded.
+    pub async fn health_check(&self) -> Result<()> {
+        let url = format!("{}/api/tags", self.config.base_url);
+
+        let response = self.client.get(&url).send().await.map_err(|e| {
+            GiError::OllamaUnavailable {
+                base_url: self.config.base_url.clone(),
+                source: Some(e),
+            }
+        })?;
+
+        let tags: TagsResponse = response.json().await?;
+
+        let model_available = tags.models.iter().any(|m| m.name.starts_with(&self.config.model));
+
+        if !model_available {
+            return Err(GiError::OllamaModelNotFound {
+                model: self.config.model.clone(),
+            });
+        }
+
+        Ok(())
+    }
+
+    /// Generate embeddings for a batch of texts.
+    ///
+    /// Returns 768-dimensional vectors for each input text.
+    pub async fn embed_batch(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
+        let url = format!("{}/api/embed", self.config.base_url);
+
+        let request = EmbedRequest {
+            model: self.config.model.clone(),
+            input: texts,
+        };
+
+        let response = self.client
+            .post(&url)
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| GiError::OllamaUnavailable {
+                base_url: self.config.base_url.clone(),
+                source: Some(e),
+            })?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let body = response.text().await.unwrap_or_default();
+            return Err(GiError::EmbeddingFailed {
+                document_id: 0, // Batch failure
+                reason: format!("HTTP {}: {}", status, body),
+            });
+        }
+
+        let embed_response: EmbedResponse = response.json().await?;
+        Ok(embed_response.embeddings)
+    }
+}
+
+/// Quick health check without full client.
+pub async fn check_ollama_health(base_url: &str) -> bool { + let client = Client::new(); + client + .get(format!("{}/api/tags", base_url)) + .send() + .await + .is_ok() +} +``` + +**Endpoints:** +| Endpoint | Purpose | +|----------|---------| +| `GET /api/tags` | Health check, verify model available | +| `POST /api/embed` | Batch embedding (preferred) | + +**Acceptance Criteria:** +- [ ] Health check detects Ollama availability +- [ ] Batch embedding works with up to 32 texts +- [ ] Clear error messages for common failures + +--- + +### 4.3 Error Handling Extensions + +**File:** `src/core/error.rs` (extend existing) + +Add to `ErrorCode`: +```rust +pub enum ErrorCode { + // ... existing variants ... + InvalidEnumValue, + OllamaUnavailable, + OllamaModelNotFound, + EmbeddingFailed, +} + +impl ErrorCode { + pub fn exit_code(&self) -> i32 { + match self { + // ... existing mappings ... + Self::InvalidEnumValue => 13, + Self::OllamaUnavailable => 14, + Self::OllamaModelNotFound => 15, + Self::EmbeddingFailed => 16, + } + } +} + +impl std::fmt::Display for ErrorCode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let code = match self { + // ... existing mappings ... + Self::InvalidEnumValue => "INVALID_ENUM_VALUE", + Self::OllamaUnavailable => "OLLAMA_UNAVAILABLE", + Self::OllamaModelNotFound => "OLLAMA_MODEL_NOT_FOUND", + Self::EmbeddingFailed => "EMBEDDING_FAILED", + }; + write!(f, "{code}") + } +} +``` + +Add to `GiError`: +```rust +pub enum GiError { + // ... existing variants ... + + #[error("Cannot connect to Ollama at {base_url}. Is it running?")] + OllamaUnavailable { + base_url: String, + #[source] + source: Option, + }, + + #[error("Ollama model '{model}' not found. 
Run: ollama pull {model}")] + OllamaModelNotFound { model: String }, + + #[error("Embedding failed for document {document_id}: {reason}")] + EmbeddingFailed { document_id: i64, reason: String }, +} + +impl GiError { + pub fn code(&self) -> ErrorCode { + match self { + // ... existing mappings ... + Self::OllamaUnavailable { .. } => ErrorCode::OllamaUnavailable, + Self::OllamaModelNotFound { .. } => ErrorCode::OllamaModelNotFound, + Self::EmbeddingFailed { .. } => ErrorCode::EmbeddingFailed, + } + } + + pub fn suggestion(&self) -> Option<&'static str> { + match self { + // ... existing mappings ... + Self::OllamaUnavailable { .. } => Some("Start Ollama: ollama serve"), + Self::OllamaModelNotFound { model } => Some("Pull the model: ollama pull nomic-embed-text"), + Self::EmbeddingFailed { .. } => Some("Check Ollama logs or retry with 'gi embed --retry-failed'"), + } + } +} +``` + +--- + +### 4.4 Embedding Pipeline + +**File:** `src/embedding/pipeline.rs` + +```rust +use indicatif::{ProgressBar, ProgressStyle}; +use rusqlite::Connection; + +use crate::core::error::Result; +use crate::embedding::OllamaClient; + +/// Batch size for embedding requests. +const BATCH_SIZE: usize = 32; + +/// SQLite page size for paging through pending documents. +const DB_PAGE_SIZE: usize = 500; + +/// Which documents to embed. +#[derive(Debug, Clone, Copy)] +pub enum EmbedSelection { + /// New or changed documents (default). + Pending, + /// Only previously failed documents. + RetryFailed, +} + +/// Result of embedding run. +#[derive(Debug, Default)] +pub struct EmbedResult { + pub embedded: usize, + pub failed: usize, + pub skipped: usize, +} + +/// Embed documents that need embedding. +/// +/// Process: +/// 1. Page through documents needing embedding (DB_PAGE_SIZE at a time) +/// 2. Batch documents (32 per Ollama request) +/// 3. Fire concurrent HTTP requests via FuturesUnordered (capped by concurrency) +/// 4. 
Collect results and write to SQLite sequentially (rusqlite is !Send)
+/// 5. On failure: record error with actual content_hash, continue with next batch
+///
+/// Architecture note: rusqlite::Connection is !Send, so all DB reads/writes
+/// happen on the main thread. Only HTTP calls are concurrent.
+pub async fn embed_documents(
+    conn: &Connection,
+    client: &OllamaClient,
+    concurrency: usize,
+    progress_callback: Option<Box<dyn Fn(usize, usize)>>,
+) -> Result<EmbedResult> {
+    use futures::stream::{FuturesUnordered, StreamExt};
+
+    let mut result = EmbedResult::default();
+    let total_pending = count_pending_documents(conn)?;
+
+    if total_pending == 0 {
+        return Ok(result);
+    }
+
+    // Page through pending documents to avoid loading all into memory
+    loop {
+        let pending = find_pending_documents(conn, DB_PAGE_SIZE, EmbedSelection::Pending)?;
+        if pending.is_empty() {
+            break;
+        }
+
+        // Launch concurrent HTTP requests, collect results
+        let mut futures = FuturesUnordered::new();
+
+        for batch in pending.chunks(BATCH_SIZE) {
+            let texts: Vec<String> = batch.iter().map(|d| d.content.clone()).collect();
+            let batch_meta: Vec<(i64, String)> = batch
+                .iter()
+                .map(|d| (d.id, d.content_hash.clone()))
+                .collect();
+
+            futures.push(async move {
+                let embed_result = client.embed_batch(texts).await;
+                (batch_meta, embed_result)
+            });
+
+            // Cap in-flight requests
+            if futures.len() >= concurrency {
+                if let Some((meta, res)) = futures.next().await {
+                    collect_writes(conn, &meta, res, &mut result)?;
+                }
+            }
+        }
+
+        // Drain remaining futures
+        while let Some((meta, res)) = futures.next().await {
+            collect_writes(conn, &meta, res, &mut result)?;
+        }
+
+        if let Some(ref cb) = progress_callback {
+            cb(result.embedded + result.failed, total_pending);
+        }
+    }
+
+    Ok(result)
+}
+
+/// Collect embedding results and write to DB (sequential, on main thread).
+fn collect_writes(
+    conn: &Connection,
+    batch_meta: &[(i64, String)],
+    embed_result: Result<Vec<Vec<f32>>>,
+    result: &mut EmbedResult,
+) -> Result<()> {
+    // unchecked_transaction: Connection::transaction() requires &mut Connection,
+    // which the shared borrow here cannot provide.
+    let tx = conn.unchecked_transaction()?;
+    match embed_result {
+        Ok(embeddings) => {
+            for ((doc_id, hash), embedding) in batch_meta.iter().zip(embeddings.iter()) {
+                store_embedding(&tx, *doc_id, embedding, hash)?;
+                result.embedded += 1;
+            }
+        }
+        Err(e) => {
+            for (doc_id, hash) in batch_meta {
+                record_embedding_error(&tx, *doc_id, hash, &e.to_string())?;
+                result.failed += 1;
+            }
+        }
+    }
+    tx.commit()?;
+    Ok(())
+}
+
+struct PendingDocument {
+    id: i64,
+    content: String,
+    content_hash: String,
+}
+
+/// Count total pending documents (for progress reporting).
+fn count_pending_documents(conn: &Connection) -> Result<usize> {
+    let count: usize = conn.query_row(
+        "SELECT COUNT(*)
+         FROM documents d
+         LEFT JOIN embedding_metadata em ON d.id = em.document_id
+         WHERE em.document_id IS NULL
+            OR em.content_hash != d.content_hash",
+        [],
+        |row| row.get(0),
+    )?;
+    Ok(count)
+}
+
+fn find_pending_documents(
+    conn: &Connection,
+    limit: usize,
+    selection: EmbedSelection,
+) -> Result<Vec<PendingDocument>> {
+    let sql = match selection {
+        EmbedSelection::Pending =>
+            "SELECT d.id, d.content_text, d.content_hash
+             FROM documents d
+             LEFT JOIN embedding_metadata em ON d.id = em.document_id
+             WHERE em.document_id IS NULL
+                OR em.content_hash != d.content_hash
+             LIMIT ?",
+        EmbedSelection::RetryFailed =>
+            "SELECT d.id, d.content_text, d.content_hash
+             FROM documents d
+             JOIN embedding_metadata em ON d.id = em.document_id
+             WHERE em.last_error IS NOT NULL
+             LIMIT ?",
+    };
+    let mut stmt = conn.prepare(sql)?;
+
+    let docs = stmt
+        .query_map([limit], |row| {
+            Ok(PendingDocument {
+                id: row.get(0)?,
+                content: row.get(1)?,
+                content_hash: row.get(2)?,
+            })
+        })?
+        .collect::<Result<Vec<_>, _>>()?;
+
+    Ok(docs)
+}
+
+fn store_embedding(
+    conn: &Connection,
+    document_id: i64,
+    embedding: &[f32],
+    content_hash: &str,
+) -> Result<()> {
+    // Convert embedding to bytes for sqlite-vec
+    // sqlite-vec expects raw little-endian bytes, not the array directly
+    let embedding_bytes: Vec<u8> = embedding
+        .iter()
+        .flat_map(|f| f.to_le_bytes())
+        .collect();
+
+    // Store in sqlite-vec (rowid = document_id)
+    conn.execute(
+        "INSERT OR REPLACE INTO embeddings(rowid, embedding) VALUES (?, ?)",
+        rusqlite::params![document_id, embedding_bytes],
+    )?;
+
+    // Update metadata
+    let now = crate::core::time::now_ms();
+    conn.execute(
+        "INSERT OR REPLACE INTO embedding_metadata
+         (document_id, model, dims, content_hash, created_at, last_error, attempt_count, last_attempt_at)
+         VALUES (?, 'nomic-embed-text', 768, ?, ?, NULL, 0, ?)",
+        rusqlite::params![document_id, content_hash, now, now],
+    )?;
+
+    Ok(())
+}
+
+fn record_embedding_error(
+    conn: &Connection,
+    document_id: i64,
+    content_hash: &str,
+    error: &str,
+) -> Result<()> {
+    let now = crate::core::time::now_ms();
+    conn.execute(
+        "INSERT INTO embedding_metadata
+         (document_id, model, dims, content_hash, created_at, last_error, attempt_count, last_attempt_at)
+         VALUES (?, 'nomic-embed-text', 768, ?, ?, ?, 1, ?)
+ ON CONFLICT(document_id) DO UPDATE SET + last_error = excluded.last_error, + attempt_count = attempt_count + 1, + last_attempt_at = excluded.last_attempt_at", + rusqlite::params![document_id, content_hash, now, error, now], + )?; + + Ok(()) +} +``` + +**Acceptance Criteria:** +- [ ] New documents get embedded +- [ ] Changed documents (hash mismatch) get re-embedded +- [ ] Unchanged documents skipped +- [ ] Failures recorded in `embedding_metadata.last_error` +- [ ] Failures record actual content_hash (not empty string) +- [ ] Writes batched in transactions for performance +- [ ] Concurrency parameter respected +- [ ] Progress reported during embedding + +--- + +### 4.5 CLI: `gi embed` + +**File:** `src/cli/commands/embed.rs` + +```rust +//! Embed command - generate embeddings for documents. + +use indicatif::{ProgressBar, ProgressStyle}; +use serde::Serialize; + +use crate::core::error::Result; +use crate::embedding::{embed_documents, EmbedResult, OllamaClient, OllamaConfig}; +use crate::Config; + +/// Run embedding command. +pub async fn run_embed( + config: &Config, + retry_failed: bool, +) -> Result { + let ollama_config = OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + timeout_secs: 120, + }; + + let client = OllamaClient::new(ollama_config); + + // Health check + client.health_check().await?; + + // Run embedding + let result = embed_documents( + &conn, + &client, + config.embedding.concurrency as usize, + None, + ).await?; + + Ok(result) +} + +/// Print human-readable output. +pub fn print_embed(result: &EmbedResult, elapsed_secs: u64) { + println!("Embedding complete:"); + println!(" Embedded: {:>6} documents", result.embedded); + println!(" Failed: {:>6} documents", result.failed); + println!(" Skipped: {:>6} documents", result.skipped); + println!(" Elapsed: {}m {}s", elapsed_secs / 60, elapsed_secs % 60); +} + +/// Print JSON output for robot mode. 
+pub fn print_embed_json(result: &EmbedResult, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": { + "embedded": result.embedded, + "failed": result.failed, + "skipped": result.skipped + }, + "meta": { + "elapsed_ms": elapsed_ms + } + }); + println!("{}", serde_json::to_string_pretty(&output).unwrap()); +} +``` + +**CLI integration:** +```rust +/// Embed subcommand arguments. +#[derive(Args)] +pub struct EmbedArgs { + /// Retry only previously failed documents + #[arg(long)] + retry_failed: bool, +} +``` + +**Acceptance Criteria:** +- [ ] Embeds documents without embeddings +- [ ] Re-embeds documents with changed hash +- [ ] `--retry-failed` only processes failed documents +- [ ] Progress bar with count +- [ ] Clear error if Ollama unavailable + +--- + +### 4.6 CLI: `gi stats` + +**File:** `src/cli/commands/stats.rs` + +```rust +//! Stats command - display document and embedding statistics. + +use rusqlite::Connection; +use serde::Serialize; + +use crate::core::error::Result; +use crate::Config; + +/// Document statistics. +#[derive(Debug, Serialize)] +pub struct Stats { + pub documents: DocumentStats, + pub embeddings: EmbeddingStats, + pub fts: FtsStats, +} + +#[derive(Debug, Serialize)] +pub struct DocumentStats { + pub issues: usize, + pub mrs: usize, + pub discussions: usize, + pub total: usize, + pub truncated: usize, +} + +#[derive(Debug, Serialize)] +pub struct EmbeddingStats { + pub embedded: usize, + pub pending: usize, + pub failed: usize, + pub coverage_pct: f64, +} + +#[derive(Debug, Serialize)] +pub struct FtsStats { + pub indexed: usize, +} + +/// Integrity check result. +#[derive(Debug, Serialize)] +pub struct IntegrityCheck { + pub documents_count: usize, + pub fts_count: usize, + pub embeddings_count: usize, + pub metadata_count: usize, + pub orphaned_embeddings: usize, + pub hash_mismatches: usize, + pub ok: bool, +} + +/// Run stats command. 
+pub fn run_stats(config: &Config) -> Result { + // Query counts from database + todo!() +} + +/// Run integrity check (--check flag). +/// +/// Verifies: +/// - documents count == documents_fts count +/// - embeddings.rowid all exist in documents.id +/// - embedding_metadata.content_hash == documents.content_hash +pub fn run_integrity_check(config: &Config) -> Result { + // 1. Count documents + // 2. Count FTS entries + // 3. Find orphaned embeddings (no matching document) + // 4. Find hash mismatches between embedding_metadata and documents + // 5. Return check results + todo!() +} + +/// Print human-readable stats. +pub fn print_stats(stats: &Stats) { + println!("Document Statistics:"); + println!(" Issues: {:>6} documents", stats.documents.issues); + println!(" MRs: {:>6} documents", stats.documents.mrs); + println!(" Discussions: {:>6} documents", stats.documents.discussions); + println!(" Total: {:>6} documents", stats.documents.total); + if stats.documents.truncated > 0 { + println!(" Truncated: {:>6}", stats.documents.truncated); + } + println!(); + println!("Embedding Coverage:"); + println!(" Embedded: {:>6} ({:.1}%)", stats.embeddings.embedded, stats.embeddings.coverage_pct); + println!(" Pending: {:>6}", stats.embeddings.pending); + println!(" Failed: {:>6}", stats.embeddings.failed); + println!(); + println!("FTS Index:"); + println!(" Indexed: {:>6} documents", stats.fts.indexed); +} + +/// Print integrity check results. 
+pub fn print_integrity_check(check: &IntegrityCheck) { + println!("Integrity Check:"); + println!(" Documents: {:>6}", check.documents_count); + println!(" FTS entries: {:>6}", check.fts_count); + println!(" Embeddings: {:>6}", check.embeddings_count); + println!(" Metadata: {:>6}", check.metadata_count); + if check.orphaned_embeddings > 0 { + println!(" Orphaned embeddings: {:>6} (WARN)", check.orphaned_embeddings); + } + if check.hash_mismatches > 0 { + println!(" Hash mismatches: {:>6} (WARN)", check.hash_mismatches); + } + println!(); + println!(" Status: {}", if check.ok { "OK" } else { "ISSUES FOUND" }); +} + +/// Print JSON stats for robot mode. +pub fn print_stats_json(stats: &Stats) { + let output = serde_json::json!({ + "ok": true, + "data": stats + }); + println!("{}", serde_json::to_string_pretty(&output).unwrap()); +} +``` + +**CLI integration:** +```rust +/// Stats subcommand arguments. +#[derive(Args)] +pub struct StatsArgs { + /// Run integrity checks (document/FTS/embedding consistency) + #[arg(long)] + check: bool, +} +``` + +**Acceptance Criteria:** +- [ ] Shows document counts by type +- [ ] Shows embedding coverage +- [ ] Shows FTS index count +- [ ] Identifies truncated documents +- [ ] `--check` verifies document/FTS/embedding consistency +- [ ] JSON output for scripting + +--- + +## Phase 5: Hybrid Search + +### 5.1 Vector Search Function + +**File:** `src/search/vector.rs` + +```rust +use rusqlite::Connection; +use crate::core::error::Result; + +/// Vector search result. +#[derive(Debug, Clone)] +pub struct VectorResult { + pub document_id: i64, + pub distance: f64, // Lower = more similar +} + +/// Search documents using vector similarity. +/// +/// Uses sqlite-vec for efficient vector search. +/// Returns document IDs sorted by distance (lower = better match). 
+/// +/// IMPORTANT: sqlite-vec KNN queries require: +/// - k parameter for number of results +/// - embedding passed as raw little-endian bytes +pub fn search_vector( + conn: &Connection, + query_embedding: &[f32], + limit: usize, +) -> Result> { + // Convert embedding to bytes for sqlite-vec + let embedding_bytes: Vec = query_embedding + .iter() + .flat_map(|f| f.to_le_bytes()) + .collect(); + + let mut stmt = conn.prepare( + "SELECT rowid, distance + FROM embeddings + WHERE embedding MATCH ? AND k = ? + ORDER BY distance + LIMIT ?" + )?; + + let results = stmt + .query_map(rusqlite::params![embedding_bytes, limit, limit], |row| { + Ok(VectorResult { + document_id: row.get(0)?, + distance: row.get(1)?, + }) + })? + .collect::, _>>()?; + + Ok(results) +} +``` + +**Acceptance Criteria:** +- [ ] Returns document IDs with distances +- [ ] Lower distance = better match +- [ ] Works with 768-dim vectors +- [ ] Uses k parameter for KNN query +- [ ] Embedding passed as bytes + +--- + +### 5.2 RRF Ranking + +**File:** `src/search/rrf.rs` + +```rust +use std::collections::HashMap; + +/// RRF ranking constant. +const RRF_K: f64 = 60.0; + +/// RRF-ranked result. +#[derive(Debug, Clone)] +pub struct RrfResult { + pub document_id: i64, + pub rrf_score: f64, // Raw RRF score + pub normalized_score: f64, // Normalized to 0-1 + pub vector_rank: Option, + pub fts_rank: Option, +} + +/// Rank documents using Reciprocal Rank Fusion. 
+///
+/// Algorithm:
+///   RRF_score(d) = Σ 1 / (k + rank_i(d))
+///
+/// Where:
+/// - k = 60 (tunable constant)
+/// - rank_i(d) = rank of document d in retriever i (1-indexed)
+/// - Sum over all retrievers where document appears
+pub fn rank_rrf(
+    vector_results: &[(i64, f64)],  // (doc_id, distance)
+    fts_results: &[(i64, f64)],     // (doc_id, bm25_score)
+) -> Vec<RrfResult> {
+    // doc_id -> (rrf_score, vector_rank, fts_rank)
+    let mut scores: HashMap<i64, (f64, Option<usize>, Option<usize>)> = HashMap::new();
+
+    // Add vector results (1-indexed ranks)
+    for (rank, (doc_id, _)) in vector_results.iter().enumerate() {
+        let rrf_contribution = 1.0 / (RRF_K + (rank + 1) as f64);
+        let entry = scores.entry(*doc_id).or_insert((0.0, None, None));
+        entry.0 += rrf_contribution;
+        entry.1 = Some(rank + 1);
+    }
+
+    // Add FTS results (1-indexed ranks)
+    for (rank, (doc_id, _)) in fts_results.iter().enumerate() {
+        let rrf_contribution = 1.0 / (RRF_K + (rank + 1) as f64);
+        let entry = scores.entry(*doc_id).or_insert((0.0, None, None));
+        entry.0 += rrf_contribution;
+        entry.2 = Some(rank + 1);
+    }
+
+    // Convert to results and sort by RRF score descending
+    let mut results: Vec<_> = scores
+        .into_iter()
+        .map(|(doc_id, (rrf_score, vector_rank, fts_rank))| {
+            RrfResult {
+                document_id: doc_id,
+                rrf_score,
+                normalized_score: 0.0, // Will be set below
+                vector_rank,
+                fts_rank,
+            }
+        })
+        .collect();
+
+    results.sort_by(|a, b| b.rrf_score.partial_cmp(&a.rrf_score).unwrap());
+
+    // Normalize scores to 0-1
+    if let Some(max_score) = results.first().map(|r| r.rrf_score) {
+        for result in &mut results {
+            result.normalized_score = result.rrf_score / max_score;
+        }
+    }
+
+    results
+}
+```
+
+**Acceptance Criteria:**
+- [ ] Documents in both lists score higher
+- [ ] Documents in one list still included
+- [ ] Normalized score = rrfScore / max(rrfScore)
+- [ ] Raw RRF score available in `--explain` output
+
+---
+
+### 5.3 Adaptive Recall
+
+**File:** `src/search/hybrid.rs`
+
+```rust
+use rusqlite::Connection;
+
+use crate::core::error::Result;
+use
crate::embedding::OllamaClient; +use crate::search::{SearchFilters, search_fts, search_vector, rank_rrf, RrfResult}; + +/// Base recall for unfiltered search. +const BASE_RECALL: usize = 50; + +/// Expanded recall when filters are applied. +const FILTERED_RECALL: usize = 200; + +/// Search mode. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchMode { + Hybrid, // Vector + FTS with RRF + Lexical, // FTS only + Semantic, // Vector only +} + +impl SearchMode { + pub fn from_str(s: &str) -> Option { + match s.to_lowercase().as_str() { + "hybrid" => Some(Self::Hybrid), + "lexical" | "fts" => Some(Self::Lexical), + "semantic" | "vector" => Some(Self::Semantic), + _ => None, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + Self::Hybrid => "hybrid", + Self::Lexical => "lexical", + Self::Semantic => "semantic", + } + } +} + +/// Hybrid search result. +#[derive(Debug)] +pub struct HybridResult { + pub document_id: i64, + pub score: f64, + pub vector_rank: Option, + pub fts_rank: Option, + pub rrf_score: f64, +} + +/// Execute hybrid search. +/// +/// Adaptive recall: expands topK when filters are applied to prevent +/// "no results" when relevant docs exist but would be filtered out. +/// +/// IMPORTANT: All modes use RRF consistently to ensure rank fields +/// are populated correctly for --explain output. 
+pub async fn search_hybrid( + conn: &Connection, + client: Option<&OllamaClient>, + query: &str, + mode: SearchMode, + filters: &SearchFilters, +) -> Result<(Vec, Vec)> { + let mut warnings: Vec = Vec::new(); + // Determine recall based on filters + let top_k = if filters.has_any_filter() { + FILTERED_RECALL + } else { + BASE_RECALL + }; + + match mode { + SearchMode::Lexical => { + // FTS only - use RRF with empty vector results for consistent ranking + let fts_results = search_fts(conn, query, top_k)?; + + let fts_tuples: Vec<_> = fts_results.iter().map(|r| (r.document_id, r.rank)).collect(); + let ranked = rank_rrf(&[], &fts_tuples); + + let results = ranked + .into_iter() + .map(|r| HybridResult { + document_id: r.document_id, + score: r.normalized_score, + vector_rank: r.vector_rank, + fts_rank: r.fts_rank, + rrf_score: r.rrf_score, + }) + .collect(); + Ok((results, warnings)) + } + SearchMode::Semantic => { + // Vector only - requires client + let client = client.ok_or_else(|| crate::core::error::GiError::OllamaUnavailable { + base_url: "unknown".into(), + source: None, + })?; + + let query_embedding = client.embed_batch(vec![query.to_string()]).await?; + let embedding = query_embedding.into_iter().next().unwrap(); + + let vec_results = search_vector(conn, &embedding, top_k)?; + + // Use RRF with empty FTS results for consistent ranking + let vec_tuples: Vec<_> = vec_results.iter().map(|r| (r.document_id, r.distance)).collect(); + let ranked = rank_rrf(&vec_tuples, &[]); + + let results = ranked + .into_iter() + .map(|r| HybridResult { + document_id: r.document_id, + score: r.normalized_score, + vector_rank: r.vector_rank, + fts_rank: r.fts_rank, + rrf_score: r.rrf_score, + }) + .collect(); + Ok((results, warnings)) + } + SearchMode::Hybrid => { + // Both retrievers with RRF fusion + let fts_results = search_fts(conn, query, top_k)?; + + let vec_results = if let Some(client) = client { + let query_embedding = 
client.embed_batch(vec![query.to_string()]).await?; + let embedding = query_embedding.into_iter().next().unwrap(); + search_vector(conn, &embedding, top_k)? + } else { + // Graceful degradation: use FTS only + warnings.push("Embedding service unavailable, using lexical search only".into()); + Vec::new() + }; + + // RRF fusion + let vec_tuples: Vec<_> = vec_results.iter().map(|r| (r.document_id, r.distance)).collect(); + let fts_tuples: Vec<_> = fts_results.iter().map(|r| (r.document_id, r.rank)).collect(); + + let ranked = rank_rrf(&vec_tuples, &fts_tuples); + + let results = ranked + .into_iter() + .map(|r| HybridResult { + document_id: r.document_id, + score: r.normalized_score, + vector_rank: r.vector_rank, + fts_rank: r.fts_rank, + rrf_score: r.rrf_score, + }) + .collect(); + Ok((results, warnings)) + } + } +} +``` + +**Acceptance Criteria:** +- [ ] Unfiltered search uses topK=50 +- [ ] Any filter triggers topK=200 +- [ ] Final results still limited by `--limit` + +--- + +### 5.4 Graceful Degradation + +When Ollama unavailable during hybrid/semantic search: +1. Log warning: "Embedding service unavailable, using lexical search only" +2. Fall back to FTS-only search +3. Include warning in response + +**Acceptance Criteria:** +- [ ] Default mode is hybrid +- [ ] `--mode=lexical` works without Ollama +- [ ] `--mode=semantic` requires Ollama +- [ ] Graceful degradation when Ollama down +- [ ] `--explain` shows rank breakdown +- [ ] All Phase 3 filters work in hybrid mode + +--- + +## Phase 6: Sync Orchestration + +### 6.1 Dirty Source Tracking + +**File:** `src/ingestion/dirty_tracker.rs` + +```rust +use rusqlite::Connection; +use crate::core::error::Result; +use crate::core::time::now_ms; +use crate::documents::SourceType; + +/// Maximum dirty sources to process per sync run. +const MAX_DIRTY_SOURCES_PER_RUN: usize = 500; + +/// Mark a source as dirty (needs document regeneration). +/// +/// Called during entity upsert operations. 
+/// Uses INSERT OR IGNORE to avoid duplicates. +pub fn mark_dirty( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result<()> { + conn.execute( + "INSERT OR IGNORE INTO dirty_sources (source_type, source_id, queued_at) + VALUES (?, ?, ?)", + rusqlite::params![source_type.as_str(), source_id, now_ms()], + )?; + Ok(()) +} + +/// Get dirty sources ordered by queue time (bounded). +/// +/// Limits results to prevent unbounded processing during large syncs. +pub fn get_dirty_sources(conn: &Connection) -> Result> { + let mut stmt = conn.prepare( + "SELECT source_type, source_id FROM dirty_sources ORDER BY queued_at LIMIT ?" + )?; + + let results = stmt + .query_map([MAX_DIRTY_SOURCES_PER_RUN], |row| { + let type_str: String = row.get(0)?; + let source_type = match type_str.as_str() { + "issue" => SourceType::Issue, + "merge_request" => SourceType::MergeRequest, + "discussion" => SourceType::Discussion, + other => return Err(rusqlite::Error::FromSqlConversionFailure( + 0, + rusqlite::types::Type::Text, + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid source_type: {other}"), + )), + )), + }; + Ok((source_type, row.get(1)?)) + })? + .collect::, _>>()?; + + Ok(results) +} + +/// Clear dirty source after processing. +pub fn clear_dirty( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result<()> { + conn.execute( + "DELETE FROM dirty_sources WHERE source_type = ? AND source_id = ?", + rusqlite::params![source_type.as_str(), source_id], + )?; + Ok(()) +} +``` + +**Acceptance Criteria:** +- [ ] Upserted entities added to dirty_sources +- [ ] Duplicates ignored +- [ ] Queue cleared after document regeneration +- [ ] Processing bounded per run (max 500) + +--- + +### 6.2 Pending Discussion Queue + +**File:** `src/ingestion/discussion_queue.rs` + +```rust +use rusqlite::Connection; +use crate::core::error::Result; +use crate::core::time::now_ms; + +/// Noteable type for discussion fetching. 
+#[derive(Debug, Clone, Copy)] +pub enum NoteableType { + Issue, + MergeRequest, +} + +impl NoteableType { + pub fn as_str(&self) -> &'static str { + match self { + Self::Issue => "Issue", + Self::MergeRequest => "MergeRequest", + } + } +} + +/// Pending discussion fetch entry. +pub struct PendingFetch { + pub project_id: i64, + pub noteable_type: NoteableType, + pub noteable_iid: i64, + pub attempt_count: i64, +} + +/// Queue a discussion fetch for an entity. +pub fn queue_discussion_fetch( + conn: &Connection, + project_id: i64, + noteable_type: NoteableType, + noteable_iid: i64, +) -> Result<()> { + conn.execute( + "INSERT OR REPLACE INTO pending_discussion_fetches + (project_id, noteable_type, noteable_iid, queued_at, attempt_count, last_attempt_at, last_error) + VALUES (?, ?, ?, ?, 0, NULL, NULL)", + rusqlite::params![project_id, noteable_type.as_str(), noteable_iid, now_ms()], + )?; + Ok(()) +} + +/// Get pending fetches with exponential backoff. +/// +/// Only returns items that have waited long enough based on attempt_count. +/// Backoff formula: min_wait_ms = 1000 * 2^attempt_count (capped at 1 hour) +/// +/// Limited to `max_items` to bound API calls per sync run. +pub fn get_pending_fetches(conn: &Connection, max_items: usize) -> Result> { + let now = now_ms(); + + let mut stmt = conn.prepare( + "SELECT project_id, noteable_type, noteable_iid, attempt_count + FROM pending_discussion_fetches + WHERE last_attempt_at IS NULL + OR (? - last_attempt_at) > MIN(3600000, 1000 * (1 << attempt_count)) + ORDER BY attempt_count ASC, queued_at ASC + LIMIT ?" + )?; + + let results = stmt + .query_map(rusqlite::params![now, max_items], |row| { + let type_str: String = row.get(1)?; + let noteable_type = if type_str == "Issue" { + NoteableType::Issue + } else { + NoteableType::MergeRequest + }; + Ok(PendingFetch { + project_id: row.get(0)?, + noteable_type, + noteable_iid: row.get(2)?, + attempt_count: row.get(3)?, + }) + })? 
+ .collect::, _>>()?; + + Ok(results) +} + +/// Mark fetch as successful and remove from queue. +pub fn complete_fetch( + conn: &Connection, + project_id: i64, + noteable_type: NoteableType, + noteable_iid: i64, +) -> Result<()> { + conn.execute( + "DELETE FROM pending_discussion_fetches + WHERE project_id = ? AND noteable_type = ? AND noteable_iid = ?", + rusqlite::params![project_id, noteable_type.as_str(), noteable_iid], + )?; + Ok(()) +} + +/// Record fetch failure. +pub fn record_fetch_error( + conn: &Connection, + project_id: i64, + noteable_type: NoteableType, + noteable_iid: i64, + error: &str, +) -> Result<()> { + conn.execute( + "UPDATE pending_discussion_fetches + SET attempt_count = attempt_count + 1, + last_attempt_at = ?, + last_error = ? + WHERE project_id = ? AND noteable_type = ? AND noteable_iid = ?", + rusqlite::params![now_ms(), error, project_id, noteable_type.as_str(), noteable_iid], + )?; + Ok(()) +} +``` + +**Acceptance Criteria:** +- [ ] Updated entities queued for discussion fetch +- [ ] Success removes from queue +- [ ] Failure increments attempt_count +- [ ] Processing bounded per run (max 100) +- [ ] Exponential backoff respects attempt_count + +--- + +### 6.3 Document Regenerator + +**File:** `src/documents/regenerator.rs` + +```rust +use rusqlite::Connection; + +use crate::core::error::Result; +use crate::documents::{ + extract_issue_document, extract_mr_document, extract_discussion_document, + DocumentData, SourceType, +}; +use crate::ingestion::dirty_tracker::{get_dirty_sources, clear_dirty}; + +/// Result of regeneration run. +#[derive(Debug, Default)] +pub struct RegenerateResult { + pub regenerated: usize, + pub unchanged: usize, + pub errored: usize, +} + +/// Regenerate documents from dirty queue. +/// +/// Process: +/// 1. Query dirty_sources ordered by queued_at +/// 2. For each: regenerate document, compute new hash +/// 3. ALWAYS upsert document (labels/paths may change even if content_hash unchanged) +/// 4. 
Track whether content_hash changed (for stats) +/// 5. Delete from dirty_sources (or record error on failure) +pub fn regenerate_dirty_documents(conn: &Connection) -> Result { + let dirty = get_dirty_sources(conn)?; + let mut result = RegenerateResult::default(); + + for (source_type, source_id) in &dirty { + match regenerate_one(conn, *source_type, *source_id) { + Ok(changed) => { + if changed { + result.regenerated += 1; + } else { + result.unchanged += 1; + } + clear_dirty(conn, *source_type, *source_id)?; + } + Err(e) => { + // Fail-soft: record error but continue processing remaining items + record_dirty_error(conn, *source_type, *source_id, &e.to_string())?; + result.errored += 1; + } + } + } + + Ok(result) +} + +/// Regenerate a single document. Returns true if content_hash changed. +/// +/// If the source entity has been deleted, the corresponding document +/// is also deleted (cascade cleans up labels, paths, embeddings). +fn regenerate_one( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result { + // Extractors return Option: None means source entity was deleted + let doc = match source_type { + SourceType::Issue => extract_issue_document(conn, source_id)?, + SourceType::MergeRequest => extract_mr_document(conn, source_id)?, + SourceType::Discussion => extract_discussion_document(conn, source_id)?, + }; + + let Some(doc) = doc else { + // Source was deleted — remove the document (cascade handles FTS/embeddings) + delete_document(conn, source_type, source_id)?; + return Ok(true); + }; + + let existing_hash = get_existing_hash(conn, source_type, source_id)?; + let changed = existing_hash.as_ref() != Some(&doc.content_hash); + + // Always upsert: labels/paths can change independently of content_hash + upsert_document(conn, &doc)?; + + Ok(changed) +} + +/// Delete a document by source identity (cascade handles FTS trigger, labels, paths, embeddings). 
+fn delete_document( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result<()> { + conn.execute( + "DELETE FROM documents WHERE source_type = ? AND source_id = ?", + rusqlite::params![source_type.as_str(), source_id], + )?; + Ok(()) +} + +/// Record a regeneration error on a dirty source for retry. +fn record_dirty_error( + conn: &Connection, + source_type: SourceType, + source_id: i64, + error: &str, +) -> Result<()> { + conn.execute( + "UPDATE dirty_sources + SET attempt_count = attempt_count + 1, + last_attempt_at = ?, + last_error = ? + WHERE source_type = ? AND source_id = ?", + rusqlite::params![crate::core::time::now_ms(), error, source_type.as_str(), source_id], + )?; + Ok(()) +} + +fn get_existing_hash( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result> { + let mut stmt = conn.prepare( + "SELECT content_hash FROM documents WHERE source_type = ? AND source_id = ?" + )?; + + let hash: Option = stmt + .query_row(rusqlite::params![source_type.as_str(), source_id], |row| row.get(0)) + .ok(); + + Ok(hash) +} + +fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { + // Upsert main document + conn.execute( + "INSERT INTO documents + (source_type, source_id, project_id, author_username, label_names, + created_at, updated_at, url, title, content_text, content_hash, + is_truncated, truncated_reason) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(source_type, source_id) DO UPDATE SET + author_username = excluded.author_username, + label_names = excluded.label_names, + updated_at = excluded.updated_at, + url = excluded.url, + title = excluded.title, + content_text = excluded.content_text, + content_hash = excluded.content_hash, + is_truncated = excluded.is_truncated, + truncated_reason = excluded.truncated_reason", + rusqlite::params![ + doc.source_type.as_str(), + doc.source_id, + doc.project_id, + doc.author_username, + serde_json::to_string(&doc.labels)?, + doc.created_at, + doc.updated_at, + doc.url, + doc.title, + doc.content_text, + doc.content_hash, + doc.is_truncated, + doc.truncated_reason, + ], + )?; + + // Get inserted/updated document ID + let doc_id = get_document_id(conn, doc.source_type, doc.source_id)?; + + // Update labels + conn.execute( + "DELETE FROM document_labels WHERE document_id = ?", + [doc_id], + )?; + for label in &doc.labels { + conn.execute( + "INSERT INTO document_labels (document_id, label_name) VALUES (?, ?)", + rusqlite::params![doc_id, label], + )?; + } + + // Update paths + conn.execute( + "DELETE FROM document_paths WHERE document_id = ?", + [doc_id], + )?; + for path in &doc.paths { + conn.execute( + "INSERT INTO document_paths (document_id, path) VALUES (?, ?)", + rusqlite::params![doc_id, path], + )?; + } + + Ok(()) +} + +fn get_document_id( + conn: &Connection, + source_type: SourceType, + source_id: i64, +) -> Result { + let id: i64 = conn.query_row( + "SELECT id FROM documents WHERE source_type = ? AND source_id = ?", + rusqlite::params![source_type.as_str(), source_id], + |row| row.get(0), + )?; + Ok(id) +} +``` + +**Acceptance Criteria:** +- [ ] Dirty sources get documents regenerated +- [ ] Hash comparison prevents unnecessary updates +- [ ] FTS triggers fire on document update +- [ ] Queue cleared after processing + +--- + +### 6.4 CLI: `gi sync` + +**File:** `src/cli/commands/sync.rs` + +```rust +//! Sync command - orchestrate full sync pipeline. 
+ +use serde::Serialize; + +use crate::core::error::Result; +use crate::Config; + +/// Sync result summary. +#[derive(Debug, Serialize)] +pub struct SyncResult { + pub issues_updated: usize, + pub mrs_updated: usize, + pub discussions_fetched: usize, + pub documents_regenerated: usize, + pub documents_embedded: usize, +} + +/// Sync options. +#[derive(Debug, Default)] +pub struct SyncOptions { + pub full: bool, // Reset cursors, fetch everything + pub force: bool, // Override stale lock + pub no_embed: bool, // Skip embedding step + pub no_docs: bool, // Skip document regeneration +} + +/// Run sync orchestration. +/// +/// Steps: +/// 1. Acquire app lock with heartbeat +/// 2. Ingest delta (issues, MRs) based on cursors +/// 3. Process pending_discussion_fetches queue (bounded) +/// 4. Apply rolling backfill window (configurable, default 14 days) +/// 5. Regenerate documents from dirty_sources +/// 6. Embed documents with changed content_hash +/// 7. Release lock, record sync_run +pub async fn run_sync(config: &Config, options: SyncOptions) -> Result { + // Implementation uses existing ingestion orchestrator + // and new document/embedding pipelines + todo!() +} + +/// Print human-readable sync output. +pub fn print_sync(result: &SyncResult, elapsed_secs: u64) { + println!("Sync complete:"); + println!(" Issues updated: {:>6}", result.issues_updated); + println!(" MRs updated: {:>6}", result.mrs_updated); + println!(" Discussions fetched: {:>6}", result.discussions_fetched); + println!(" Documents regenerated: {:>6}", result.documents_regenerated); + println!(" Documents embedded: {:>6}", result.documents_embedded); + println!(" Elapsed: {}m {}s", elapsed_secs / 60, elapsed_secs % 60); +} + +/// Print JSON sync output for robot mode. 
+pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) { + let output = serde_json::json!({ + "ok": true, + "data": result, + "meta": { + "elapsed_ms": elapsed_ms + } + }); + println!("{}", serde_json::to_string_pretty(&output).unwrap()); +} +``` + +**CLI integration:** +```rust +/// Sync subcommand arguments. +#[derive(Args)] +pub struct SyncArgs { + /// Reset cursors, fetch everything + #[arg(long)] + full: bool, + + /// Override stale lock + #[arg(long)] + force: bool, + + /// Skip embedding step + #[arg(long)] + no_embed: bool, + + /// Skip document regeneration + #[arg(long)] + no_docs: bool, +} +``` + +**Acceptance Criteria:** +- [ ] Orchestrates full sync pipeline +- [ ] Respects app lock +- [ ] `--full` resets cursors +- [ ] `--no-embed` skips embedding +- [ ] `--no-docs` skips document regeneration +- [ ] Progress reporting in human mode +- [ ] JSON summary in robot mode + +--- + +## Testing Strategy + +### Unit Tests + +| Module | Test File | Coverage | +|--------|-----------|----------| +| Document extractor | `src/documents/extractor.rs` (mod tests) | Issue/MR/discussion extraction | +| Truncation | `src/documents/truncation.rs` (mod tests) | All edge cases | +| RRF ranking | `src/search/rrf.rs` (mod tests) | Score computation, merging | +| Content hash | `src/documents/extractor.rs` (mod tests) | Deterministic hashing | + +### Integration Tests + +| Feature | Test File | Coverage | +|---------|-----------|----------| +| FTS search | `tests/fts_search.rs` | Stemming, empty results | +| Embedding storage | `tests/embedding.rs` | sqlite-vec operations | +| Hybrid search | `tests/hybrid_search.rs` | Combined retrieval | +| Sync orchestration | `tests/sync.rs` | Full pipeline | + +### Golden Query Suite + +**File:** `tests/fixtures/golden_queries.json` + +```json +[ + { + "query": "authentication redesign", + "expected_urls": [".../-/issues/234", ".../-/merge_requests/847"], + "min_results": 1, + "max_rank": 10 + } +] +``` + +Each query must have at 
least one expected URL in top 10 results. + +--- + +## CLI Smoke Tests + +| Command | Expected | Pass Criteria | +|---------|----------|---------------| +| `gi generate-docs` | Progress, count | Completes, count > 0 | +| `gi generate-docs` (re-run) | 0 regenerated | Hash comparison works | +| `gi embed` | Progress, count | Completes, count matches docs | +| `gi embed` (re-run) | 0 embedded | Skips unchanged | +| `gi stats` | Coverage stats | Shows 100% after embed | +| `gi search "auth" --mode=lexical` | Results | Works without Ollama | +| `gi search "auth"` | Hybrid results | Vector + FTS combined | +| `gi search "auth" --explain` | Rank breakdown | Shows vector/FTS/RRF | +| `gi search "auth" --type=mr` | Filtered results | Only MRs | +| `gi search "auth" --label=bug` | Filtered results | Only labeled docs | +| `gi search "nonexistent123"` | No results | Graceful empty state | +| `gi sync` | Full pipeline | All steps complete | +| `gi sync --no-embed` | Skip embedding | Docs generated, not embedded | + +--- + +## Data Integrity Checks + +- [ ] `documents` count = issues + MRs + discussions +- [ ] `documents_fts` count = `documents` count +- [ ] `embeddings` count = `documents` count (after full embed) +- [ ] `embedding_metadata.content_hash` = `documents.content_hash` for all rows +- [ ] All `document_labels` reference valid documents +- [ ] All `document_paths` reference valid documents +- [ ] No orphaned embeddings (embeddings.rowid without matching documents.id) +- [ ] Discussion documents exclude system notes +- [ ] Discussion documents include parent title + +--- + +## Success Criteria + +Checkpoint 3 is complete when: + +1. **Lexical search works without Ollama** + - `gi search "query" --mode=lexical` returns relevant results + - All filters functional + +2. **Semantic search works with Ollama** + - `gi embed` completes successfully + - `gi search "query"` returns semantically relevant results + - `--explain` shows ranking breakdown + +3. 
**Hybrid search combines both** + - Documents appearing in both retrievers rank higher + - Graceful degradation when Ollama unavailable + +4. **Incremental sync is efficient** + - `gi sync` only processes changed entities + - Re-embedding only happens for changed documents + - Progress visible during long syncs + +5. **Data integrity maintained** + - All counts match between tables + - No orphaned records + - Hashes consistent + +6. **Tests pass** + - Unit tests for core algorithms + - Integration tests for pipelines + - Golden queries return expected results diff --git a/docs/robot-mode-design.md b/docs/robot-mode-design.md new file mode 100644 index 0000000..836a396 --- /dev/null +++ b/docs/robot-mode-design.md @@ -0,0 +1,239 @@ +# Robot Mode Design + +## Overview + +Robot mode optimizes the `gi` CLI for AI agent consumption with structured JSON output, meaningful exit codes, and token-efficient responses. + +## Activation + +```bash +# Explicit flag +gi --robot list issues + +# Auto-detection (when stdout is not a TTY) +gi list issues | jq . 
+ +# Environment variable +GI_ROBOT=1 gi list issues +``` + +## Global Flags + +| Flag | Description | +|------|-------------| +| `--robot` | Force JSON output, structured errors | +| `--quiet` | Suppress progress/spinners (implied by --robot) | + +## Exit Codes + +| Code | ErrorCode | Meaning | +|------|-----------|---------| +| 0 | - | Success | +| 1 | INTERNAL_ERROR | Unknown/internal error | +| 2 | CONFIG_NOT_FOUND | Config file missing | +| 3 | CONFIG_INVALID | Config file malformed | +| 4 | TOKEN_NOT_SET | GitLab token not configured | +| 5 | GITLAB_AUTH_FAILED | Authentication failed | +| 6 | GITLAB_NOT_FOUND | Resource not found | +| 7 | GITLAB_RATE_LIMITED | Rate limited | +| 8 | GITLAB_NETWORK_ERROR | Network/connection error | +| 9 | DB_LOCKED | Database locked by another process | +| 10 | DB_ERROR | Database error | +| 11 | MIGRATION_FAILED | Migration failed | +| 12 | IO_ERROR | File I/O error | +| 13 | TRANSFORM_ERROR | Data transformation error | + +## Error Output Format + +When `--robot` is active, errors are JSON on stderr: + +```json +{ + "error": { + "code": "CONFIG_NOT_FOUND", + "message": "Config file not found at ~/.config/gi/config.toml", + "suggestion": "Run 'gi init' to create configuration" + } +} +``` + +## Success Output Format + +All commands return consistent JSON structure: + +```json +{ + "ok": true, + "data": { ... }, + "meta": { + "count": 50, + "total": 1234, + "elapsed_ms": 45 + } +} +``` + +## Command-Specific Output + +### gi list issues --robot + +```json +{ + "ok": true, + "data": { + "issues": [ + { + "iid": 123, + "project": "group/project", + "title": "Bug in login", + "state": "opened", + "author": "username", + "assignees": ["user1"], + "labels": ["bug", "priority::high"], + "discussions": { "total": 5, "unresolved": 2 }, + "updated_at": "2024-01-15T10:30:00Z", + "web_url": "https://..." 
+ } + ] + }, + "meta": { "showing": 50, "total": 234 } +} +``` + +### gi show issue 123 --robot + +```json +{ + "ok": true, + "data": { + "issue": { + "iid": 123, + "project": "group/project", + "title": "Bug in login", + "description": "Full markdown...", + "state": "opened", + "author": "username", + "created_at": "2024-01-10T08:00:00Z", + "updated_at": "2024-01-15T10:30:00Z", + "discussions": [ + { + "id": "abc123", + "resolved": false, + "notes": [ + { + "author": "user1", + "body": "Comment text...", + "created_at": "2024-01-11T09:00:00Z", + "system": false + } + ] + } + ] + } + } +} +``` + +### gi ingest --type issues --robot + +```json +{ + "ok": true, + "data": { + "resource_type": "issues", + "projects": [ + { + "path": "group/project", + "issues_synced": 45, + "discussions_synced": 123 + } + ], + "totals": { + "issues": 45, + "discussions": 123 + } + }, + "meta": { "elapsed_ms": 3400 } +} +``` + +### gi count issues --robot + +```json +{ + "ok": true, + "data": { + "entity": "issues", + "count": 1234, + "breakdown": { + "opened": 456, + "closed": 778 + } + } +} +``` + +### gi doctor --robot + +```json +{ + "ok": true, + "data": { + "success": true, + "checks": { + "config": { "status": "ok", "path": "~/.config/gi/config.toml" }, + "database": { "status": "ok", "version": 6 }, + "gitlab": { "status": "ok", "user": "username" }, + "projects": [ + { "path": "group/project", "status": "ok" } + ] + } + } +} +``` + +### gi sync-status --robot + +```json +{ + "ok": true, + "data": { + "last_sync": { + "status": "completed", + "resource_type": "issues", + "started_at": "2024-01-15T10:00:00Z", + "completed_at": "2024-01-15T10:00:45Z", + "duration_ms": 45000 + }, + "cursors": [ + { + "project": "group/project", + "resource_type": "issues", + "cursor": "2024-01-15T10:00:00Z" + } + ] + } +} +``` + +## Implementation Plan + +### Phase 1: Core Infrastructure +1. Add `--robot` global flag to Cli struct +2. Create `RobotOutput` trait for consistent JSON serialization +3. 
Add exit code mapping from ErrorCode
+4. Implement TTY detection with `std::io::IsTerminal` (stable since Rust 1.70; avoids the unmaintained `atty` crate, RUSTSEC-2021-0145)

+### Phase 2: Command Updates
+1. Update all commands to check robot mode
+2. Add JSON output variants for commands missing them (count, ingest, sync-status)
+3. Suppress progress bars in robot mode
+
+### Phase 3: Error Handling
+1. Update main.rs error handler for robot mode
+2. Add suggestion field to GiError variants
+3. Emit structured JSON errors to stderr
+
+### Phase 4: Documentation
+1. Update AGENTS.md with robot mode commands
+2. Add --robot examples to help text