diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 0b1d587..eeb258a 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -33,7 +33,7 @@ {"id":"bd-1np","title":"[CP1] GitLab types for issues, discussions, notes","description":"## Background\n\nGitLab types define the Rust structs for deserializing GitLab API responses. These types are the foundation for all ingestion work - issues, discussions, and notes must be correctly typed for serde to parse them.\n\n## Approach\n\nAdd types to `src/gitlab/types.rs` with serde derives:\n\n### GitLabIssue\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabIssue {\n pub id: i64, // GitLab global ID\n pub iid: i64, // Project-scoped issue number\n pub project_id: i64,\n pub title: String,\n pub description: Option,\n pub state: String, // \"opened\" | \"closed\"\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub closed_at: Option,\n pub author: GitLabAuthor,\n pub labels: Vec, // Array of label names (CP1 canonical)\n pub web_url: String,\n}\n```\n\nNOTE: `labels_details` intentionally NOT modeled - varies across GitLab versions.\n\n### GitLabAuthor\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabAuthor {\n pub id: i64,\n pub username: String,\n pub name: String,\n}\n```\n\n### GitLabDiscussion\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabDiscussion {\n pub id: String, // String ID like \"6a9c1750b37d...\"\n pub individual_note: bool, // true = standalone comment\n pub notes: Vec,\n}\n```\n\n### GitLabNote\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabNote {\n pub id: i64,\n #[serde(rename = \"type\")]\n pub note_type: Option, // \"DiscussionNote\" | \"DiffNote\" | null\n pub body: String,\n pub author: GitLabAuthor,\n pub created_at: String, // ISO 8601\n pub updated_at: String, // ISO 8601\n pub system: bool, // true for system-generated notes\n #[serde(default)]\n pub resolvable: bool,\n 
#[serde(default)]\n pub resolved: bool,\n pub resolved_by: Option,\n pub resolved_at: Option,\n pub position: Option,\n}\n```\n\n### GitLabNotePosition\n\n```rust\n#[derive(Debug, Clone, Deserialize)]\npub struct GitLabNotePosition {\n pub old_path: Option,\n pub new_path: Option,\n pub old_line: Option,\n pub new_line: Option,\n}\n```\n\n## Acceptance Criteria\n\n- [ ] GitLabIssue deserializes from API response JSON\n- [ ] GitLabAuthor embedded correctly in issue and note\n- [ ] GitLabDiscussion with notes array deserializes\n- [ ] GitLabNote handles null note_type (use Option)\n- [ ] GitLabNote uses #[serde(rename = \"type\")] for reserved keyword\n- [ ] resolvable/resolved default to false via #[serde(default)]\n- [ ] All timestamp fields are String (ISO 8601 parsed elsewhere)\n\n## Files\n\n- src/gitlab/types.rs (edit - add types)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/gitlab_types_tests.rs\n#[test] fn deserializes_gitlab_issue_from_json()\n#[test] fn deserializes_gitlab_discussion_from_json()\n#[test] fn handles_null_note_type()\n#[test] fn handles_missing_resolvable_field()\n#[test] fn deserializes_labels_as_string_array()\n```\n\nGREEN: Add type definitions with serde attributes\n\nVERIFY: `cargo test gitlab_types`\n\n## Edge Cases\n\n- note_type can be null, \"DiscussionNote\", or \"DiffNote\"\n- labels array can be empty\n- description can be null\n- resolved_by/resolved_at can be null\n- position is only present for DiffNotes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.150472Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:17:08.842965Z","closed_at":"2026-01-25T22:17:08.842895Z","close_reason":"Implemented GitLabAuthor, GitLabIssue, GitLabDiscussion, GitLabNote, GitLabNotePosition types with 10 passing tests","compaction_level":0,"original_size":0} {"id":"bd-1o1","title":"OBSERV: Add -v/--verbose and --log-format CLI flags","description":"## Background\nUsers and agents need CLI-controlled verbosity 
without knowing RUST_LOG syntax. The -v flag convention (cargo, curl, ssh) is universally understood. --log-format json enables lore sync 2>&1 | jq workflows without reading log files.\n\n## Approach\nAdd two new global flags to the Cli struct in src/cli/mod.rs (insert after the quiet field at line ~37):\n\n```rust\n/// Increase log verbosity (-v, -vv, -vvv)\n#[arg(short = 'v', long = \"verbose\", action = clap::ArgAction::Count, global = true)]\npub verbose: u8,\n\n/// Log format for stderr output: text (default) or json\n#[arg(long = \"log-format\", global = true, value_parser = [\"text\", \"json\"], default_value = \"text\")]\npub log_format: String,\n```\n\nThe existing Cli struct (src/cli/mod.rs:13-42) has these global flags: config, robot, json, color, quiet. The new flags follow the same pattern.\n\nNote: clap::ArgAction::Count allows -v, -vv, -vvv as a single flag with increasing count (0, 1, 2, 3).\n\n## Acceptance Criteria\n- [ ] lore -v sync parses without error (verbose=1)\n- [ ] lore -vv sync parses (verbose=2)\n- [ ] lore -vvv sync parses (verbose=3)\n- [ ] lore --log-format json sync parses (log_format=\"json\")\n- [ ] lore --log-format text sync parses (default)\n- [ ] lore --log-format xml sync errors (invalid value)\n- [ ] Existing commands unaffected (verbose defaults to 0, log_format to \"text\")\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/cli/mod.rs (modify Cli struct, lines 13-42)\n\n## TDD Loop\nRED: Write test that parses Cli with -v flag and asserts verbose=1\nGREEN: Add the two fields to Cli struct\nVERIFY: cargo test -p lore && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- -v and -q together: both parse fine; conflict resolution happens in subscriber setup (bd-2rr), not here\n- -v flag must be global=true so it works before and after subcommands: lore -v sync AND lore sync -v\n- --log-format is a string, not enum, to keep Cli struct 
simple","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.421339Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:10:22.585947Z","closed_at":"2026-02-04T17:10:22.585905Z","close_reason":"Added -v/--verbose (count) and --log-format (text|json) global CLI flags","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o1","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-04T15:53:55.422103Z","created_by":"tayloreernisse"}]} {"id":"bd-1o4h","title":"OBSERV: Define StageTiming struct in src/core/metrics.rs","description":"## Background\nStageTiming is the materialized view of span timing data. It's the data structure that flows through robot JSON output, sync_runs.metrics_json, and the human-readable timing summary. Defined in a new file because it's genuinely new functionality that doesn't fit existing modules.\n\n## Approach\nCreate src/core/metrics.rs:\n\n```rust\nuse serde::Serialize;\n\nfn is_zero(v: &usize) -> bool { *v == 0 }\n\n#[derive(Debug, Clone, Serialize)]\npub struct StageTiming {\n pub name: String,\n #[serde(skip_serializing_if = \"Option::is_none\")]\n pub project: Option,\n pub elapsed_ms: u64,\n pub items_processed: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub items_skipped: usize,\n #[serde(skip_serializing_if = \"is_zero\")]\n pub errors: usize,\n #[serde(skip_serializing_if = \"Vec::is_empty\")]\n pub sub_stages: Vec,\n}\n```\n\nRegister module in src/core/mod.rs (line ~11, add):\n```rust\npub mod metrics;\n```\n\nThe is_zero helper is a private function used by serde's skip_serializing_if. 
It must take &usize (reference) and return bool.\n\n## Acceptance Criteria\n- [ ] StageTiming serializes to JSON matching PRD Section 4.6.2 example\n- [ ] items_skipped omitted when 0\n- [ ] errors omitted when 0\n- [ ] sub_stages omitted when empty vec\n- [ ] project omitted when None\n- [ ] name, elapsed_ms, items_processed always present\n- [ ] Struct is Debug + Clone + Serialize\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (new file)\n- src/core/mod.rs (register module, add line after existing pub mod declarations)\n\n## TDD Loop\nRED:\n - test_stage_timing_serialization: create StageTiming with sub_stages, serialize, assert JSON structure\n - test_stage_timing_zero_fields_omitted: errors=0, items_skipped=0, assert no \"errors\" or \"items_skipped\" keys\n - test_stage_timing_empty_sub_stages: sub_stages=vec![], assert no \"sub_stages\" key\nGREEN: Create metrics.rs with StageTiming struct and is_zero helper\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- is_zero must be a function, not a closure (serde skip_serializing_if requires a function path)\n- Vec::is_empty is a method on Vec, and serde accepts \"Vec::is_empty\" as a path for skip_serializing_if\n- Recursive StageTiming (sub_stages contains StageTiming): serde handles this naturally, no special handling needed","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.907234Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:21:40.915842Z","closed_at":"2026-02-04T17:21:40.915794Z","close_reason":"Created src/core/metrics.rs with StageTiming struct, serde skip_serializing_if for zero/empty fields, 5 tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1o4h","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-04T15:54:31.910015Z","created_by":"tayloreernisse"}]} -{"id":"bd-1oo","title":"Register migration 015 in db.rs 
and create migration 016 for mr_file_changes","description":"## Background\n\nThis bead creates the `mr_file_changes` table that stores which files each MR touched, enabling Gate 4 (file-history) and Gate 5 (trace). It maps MRs to the file paths they modify.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.1 (Schema).\n\n## Codebase Context — CRITICAL Migration Numbering\n\n- **LATEST_SCHEMA_VERSION = 14** (MIGRATIONS array in db.rs includes 001-014)\n- **Migration 015 exists on disk** (`migrations/015_commit_shas_and_closes_watermark.sql`) but is **NOT registered** in `src/core/db.rs` MIGRATIONS array\n- `merge_commit_sha` and `squash_commit_sha` are already on merge_requests (added by 015 SQL) and already used in `src/ingestion/merge_requests.rs`\n- `closes_issues_synced_for_updated_at` also added by 015 and used in orchestrator.rs\n- **This bead must FIRST register migration 015 in db.rs**, then create migration 016 for mr_file_changes\n- pending_dependent_fetches already has `job_type='mr_diffs'` in CHECK constraint (migration 011)\n- Schema version auto-computes: `LATEST_SCHEMA_VERSION = MIGRATIONS.len() as i32`\n\n## Approach\n\n### Step 1: Register existing migration 015 in db.rs\n\nAdd to MIGRATIONS array in `src/core/db.rs` (after the \"014\" entry):\n\n```rust\n(\n \"015\",\n include_str!(\"../../migrations/015_commit_shas_and_closes_watermark.sql\"),\n),\n```\n\nThis makes LATEST_SCHEMA_VERSION = 15.\n\n### Step 2: Create migration 016 for mr_file_changes\n\nCreate `migrations/016_mr_file_changes.sql`:\n\n```sql\n-- Migration 016: MR file changes table\n-- Powers file-history and trace commands (Gates 4-5)\n\nCREATE TABLE mr_file_changes (\n id INTEGER PRIMARY KEY,\n merge_request_id INTEGER NOT NULL REFERENCES merge_requests(id) ON DELETE CASCADE,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n old_path TEXT,\n new_path TEXT NOT NULL,\n change_type TEXT NOT NULL CHECK (change_type IN ('added', 
'modified', 'renamed', 'deleted')),\n UNIQUE(merge_request_id, new_path)\n);\n\nCREATE INDEX idx_mfc_project_path ON mr_file_changes(project_id, new_path);\nCREATE INDEX idx_mfc_project_old_path ON mr_file_changes(project_id, old_path) WHERE old_path IS NOT NULL;\nCREATE INDEX idx_mfc_mr ON mr_file_changes(merge_request_id);\nCREATE INDEX idx_mfc_renamed ON mr_file_changes(project_id, change_type) WHERE change_type = 'renamed';\n\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (16, strftime('%s', 'now') * 1000, 'MR file changes table');\n```\n\n### Step 3: Register migration 016 in db.rs\n\n```rust\n(\n \"016\",\n include_str!(\"../../migrations/016_mr_file_changes.sql\"),\n),\n```\n\nLATEST_SCHEMA_VERSION will auto-compute to 16.\n\n## Acceptance Criteria\n\n- [ ] Migration 015 registered in MIGRATIONS array in src/core/db.rs\n- [ ] Migration file exists at `migrations/016_mr_file_changes.sql`\n- [ ] `mr_file_changes` table has columns: id, merge_request_id, project_id, old_path, new_path, change_type\n- [ ] UNIQUE constraint on (merge_request_id, new_path)\n- [ ] CHECK constraint on change_type: added, modified, renamed, deleted\n- [ ] 4 indexes: project+new_path, project+old_path (partial), mr_id, project+renamed (partial)\n- [ ] Migration 016 registered in MIGRATIONS array\n- [ ] LATEST_SCHEMA_VERSION auto-computes to 16\n- [ ] `lore migrate` applies both 015 and 016 successfully on a v14 database\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/db.rs` (register migrations 015 AND 016 in MIGRATIONS array)\n- `migrations/016_mr_file_changes.sql` (NEW)\n\n## TDD Loop\n\nRED: `lore migrate` on v14 database says \"already up to date\" (015 not registered)\n\nGREEN: Register 015 in db.rs, create 016 file, register 016 in db.rs. 
`lore migrate` applies both.\n\nVERIFY:\n```bash\ncargo check --all-targets\nlore --robot migrate\nsqlite3 ~/.local/share/lore/lore.db '.schema mr_file_changes'\nsqlite3 ~/.local/share/lore/lore.db \"SELECT version FROM schema_version ORDER BY version DESC LIMIT 1\"\n```\n\n## Edge Cases\n\n- Databases already at v15 via manual migration: 015 will be skipped, only 016 applied\n- old_path is NULL for added files, populated for renamed/deleted\n- No lines_added/lines_removed columns (spec does not require them; removed to match spec exactly)\n- Partial indexes only index relevant rows for rename chain BFS performance\n","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:08.837816Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:53:54.984109Z","compaction_level":0,"original_size":0,"labels":["gate-4","phase-b","schema"],"dependencies":[{"issue_id":"bd-1oo","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.843541Z","created_by":"tayloreernisse"},{"issue_id":"bd-1oo","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T21:34:16.505965Z","created_by":"tayloreernisse"}]} +{"id":"bd-1oo","title":"Register migration 015 in db.rs and create migration 016 for mr_file_changes","description":"## Background\n\nThis bead creates the `mr_file_changes` table that stores which files each MR touched, enabling Gate 4 (file-history) and Gate 5 (trace). 
It maps MRs to the file paths they modify.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.1 (Schema).\n\n## Codebase Context — CRITICAL Migration Numbering\n\n- **LATEST_SCHEMA_VERSION = 14** (MIGRATIONS array in db.rs includes 001-014)\n- **Migration 015 exists on disk** (`migrations/015_commit_shas_and_closes_watermark.sql`) but is **NOT registered** in `src/core/db.rs` MIGRATIONS array\n- `merge_commit_sha` and `squash_commit_sha` are already on merge_requests (added by 015 SQL) and already used in `src/ingestion/merge_requests.rs`\n- `closes_issues_synced_for_updated_at` also added by 015 and used in orchestrator.rs\n- **This bead must FIRST register migration 015 in db.rs**, then create migration 016 for mr_file_changes\n- pending_dependent_fetches already has `job_type='mr_diffs'` in CHECK constraint (migration 011)\n- Schema version auto-computes: `LATEST_SCHEMA_VERSION = MIGRATIONS.len() as i32`\n\n## Approach\n\n### Step 1: Register existing migration 015 in db.rs\n\nAdd to MIGRATIONS array in `src/core/db.rs` (after the \"014\" entry):\n\n```rust\n(\n \"015\",\n include_str!(\"../../migrations/015_commit_shas_and_closes_watermark.sql\"),\n),\n```\n\nThis makes LATEST_SCHEMA_VERSION = 15.\n\n### Step 2: Create migration 016 for mr_file_changes\n\nCreate `migrations/016_mr_file_changes.sql`:\n\n```sql\n-- Migration 016: MR file changes table\n-- Powers file-history and trace commands (Gates 4-5)\n\nCREATE TABLE mr_file_changes (\n id INTEGER PRIMARY KEY,\n merge_request_id INTEGER NOT NULL REFERENCES merge_requests(id) ON DELETE CASCADE,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n old_path TEXT,\n new_path TEXT NOT NULL,\n change_type TEXT NOT NULL CHECK (change_type IN ('added', 'modified', 'renamed', 'deleted')),\n UNIQUE(merge_request_id, new_path)\n);\n\nCREATE INDEX idx_mfc_project_path ON mr_file_changes(project_id, new_path);\nCREATE INDEX idx_mfc_project_old_path ON mr_file_changes(project_id, 
old_path) WHERE old_path IS NOT NULL;\nCREATE INDEX idx_mfc_mr ON mr_file_changes(merge_request_id);\nCREATE INDEX idx_mfc_renamed ON mr_file_changes(project_id, change_type) WHERE change_type = 'renamed';\n\nINSERT INTO schema_version (version, applied_at, description)\nVALUES (16, strftime('%s', 'now') * 1000, 'MR file changes table');\n```\n\n### Step 3: Register migration 016 in db.rs\n\n```rust\n(\n \"016\",\n include_str!(\"../../migrations/016_mr_file_changes.sql\"),\n),\n```\n\nLATEST_SCHEMA_VERSION will auto-compute to 16.\n\n## Acceptance Criteria\n\n- [ ] Migration 015 registered in MIGRATIONS array in src/core/db.rs\n- [ ] Migration file exists at `migrations/016_mr_file_changes.sql`\n- [ ] `mr_file_changes` table has columns: id, merge_request_id, project_id, old_path, new_path, change_type\n- [ ] UNIQUE constraint on (merge_request_id, new_path)\n- [ ] CHECK constraint on change_type: added, modified, renamed, deleted\n- [ ] 4 indexes: project+new_path, project+old_path (partial), mr_id, project+renamed (partial)\n- [ ] Migration 016 registered in MIGRATIONS array\n- [ ] LATEST_SCHEMA_VERSION auto-computes to 16\n- [ ] `lore migrate` applies both 015 and 016 successfully on a v14 database\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/db.rs` (register migrations 015 AND 016 in MIGRATIONS array)\n- `migrations/016_mr_file_changes.sql` (NEW)\n\n## TDD Loop\n\nRED: `lore migrate` on v14 database says \"already up to date\" (015 not registered)\n\nGREEN: Register 015 in db.rs, create 016 file, register 016 in db.rs. 
`lore migrate` applies both.\n\nVERIFY:\n```bash\ncargo check --all-targets\nlore --robot migrate\nsqlite3 ~/.local/share/lore/lore.db '.schema mr_file_changes'\nsqlite3 ~/.local/share/lore/lore.db \"SELECT version FROM schema_version ORDER BY version DESC LIMIT 1\"\n```\n\n## Edge Cases\n\n- Databases already at v15 via manual migration: 015 will be skipped, only 016 applied\n- old_path is NULL for added files, populated for renamed/deleted\n- No lines_added/lines_removed columns (spec does not require them; removed to match spec exactly)\n- Partial indexes only index relevant rows for rename chain BFS performance\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:08.837816Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:40:46.766136Z","closed_at":"2026-02-05T21:40:46.766074Z","close_reason":"Completed: registered migration 015 in db.rs MIGRATIONS array, created migration 016 (mr_file_changes table with 4 indexes, CHECK constraint, UNIQUE constraint), registered 016 in db.rs. LATEST_SCHEMA_VERSION auto-computes to 16. cargo check, clippy, and fmt all pass.","compaction_level":0,"original_size":0,"labels":["gate-4","phase-b","schema"],"dependencies":[{"issue_id":"bd-1oo","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:08.843541Z","created_by":"tayloreernisse"},{"issue_id":"bd-1oo","depends_on_id":"bd-hu3","type":"blocks","created_at":"2026-02-02T21:34:16.505965Z","created_by":"tayloreernisse"}]} {"id":"bd-1qf","title":"[CP1] Discussion and note transformers","description":"## Background\n\nDiscussion and note transformers convert GitLab API discussion responses into our normalized schema. They compute derived fields like `first_note_at`, `last_note_at`, resolvable/resolved status, and note positions. 
These are pure functions with no I/O.\n\n## Approach\n\nCreate transformer module with:\n\n### Structs\n\n```rust\n// src/gitlab/transformers/discussion.rs\n\npub struct NormalizedDiscussion {\n pub gitlab_discussion_id: String,\n pub project_id: i64,\n pub issue_id: i64,\n pub noteable_type: String, // \"Issue\"\n pub individual_note: bool,\n pub first_note_at: Option, // min(note.created_at) in ms epoch\n pub last_note_at: Option, // max(note.created_at) in ms epoch\n pub last_seen_at: i64,\n pub resolvable: bool, // any note is resolvable\n pub resolved: bool, // all resolvable notes are resolved\n}\n\npub struct NormalizedNote {\n pub gitlab_id: i64,\n pub project_id: i64,\n pub note_type: Option, // \"DiscussionNote\" | \"DiffNote\" | null\n pub is_system: bool, // from note.system\n pub author_username: String,\n pub body: String,\n pub created_at: i64, // ms epoch\n pub updated_at: i64, // ms epoch\n pub last_seen_at: i64,\n pub position: i32, // 0-indexed array position\n pub resolvable: bool,\n pub resolved: bool,\n pub resolved_by: Option,\n pub resolved_at: Option,\n}\n```\n\n### Functions\n\n```rust\npub fn transform_discussion(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n local_issue_id: i64,\n) -> NormalizedDiscussion\n\npub fn transform_notes(\n gitlab_discussion: &GitLabDiscussion,\n local_project_id: i64,\n) -> Vec\n```\n\n## Acceptance Criteria\n\n- [ ] `NormalizedDiscussion` struct with all fields\n- [ ] `NormalizedNote` struct with all fields\n- [ ] `transform_discussion` computes first_note_at/last_note_at from notes array\n- [ ] `transform_discussion` computes resolvable (any note is resolvable)\n- [ ] `transform_discussion` computes resolved (all resolvable notes resolved)\n- [ ] `transform_notes` preserves array order via position field (0-indexed)\n- [ ] `transform_notes` maps system flag to is_system\n- [ ] Unit tests cover all computed fields\n\n## Files\n\n- src/gitlab/transformers/mod.rs (add `pub mod 
discussion;`)\n- src/gitlab/transformers/discussion.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/discussion_transformer_tests.rs\n#[test] fn transforms_discussion_payload_to_normalized_schema()\n#[test] fn extracts_notes_array_from_discussion()\n#[test] fn sets_individual_note_flag_correctly()\n#[test] fn flags_system_notes_with_is_system_true()\n#[test] fn preserves_note_order_via_position_field()\n#[test] fn computes_first_note_at_and_last_note_at_correctly()\n#[test] fn computes_resolvable_and_resolved_status()\n```\n\nGREEN: Implement transform_discussion and transform_notes\n\nVERIFY: `cargo test discussion_transformer`\n\n## Edge Cases\n\n- Discussion with single note - first_note_at == last_note_at\n- All notes are system notes - still compute timestamps\n- No notes resolvable - resolvable=false, resolved=false\n- Mix of resolved/unresolved notes - resolved=false until all done","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.196079Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:27:11.485112Z","closed_at":"2026-01-25T22:27:11.485058Z","close_reason":"Implemented NormalizedDiscussion, NormalizedNote, transform_discussion, transform_notes with 9 passing unit tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1qf","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.347218Z","created_by":"tayloreernisse"}]} {"id":"bd-1qz","title":"[CP1] Database migration 002_issues.sql","description":"Create migration file with tables for issues, labels, issue_labels, discussions, and notes.\n\n## Tables\n\n### issues\n- id INTEGER PRIMARY KEY\n- gitlab_id INTEGER UNIQUE NOT NULL\n- project_id INTEGER NOT NULL REFERENCES projects(id)\n- iid INTEGER NOT NULL\n- title TEXT, description TEXT, state TEXT\n- author_username TEXT\n- created_at, updated_at, last_seen_at INTEGER (ms epoch UTC)\n- discussions_synced_for_updated_at INTEGER (watermark for dependent sync)\n- web_url 
TEXT\n- raw_payload_id INTEGER REFERENCES raw_payloads(id)\n\n### labels (name-only for CP1)\n- id INTEGER PRIMARY KEY\n- gitlab_id INTEGER (optional, for future Labels API)\n- project_id INTEGER NOT NULL REFERENCES projects(id)\n- name TEXT NOT NULL\n- color TEXT, description TEXT (nullable, deferred)\n- UNIQUE(project_id, name)\n\n### issue_labels (junction)\n- issue_id, label_id with CASCADE DELETE\n- Clear existing links before INSERT to handle removed labels\n\n### discussions\n- gitlab_discussion_id TEXT (string ID from API)\n- project_id, issue_id/merge_request_id FKs\n- noteable_type TEXT ('Issue' | 'MergeRequest')\n- individual_note INTEGER, first_note_at, last_note_at, last_seen_at\n- resolvable, resolved flags\n- CHECK constraint for Issue vs MR exclusivity\n\n### notes\n- gitlab_id INTEGER UNIQUE NOT NULL\n- discussion_id, project_id FKs\n- note_type, is_system, author_username, body\n- timestamps, position (array order)\n- resolution fields, DiffNote position fields\n\n## Indexes\n- idx_issues_project_updated, idx_issues_author, idx_issues_discussions_sync\n- uq_issues_project_iid, uq_labels_project_name\n- idx_issue_labels_label\n- uq_discussions_project_discussion_id, idx_discussions_issue/mr/last_note\n- idx_notes_discussion/author/system\n\nFiles: migrations/002_issues.sql\nDone when: Migration applies cleanly on top of 001_initial.sql, schema_version = 2","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:42:31.464544Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.685262Z","deleted_at":"2026-01-25T17:02:01.685258Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1re","title":"[CP1] gi show issue command","description":"Show issue details with discussions.\n\nFlags:\n- --project=PATH (required if iid is ambiguous across projects)\n\nOutput:\n- Title, project, state, author, dates, labels, URL\n- 
Description text\n- All discussions with notes (formatted thread view)\n\nHandle ambiguity: If multiple projects have same iid, prompt for --project or show error.\n\nFiles: src/cli/commands/show.ts\nDone when: Issue detail view displays all fields including threaded discussions","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:29.826786Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.153211Z","deleted_at":"2026-01-25T15:21:35.153208Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} @@ -53,7 +53,7 @@ {"id":"bd-1zj6","title":"OBSERV: Enrich robot JSON meta with run_id and stages","description":"## Background\nRobot JSON currently has a flat meta.elapsed_ms. This enriches it with run_id and a stages array, making every lore --robot sync output a complete performance profile.\n\n## Approach\nThe robot JSON output is built in src/cli/commands/sync.rs. The current SyncResult (line 15-25) is serialized into the data field. The meta field is built alongside it.\n\n1. Find or create the SyncMeta struct (likely near SyncResult). Add fields:\n```rust\n#[derive(Debug, Serialize)]\nstruct SyncMeta {\n run_id: String,\n elapsed_ms: u64,\n stages: Vec,\n}\n```\n\n2. After run_sync() completes, extract timings from MetricsLayer:\n```rust\nlet stages = metrics_handle.extract_timings();\nlet meta = SyncMeta {\n run_id: run_id.to_string(),\n elapsed_ms: start.elapsed().as_millis() as u64,\n stages,\n};\n```\n\n3. Build the JSON envelope:\n```rust\nlet output = serde_json::json!({\n \"ok\": true,\n \"data\": result,\n \"meta\": meta,\n});\n```\n\nThe metrics_handle (Arc) must be passed from main.rs to the command handler. This requires adding a parameter to handle_sync_cmd() and run_sync(), or using a global. 
Prefer parameter passing.\n\nSame pattern for standalone ingest: add stages to IngestMeta.\n\n## Acceptance Criteria\n- [ ] lore --robot sync output includes meta.run_id (string, 8 hex chars)\n- [ ] lore --robot sync output includes meta.stages (array of StageTiming)\n- [ ] meta.elapsed_ms still present (total wall clock time)\n- [ ] Each stage has name, elapsed_ms, items_processed at minimum\n- [ ] Top-level stages have sub_stages when applicable\n- [ ] lore --robot ingest also includes run_id and stages\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/cli/commands/sync.rs (add SyncMeta struct, wire extract_timings)\n- src/cli/commands/ingest.rs (same for standalone ingest)\n- src/main.rs (pass metrics_handle to command handlers)\n\n## TDD Loop\nRED: test_sync_meta_includes_stages (run robot-mode sync, parse JSON, assert meta.stages is array)\nGREEN: Add SyncMeta, extract timings, include in JSON output\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- Empty stages: if sync runs with --no-docs --no-embed, some stages won't exist. stages array is shorter, not padded.\n- extract_timings() called before root span closes: returns incomplete tree. 
Must call AFTER run_sync returns (span is dropped on function exit).\n- metrics_handle clone: MetricsLayer uses Arc internally, clone is cheap (reference count increment).","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:32.062410Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:31:11.073580Z","closed_at":"2026-02-04T17:31:11.073534Z","close_reason":"Wired MetricsLayer into subscriber stack (all 4 branches), added run_id to SyncResult, enriched SyncMeta with run_id + stages Vec, updated print_sync_json to accept MetricsLayer and extract timings","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-1zj6","depends_on_id":"bd-34ek","type":"blocks","created_at":"2026-02-04T15:55:20.085372Z","created_by":"tayloreernisse"},{"issue_id":"bd-1zj6","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-04T15:54:32.063354Z","created_by":"tayloreernisse"}]} {"id":"bd-1zwv","title":"Display assignees, due_date, and milestone in lore issues output","description":"## Background\nThe `lore issues ` command displays issue details but omits key metadata that exists in the database: assignees, due dates, and milestones. Users need this information to understand issue context without opening GitLab.\n\n**System fit**: This data is already ingested during issue sync (migration 005) but the show command never queries it.\n\n## Approach\n\nAll changes in `src/cli/commands/show.rs`:\n\n### 1. Update IssueRow struct (line ~119)\nAdd fields to internal row struct:\n```rust\nstruct IssueRow {\n // ... existing 10 fields ...\n due_date: Option, // NEW\n milestone_title: Option, // NEW\n}\n```\n\n### 2. 
Update find_issue() SQL (line ~137)\nExtend SELECT:\n```sql\nSELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,\n i.created_at, i.updated_at, i.web_url, p.path_with_namespace,\n i.due_date, i.milestone_title -- ADD THESE\nFROM issues i ...\n```\n\nUpdate row mapping to extract columns 10 and 11.\n\n### 3. Add get_issue_assignees() (after get_issue_labels ~line 189)\n```rust\nfn get_issue_assignees(conn: &Connection, issue_id: i64) -> Result> {\n let mut stmt = conn.prepare(\n \"SELECT username FROM issue_assignees WHERE issue_id = ? ORDER BY username\"\n )?;\n let assignees = stmt\n .query_map([issue_id], |row| row.get(0))?\n .collect::, _>>()?;\n Ok(assignees)\n}\n```\n\n### 4. Update IssueDetail struct (line ~59)\n```rust\npub struct IssueDetail {\n // ... existing 12 fields ...\n pub assignees: Vec, // NEW\n pub due_date: Option, // NEW\n pub milestone: Option, // NEW\n}\n```\n\n### 5. Update IssueDetailJson struct (line ~770)\nAdd same 3 fields with identical types.\n\n### 6. Update run_show_issue() (line ~89)\n```rust\nlet assignees = get_issue_assignees(&conn, issue.id)?;\n// In return struct:\nassignees,\ndue_date: issue.due_date,\nmilestone: issue.milestone_title,\n```\n\n### 7. Update print_show_issue() (line ~533, after Author line ~548)\n```rust\nif !issue.assignees.is_empty() {\n println!(\"Assignee{}: {}\",\n if issue.assignees.len() > 1 { \"s\" } else { \"\" },\n issue.assignees.iter().map(|a| format!(\"@{}\", a)).collect::>().join(\", \"));\n}\nif let Some(due) = &issue.due_date {\n println!(\"Due: {}\", due);\n}\nif let Some(ms) = &issue.milestone {\n println!(\"Milestone: {}\", ms);\n}\n```\n\n### 8. 
Update From<&IssueDetail> for IssueDetailJson (line ~799)\n```rust\nassignees: issue.assignees.clone(),\ndue_date: issue.due_date.clone(),\nmilestone: issue.milestone.clone(),\n```\n\n## Acceptance Criteria\n- [ ] `cargo test test_get_issue_assignees` passes (3 tests)\n- [ ] `lore issues ` shows Assignees line when assignees exist\n- [ ] `lore issues ` shows Due line when due_date set\n- [ ] `lore issues ` shows Milestone line when milestone set\n- [ ] `lore -J issues ` includes assignees/due_date/milestone in JSON\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n- `src/cli/commands/show.rs` - ALL changes\n\n## TDD Loop\n\n**RED** - Add tests to `src/cli/commands/show.rs` `#[cfg(test)] mod tests`:\n\n```rust\nuse crate::core::db::{create_connection, run_migrations};\nuse std::path::Path;\n\nfn setup_test_db() -> Connection {\n let conn = create_connection(Path::new(\":memory:\")).unwrap();\n run_migrations(&conn).unwrap();\n conn\n}\n\n#[test]\nfn test_get_issue_assignees_empty() {\n let conn = setup_test_db();\n // seed project + issue with no assignees\n let result = get_issue_assignees(&conn, 1).unwrap();\n assert!(result.is_empty());\n}\n\n#[test]\nfn test_get_issue_assignees_multiple_sorted() {\n let conn = setup_test_db();\n // seed with alice, bob\n let result = get_issue_assignees(&conn, 1).unwrap();\n assert_eq!(result, vec![\"alice\", \"bob\"]); // alphabetical\n}\n\n#[test]\nfn test_get_issue_assignees_single() {\n let conn = setup_test_db();\n // seed with charlie only\n let result = get_issue_assignees(&conn, 1).unwrap();\n assert_eq!(result, vec![\"charlie\"]);\n}\n```\n\n**GREEN** - Implement get_issue_assignees() and struct updates\n\n**VERIFY**: `cargo test test_get_issue_assignees && cargo clippy --all-targets -- -D warnings`\n\n## Edge Cases\n- Empty assignees list -> don't print Assignees line\n- NULL due_date -> don't print Due line \n- NULL milestone_title -> don't print Milestone line\n- Single vs multiple assignees -> 
\"Assignee\" vs \"Assignees\" grammar","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-02-05T15:16:00.105830Z","created_by":"tayloreernisse","updated_at":"2026-02-05T15:26:08.147202Z","closed_at":"2026-02-05T15:26:08.147154Z","close_reason":"Implemented: assignees, due_date, milestone now display in lore issues . All 7 new tests pass.","compaction_level":0,"original_size":0,"labels":["ISSUE"]} {"id":"bd-208","title":"[CP1] Issue ingestion module","description":"## Background\n\nThe issue ingestion module fetches and stores issues with cursor-based incremental sync. It is the primary data ingestion component, establishing the pattern reused for MR ingestion in CP2. The module handles tuple-cursor semantics, raw payload storage, label extraction, and tracking which issues need discussion sync.\n\n## Approach\n\n### Module: src/ingestion/issues.rs\n\n### Key Structs\n\n```rust\n#[derive(Debug, Default)]\npub struct IngestIssuesResult {\n pub fetched: usize,\n pub upserted: usize,\n pub labels_created: usize,\n pub issues_needing_discussion_sync: Vec,\n}\n\n#[derive(Debug, Clone)]\npub struct IssueForDiscussionSync {\n pub local_issue_id: i64,\n pub iid: i64,\n pub updated_at: i64, // ms epoch\n}\n```\n\n### Main Function\n\n```rust\npub async fn ingest_issues(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64, // Local DB project ID\n gitlab_project_id: i64,\n) -> Result\n```\n\n### Logic (Step by Step)\n\n1. **Get current cursor** from sync_cursors table:\n```sql\nSELECT updated_at_cursor, tie_breaker_id\nFROM sync_cursors\nWHERE project_id = ? AND resource_type = 'issues'\n```\n\n2. **Call pagination method** with cursor rewind:\n```rust\nlet issues_stream = client.paginate_issues(\n gitlab_project_id,\n cursor.updated_at_cursor,\n config.sync.cursor_rewind_seconds,\n);\n```\n\n3. 
**Apply local filtering** for tuple cursor semantics:\n```rust\n// Skip if issue.updated_at < cursor_updated_at\n// Skip if issue.updated_at == cursor_updated_at AND issue.gitlab_id <= cursor_gitlab_id\nfn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> bool {\n if issue.updated_at < cursor.updated_at_cursor {\n return false;\n }\n if issue.updated_at == cursor.updated_at_cursor \n && issue.gitlab_id <= cursor.tie_breaker_id {\n return false;\n }\n true\n}\n```\n\n4. **For each issue passing filter**:\n```rust\n// Begin transaction (unchecked_transaction for rusqlite)\nlet tx = conn.unchecked_transaction()?;\n\n// Store raw payload (compressed based on config)\nlet payload_id = store_raw_payload(&tx, &issue_json, config.storage.compress_raw_payloads)?;\n\n// Transform and upsert issue\nlet issue_row = transform_issue(&issue)?;\nupsert_issue(&tx, &issue_row, project_id, payload_id)?;\nlet local_issue_id = get_local_issue_id(&tx, project_id, issue.iid)?;\n\n// Clear existing label links (stale removal!)\ntx.execute(\"DELETE FROM issue_labels WHERE issue_id = ?\", [local_issue_id])?;\n\n// Extract and upsert labels\nfor label_name in &issue_row.label_names {\n let label_id = upsert_label(&tx, project_id, label_name)?;\n link_issue_label(&tx, local_issue_id, label_id)?;\n}\n\ntx.commit()?;\n```\n\n5. **Incremental cursor update** every 100 issues:\n```rust\nif batch_count % 100 == 0 {\n update_sync_cursor(conn, project_id, \"issues\", last_updated_at, last_gitlab_id)?;\n}\n```\n\n6. **Final cursor update** after all issues processed\n\n7. 
**Determine issues needing discussion sync**:\n```sql\nSELECT id, iid, updated_at\nFROM issues\nWHERE project_id = ?\n AND updated_at > COALESCE(discussions_synced_for_updated_at, 0)\n```\n\n### Helper Functions\n\n```rust\nfn store_raw_payload(conn, json: &Value, compress: bool) -> Result\nfn upsert_issue(conn, issue: &IssueRow, project_id: i64, payload_id: i64) -> Result<()>\nfn get_local_issue_id(conn, project_id: i64, iid: i64) -> Result\nfn upsert_label(conn, project_id: i64, name: &str) -> Result\nfn link_issue_label(conn, issue_id: i64, label_id: i64) -> Result<()>\nfn update_sync_cursor(conn, project_id: i64, resource: &str, updated_at: i64, gitlab_id: i64) -> Result<()>\n```\n\n### Critical Invariant\n\nStale label links MUST be removed on resync. The \"DELETE then INSERT\" pattern ensures GitLab reality is reflected locally. If an issue had labels [A, B] and now has [A, C], the B link must be removed.\n\n## Acceptance Criteria\n\n- [ ] `ingest_issues` returns IngestIssuesResult with all counts\n- [ ] Cursor fetched from sync_cursors at start\n- [ ] Cursor rewind applied before API call\n- [ ] Local filtering skips already-processed issues\n- [ ] Each issue wrapped in transaction for atomicity\n- [ ] Raw payload stored with correct compression\n- [ ] Issue upserted (INSERT OR REPLACE pattern)\n- [ ] Existing label links deleted before new links inserted\n- [ ] Labels upserted (INSERT OR IGNORE by project+name)\n- [ ] Cursor updated every 100 issues (crash recovery)\n- [ ] Final cursor update after all issues\n- [ ] issues_needing_discussion_sync populated correctly\n\n## Files\n\n- src/ingestion/mod.rs (add `pub mod issues;`)\n- src/ingestion/issues.rs (create)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/issue_ingestion_tests.rs\n#[tokio::test] async fn ingests_issues_from_stream()\n#[tokio::test] async fn applies_cursor_filter_correctly()\n#[tokio::test] async fn updates_cursor_every_100_issues()\n#[tokio::test] async fn 
stores_raw_payload_for_each_issue()\n#[tokio::test] async fn upserts_issues_correctly()\n\n// tests/label_linkage_tests.rs\n#[tokio::test] async fn extracts_and_stores_labels()\n#[tokio::test] async fn removes_stale_label_links_on_resync()\n#[tokio::test] async fn handles_empty_labels_array()\n\n// tests/discussion_eligibility_tests.rs\n#[tokio::test] async fn identifies_issues_needing_discussion_sync()\n#[tokio::test] async fn skips_issues_with_current_watermark()\n```\n\nGREEN: Implement ingest_issues with all helper functions\n\nVERIFY: `cargo test issue_ingestion && cargo test label_linkage && cargo test discussion_eligibility`\n\n## Edge Cases\n\n- Empty issues stream - return result with all zeros\n- Cursor at epoch 0 - fetch all issues (no filtering)\n- Issue with no labels - empty Vec, no label links created\n- Issue with 50+ labels - all should be linked\n- Crash mid-batch - cursor at last 100-boundary, some issues re-fetched\n- Label already exists - upsert via INSERT OR IGNORE\n- Same issue fetched twice (due to rewind) - upsert handles it","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.245404Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:52:38.003964Z","closed_at":"2026-01-25T22:52:38.003868Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-208","depends_on_id":"bd-2iq","type":"blocks","created_at":"2026-01-25T17:04:05.425224Z","created_by":"tayloreernisse"},{"issue_id":"bd-208","depends_on_id":"bd-3nd","type":"blocks","created_at":"2026-01-25T17:04:05.450341Z","created_by":"tayloreernisse"},{"issue_id":"bd-208","depends_on_id":"bd-xhz","type":"blocks","created_at":"2026-01-25T17:04:05.473203Z","created_by":"tayloreernisse"}]} -{"id":"bd-20e","title":"Define TimelineEvent model and TimelineEventType enum","description":"## Background\n\nThe TimelineEvent model is the foundational data type for Gate 3's timeline feature. 
All pipeline stages (seed, expand, collect, interleave) produce or consume TimelineEvents. This must be defined first because every downstream bead (bd-32q, bd-ypa, bd-3as, bd-dty, bd-2f2) depends on these types.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.3 (Event Model).\n\n## Codebase Context\n\n- Migration 011 created: resource_state_events, resource_label_events, resource_milestone_events, entity_references, pending_dependent_fetches\n- source_method CHECK constraint: `'api' | 'note_parse' | 'description_parse'` (NOT spec's 'api_closes_issues' etc.)\n- reference_type CHECK constraint: `'closes' | 'mentioned' | 'related'`\n- LATEST_SCHEMA_VERSION = 14\n\n## Approach\n\nCreate `src/core/timeline.rs` with the following types:\n\n```rust\n/// The core timeline event. All pipeline stages produce or consume these.\n/// Spec ref: Section 3.3 \"Event Model\"\n#[derive(Debug, Clone, Serialize)]\npub struct TimelineEvent {\n pub timestamp: i64, // ms epoch UTC\n pub entity_type: String, // \"issue\" | \"merge_request\"\n pub entity_id: i64, // local DB id (internal, not in JSON output)\n pub entity_iid: i64,\n pub project_path: String,\n pub event_type: TimelineEventType,\n pub summary: String, // human-readable one-liner\n pub actor: Option, // username or None for system\n pub url: Option, // web URL for the event source\n pub is_seed: bool, // true if from seed phase, false if expanded\n}\n\n/// Per spec Section 3.3. 
Serde tagged enum for JSON output.\n/// IMPORTANT: entity_type is String (not &'static str) because serde Serialize\n/// requires owned types for struct fields when deriving.\n#[derive(Debug, Clone, Serialize)]\n#[serde(tag = \"kind\", rename_all = \"snake_case\")]\npub enum TimelineEventType {\n Created,\n StateChanged { state: String }, // spec: just the target state\n LabelAdded { label: String },\n LabelRemoved { label: String },\n MilestoneSet { milestone: String },\n MilestoneRemoved { milestone: String },\n Merged, // spec: unit variant\n NoteEvidence {\n note_id: i64, // spec: required\n snippet: String, // first ~200 chars of matching note body\n discussion_id: Option, // spec: optional\n },\n CrossReferenced { target: String }, // compact target ref like \"\\!567\" or \"#234\"\n}\n\n/// Internal entity reference used across pipeline stages.\n#[derive(Debug, Clone, Serialize)]\npub struct EntityRef {\n pub entity_type: String, // String not &'static str — needed for Serialize\n pub entity_id: i64,\n pub entity_iid: i64,\n pub project_path: String,\n}\n\n/// An entity discovered via BFS expansion.\n/// Spec ref: Section 3.5 \"expanded_entities\" JSON structure.\n#[derive(Debug, Clone, Serialize)]\npub struct ExpandedEntityRef {\n pub entity_ref: EntityRef,\n pub depth: u32,\n pub via_from: EntityRef, // the entity that referenced this one\n pub via_reference_type: String, // \"closes\", \"mentioned\", \"related\"\n pub via_source_method: String, // \"api\", \"note_parse\", \"description_parse\"\n}\n\n/// Reference to an unsynced external entity.\n/// Spec ref: Section 3.5 \"unresolved_references\" JSON structure.\n#[derive(Debug, Clone, Serialize)]\npub struct UnresolvedRef {\n pub source: EntityRef,\n pub target_project: Option,\n pub target_type: String,\n pub target_iid: i64,\n pub reference_type: String,\n}\n\n/// Complete result from the timeline pipeline.\n#[derive(Debug, Clone, Serialize)]\npub struct TimelineResult {\n pub query: String,\n pub 
events: Vec,\n pub seed_entities: Vec,\n pub expanded_entities: Vec,\n pub unresolved_references: Vec,\n}\n```\n\nImplement `Ord` on `TimelineEvent` for chronological sort: primary key `timestamp`, tiebreak by `entity_id` then event_type discriminant.\n\nAlso implement `PartialEq`, `Eq`, `PartialOrd` (required by Ord).\n\nRegister in `src/core/mod.rs`: `pub mod timeline;`\n\n## Acceptance Criteria\n\n- [ ] `src/core/timeline.rs` compiles with no warnings\n- [ ] All struct fields use `String` not `&'static str` (required for `#[derive(Serialize)]`)\n- [ ] `TimelineEventType` has exactly 9 variants matching spec Section 3.3\n- [ ] `NoteEvidence` has `note_id: i64`, `snippet: String`, `discussion_id: Option`\n- [ ] `ExpandedEntityRef.via_source_method` documents codebase values: api, note_parse, description_parse\n- [ ] `Ord` impl sorts by (timestamp, entity_id, event_type discriminant)\n- [ ] `PartialEq`, `Eq`, `PartialOrd` derived or implemented\n- [ ] Module registered in `src/core/mod.rs`\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline;`)\n\n## TDD Loop\n\nRED: Create `src/core/timeline.rs` with `#[cfg(test)] mod tests`:\n- `test_timeline_event_sort_by_timestamp` - events sort chronologically\n- `test_timeline_event_sort_tiebreak` - same-timestamp events sort stably\n- `test_timeline_event_type_serializes_tagged` - serde JSON uses `kind` tag\n- `test_note_evidence_has_note_id` - note_id present in serialized output\n\nGREEN: Implement the types and Ord trait.\n\nVERIFY: `cargo test --lib -- timeline`\n\n## Edge Cases\n\n- Ord must be consistent and total for all valid TimelineEvent pairs\n- NoteEvidence snippet truncated to 200 chars at construction, not in the type\n- entity_type uses String to satisfy serde Serialize derive requirements\n- url field: constructed from project_path + entity_type + iid; None for entities 
without web_url","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.569126Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:39:18.322213Z","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","types"],"dependencies":[{"issue_id":"bd-20e","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.573079Z","created_by":"tayloreernisse"}]} +{"id":"bd-20e","title":"Define TimelineEvent model and TimelineEventType enum","description":"## Background\n\nThe TimelineEvent model is the foundational data type for Gate 3's timeline feature. All pipeline stages (seed, expand, collect, interleave) produce or consume TimelineEvents. This must be defined first because every downstream bead (bd-32q, bd-ypa, bd-3as, bd-dty, bd-2f2) depends on these types.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.3 (Event Model).\n\n## Codebase Context\n\n- Migration 011 created: resource_state_events, resource_label_events, resource_milestone_events, entity_references, pending_dependent_fetches\n- source_method CHECK constraint: `'api' | 'note_parse' | 'description_parse'` (NOT spec's 'api_closes_issues' etc.)\n- reference_type CHECK constraint: `'closes' | 'mentioned' | 'related'`\n- LATEST_SCHEMA_VERSION = 14\n\n## Approach\n\nCreate `src/core/timeline.rs` with the following types:\n\n```rust\n/// The core timeline event. 
All pipeline stages produce or consume these.\n/// Spec ref: Section 3.3 \"Event Model\"\n#[derive(Debug, Clone, Serialize)]\npub struct TimelineEvent {\n pub timestamp: i64, // ms epoch UTC\n pub entity_type: String, // \"issue\" | \"merge_request\"\n pub entity_id: i64, // local DB id (internal, not in JSON output)\n pub entity_iid: i64,\n pub project_path: String,\n pub event_type: TimelineEventType,\n pub summary: String, // human-readable one-liner\n pub actor: Option, // username or None for system\n pub url: Option, // web URL for the event source\n pub is_seed: bool, // true if from seed phase, false if expanded\n}\n\n/// Per spec Section 3.3. Serde tagged enum for JSON output.\n/// IMPORTANT: entity_type is String (not &'static str) because serde Serialize\n/// requires owned types for struct fields when deriving.\n#[derive(Debug, Clone, Serialize)]\n#[serde(tag = \"kind\", rename_all = \"snake_case\")]\npub enum TimelineEventType {\n Created,\n StateChanged { state: String }, // spec: just the target state\n LabelAdded { label: String },\n LabelRemoved { label: String },\n MilestoneSet { milestone: String },\n MilestoneRemoved { milestone: String },\n Merged, // spec: unit variant\n NoteEvidence {\n note_id: i64, // spec: required\n snippet: String, // first ~200 chars of matching note body\n discussion_id: Option, // spec: optional\n },\n CrossReferenced { target: String }, // compact target ref like \"\\!567\" or \"#234\"\n}\n\n/// Internal entity reference used across pipeline stages.\n#[derive(Debug, Clone, Serialize)]\npub struct EntityRef {\n pub entity_type: String, // String not &'static str — needed for Serialize\n pub entity_id: i64,\n pub entity_iid: i64,\n pub project_path: String,\n}\n\n/// An entity discovered via BFS expansion.\n/// Spec ref: Section 3.5 \"expanded_entities\" JSON structure.\n#[derive(Debug, Clone, Serialize)]\npub struct ExpandedEntityRef {\n pub entity_ref: EntityRef,\n pub depth: u32,\n pub via_from: EntityRef, // the 
entity that referenced this one\n pub via_reference_type: String, // \"closes\", \"mentioned\", \"related\"\n pub via_source_method: String, // \"api\", \"note_parse\", \"description_parse\"\n}\n\n/// Reference to an unsynced external entity.\n/// Spec ref: Section 3.5 \"unresolved_references\" JSON structure.\n#[derive(Debug, Clone, Serialize)]\npub struct UnresolvedRef {\n pub source: EntityRef,\n pub target_project: Option,\n pub target_type: String,\n pub target_iid: i64,\n pub reference_type: String,\n}\n\n/// Complete result from the timeline pipeline.\n#[derive(Debug, Clone, Serialize)]\npub struct TimelineResult {\n pub query: String,\n pub events: Vec,\n pub seed_entities: Vec,\n pub expanded_entities: Vec,\n pub unresolved_references: Vec,\n}\n```\n\nImplement `Ord` on `TimelineEvent` for chronological sort: primary key `timestamp`, tiebreak by `entity_id` then event_type discriminant.\n\nAlso implement `PartialEq`, `Eq`, `PartialOrd` (required by Ord).\n\nRegister in `src/core/mod.rs`: `pub mod timeline;`\n\n## Acceptance Criteria\n\n- [ ] `src/core/timeline.rs` compiles with no warnings\n- [ ] All struct fields use `String` not `&'static str` (required for `#[derive(Serialize)]`)\n- [ ] `TimelineEventType` has exactly 9 variants matching spec Section 3.3\n- [ ] `NoteEvidence` has `note_id: i64`, `snippet: String`, `discussion_id: Option`\n- [ ] `ExpandedEntityRef.via_source_method` documents codebase values: api, note_parse, description_parse\n- [ ] `Ord` impl sorts by (timestamp, entity_id, event_type discriminant)\n- [ ] `PartialEq`, `Eq`, `PartialOrd` derived or implemented\n- [ ] Module registered in `src/core/mod.rs`\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline;`)\n\n## TDD Loop\n\nRED: Create `src/core/timeline.rs` with `#[cfg(test)] mod tests`:\n- `test_timeline_event_sort_by_timestamp` - events sort 
chronologically\n- `test_timeline_event_sort_tiebreak` - same-timestamp events sort stably\n- `test_timeline_event_type_serializes_tagged` - serde JSON uses `kind` tag\n- `test_note_evidence_has_note_id` - note_id present in serialized output\n\nGREEN: Implement the types and Ord trait.\n\nVERIFY: `cargo test --lib -- timeline`\n\n## Edge Cases\n\n- Ord must be consistent and total for all valid TimelineEvent pairs\n- NoteEvidence snippet truncated to 200 chars at construction, not in the type\n- entity_type uses String to satisfy serde Serialize derive requirements\n- url field: constructed from project_path + entity_type + iid; None for entities without web_url","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.569126Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:43:02.449502Z","closed_at":"2026-02-05T21:43:02.449454Z","close_reason":"Completed: Created src/core/timeline.rs with TimelineEvent, TimelineEventType (9 variants), EntityRef, ExpandedEntityRef, UnresolvedRef, TimelineResult. Ord impl sorts by (timestamp, entity_id, event_type discriminant). entity_id skipped in serde output. 6 tests pass. All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","types"],"dependencies":[{"issue_id":"bd-20e","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.573079Z","created_by":"tayloreernisse"}]} {"id":"bd-20h","title":"Implement MR discussion ingestion module","description":"## Background\nMR discussion ingestion with critical atomicity guarantees. Parse notes BEFORE destructive DB operations to prevent data loss. Watermark ONLY advanced on full success.\n\n## Approach\nCreate `src/ingestion/mr_discussions.rs` with:\n1. `IngestMrDiscussionsResult` - Per-MR stats\n2. `ingest_mr_discussions()` - Main function with atomicity guarantees\n3. Upsert + sweep pattern for notes (not delete-all-then-insert)\n4. 
Sync health telemetry for debugging failures\n\n## Files\n- `src/ingestion/mr_discussions.rs` - New module\n- `tests/mr_discussion_ingestion_tests.rs` - Integration tests\n\n## Acceptance Criteria\n- [ ] `IngestMrDiscussionsResult` has: discussions_fetched, discussions_upserted, notes_upserted, notes_skipped_bad_timestamp, diffnotes_count, pagination_succeeded\n- [ ] `ingest_mr_discussions()` returns `Result`\n- [ ] CRITICAL: Notes parsed BEFORE any DELETE operations\n- [ ] CRITICAL: Watermark NOT advanced if `pagination_succeeded == false`\n- [ ] CRITICAL: Watermark NOT advanced if any note parse fails\n- [ ] Upsert + sweep pattern using `last_seen_at`\n- [ ] Stale discussions/notes removed only on full success\n- [ ] Selective raw payload storage (skip system notes without position)\n- [ ] Sync health telemetry recorded on failure\n- [ ] `does_not_advance_discussion_watermark_on_partial_failure` test passes\n- [ ] `atomic_note_replacement_preserves_data_on_parse_failure` test passes\n\n## TDD Loop\nRED: `cargo test does_not_advance_watermark` -> test fails\nGREEN: Add ingestion with atomicity guarantees\nVERIFY: `cargo test mr_discussion_ingestion`\n\n## Main Function\n```rust\npub async fn ingest_mr_discussions(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n project_id: i64,\n gitlab_project_id: i64,\n mr_iid: i64,\n local_mr_id: i64,\n mr_updated_at: i64,\n) -> Result\n```\n\n## CRITICAL: Atomic Note Replacement\n```rust\n// Record sync start time for sweep\nlet run_seen_at = now_ms();\n\nwhile let Some(discussion_result) = stream.next().await {\n let discussion = match discussion_result {\n Ok(d) => d,\n Err(e) => {\n result.pagination_succeeded = false;\n break; // Stop but don't advance watermark\n }\n };\n \n // CRITICAL: Parse BEFORE destructive operations\n let notes = match transform_notes_with_diff_position(&discussion, project_id) {\n Ok(notes) => notes,\n Err(e) => {\n warn!(\"Note transform failed; preserving existing notes\");\n 
result.notes_skipped_bad_timestamp += discussion.notes.len();\n result.pagination_succeeded = false;\n continue; // Skip this discussion, don't delete existing\n }\n };\n \n // Only NOW start transaction (after parse succeeded)\n let tx = conn.unchecked_transaction()?;\n \n // Upsert discussion with run_seen_at\n // Upsert notes with run_seen_at (not delete-all)\n \n tx.commit()?;\n}\n```\n\n## Stale Data Sweep (only on success)\n```rust\nif result.pagination_succeeded {\n // Sweep stale discussions\n conn.execute(\n \"DELETE FROM discussions\n WHERE project_id = ? AND merge_request_id = ?\n AND last_seen_at < ?\",\n params![project_id, local_mr_id, run_seen_at],\n )?;\n \n // Sweep stale notes\n conn.execute(\n \"DELETE FROM notes\n WHERE discussion_id IN (\n SELECT id FROM discussions\n WHERE project_id = ? AND merge_request_id = ?\n )\n AND last_seen_at < ?\",\n params![project_id, local_mr_id, run_seen_at],\n )?;\n}\n```\n\n## Watermark Update (ONLY on success)\n```rust\nif result.pagination_succeeded {\n mark_discussions_synced(conn, local_mr_id, mr_updated_at)?;\n clear_sync_health_error(conn, local_mr_id)?;\n} else {\n record_sync_health_error(conn, local_mr_id, \"Pagination incomplete or parse failure\")?;\n warn!(\"Watermark NOT advanced; will retry on next sync\");\n}\n```\n\n## Selective Payload Storage\n```rust\n// Only store payload for DiffNotes and non-system notes\nlet should_store_note_payload =\n !note.is_system() ||\n note.position_new_path().is_some() ||\n note.position_old_path().is_some();\n```\n\n## Integration Tests (CRITICAL)\n```rust\n#[tokio::test]\nasync fn does_not_advance_discussion_watermark_on_partial_failure() {\n // Setup: MR with updated_at > discussions_synced_for_updated_at\n // Mock: Page 1 returns OK, Page 2 returns 500\n // Assert: discussions_synced_for_updated_at unchanged\n}\n\n#[tokio::test]\nasync fn does_not_advance_discussion_watermark_on_note_parse_failure() {\n // Setup: Existing notes in DB\n // Mock: Discussion 
with note having invalid created_at\n // Assert: Original notes preserved, watermark unchanged\n}\n\n#[tokio::test]\nasync fn atomic_note_replacement_preserves_data_on_parse_failure() {\n // Setup: Discussion with 3 valid notes\n // Mock: Updated discussion where note 2 has bad timestamp\n // Assert: All 3 original notes still in DB\n}\n```\n\n## Edge Cases\n- HTTP error mid-pagination: preserve existing data, log error, no watermark advance\n- Invalid note timestamp: skip discussion, preserve existing notes\n- System notes without position: don't store raw payload (saves space)\n- Empty discussion: still upsert discussion record, no notes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-26T22:06:42.335714Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:22:43.207057Z","closed_at":"2026-01-27T00:22:43.206996Z","close_reason":"Implemented MR discussion ingestion module with full atomicity guarantees:\n- IngestMrDiscussionsResult with all required fields\n- parse-before-destructive pattern (transform notes before DB ops)\n- Upsert + sweep pattern with last_seen_at timestamps\n- Watermark advanced ONLY on full pagination success\n- Selective payload storage (skip system notes without position)\n- Sync health telemetry for failure debugging\n- All 163 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-20h","depends_on_id":"bd-3ir","type":"blocks","created_at":"2026-01-26T22:08:54.649094Z","created_by":"tayloreernisse"},{"issue_id":"bd-20h","depends_on_id":"bd-3j6","type":"blocks","created_at":"2026-01-26T22:08:54.686066Z","created_by":"tayloreernisse"},{"issue_id":"bd-20h","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:54.722746Z","created_by":"tayloreernisse"}]} {"id":"bd-221","title":"Create migration 008_fts5.sql","description":"## Background\nFTS5 (Full-Text Search 5) provides the lexical search backbone for Gate A. 
The virtual table + triggers keep the FTS index in sync with the documents table automatically. This migration must be applied AFTER migration 007 (documents table exists). The trigger design handles NULL titles via COALESCE and only rebuilds the FTS entry when searchable text actually changes (not metadata-only updates).\n\n## Approach\nCreate `migrations/008_fts5.sql` with the exact SQL from PRD Section 1.2:\n\n1. **Virtual table:** `documents_fts` using FTS5 with porter stemmer, prefix indexes (2,3,4), external content backed by `documents` table\n2. **Insert trigger:** `documents_ai` — inserts into FTS on document insert, uses COALESCE(title, '') for NULL safety\n3. **Delete trigger:** `documents_ad` — removes from FTS on document delete using the FTS5 delete command syntax\n4. **Update trigger:** `documents_au` — only fires when `title` or `content_text` changes (WHEN clause), performs delete-then-insert to update FTS\n\nRegister migration 8 in `src/core/db.rs` MIGRATIONS array.\n\n**Critical detail:** The COALESCE is required because FTS5 external-content tables require exact value matching for delete operations. 
If NULL was inserted, the delete trigger couldn't match it (NULL != NULL in SQL).\n\n## Acceptance Criteria\n- [ ] `migrations/008_fts5.sql` file exists\n- [ ] `documents_fts` virtual table created with `tokenize='porter unicode61'` and `prefix='2 3 4'`\n- [ ] `content='documents'` and `content_rowid='id'` set (external content mode)\n- [ ] Insert trigger `documents_ai` fires on document insert with COALESCE(title, '')\n- [ ] Delete trigger `documents_ad` fires on document delete using FTS5 delete command\n- [ ] Update trigger `documents_au` only fires when `old.title IS NOT new.title OR old.content_text != new.content_text`\n- [ ] Prefix search works: query `auth*` matches \"authentication\"\n- [ ] After bulk insert of N documents, `SELECT count(*) FROM documents_fts` returns N\n- [ ] Schema version 8 recorded in schema_version table\n- [ ] `cargo test migration_tests` passes\n\n## Files\n- `migrations/008_fts5.sql` — new file (copy exact SQL from PRD Section 1.2)\n- `src/core/db.rs` — add migration 8 to MIGRATIONS array\n\n## TDD Loop\nRED: Register migration in db.rs, `cargo test migration_tests` fails (SQL file missing)\nGREEN: Create `008_fts5.sql` with all triggers\nVERIFY: `cargo test migration_tests && cargo build`\n\n## Edge Cases\n- Metadata-only updates (e.g., changing `updated_at` or `labels_hash`) must NOT trigger FTS rebuild — the WHEN clause prevents this\n- NULL titles must use COALESCE to empty string in both insert and delete triggers\n- The update trigger does delete+insert (not FTS5 'delete' + regular insert atomically) — this is the correct FTS5 pattern for content changes","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:25:25.763146Z","created_by":"tayloreernisse","updated_at":"2026-01-30T16:56:13.131830Z","closed_at":"2026-01-30T16:56:13.131771Z","close_reason":"Completed: migration 008_fts5.sql with FTS5 virtual table, 3 sync triggers (insert/delete/update with COALESCE NULL safety), prefix search, registered 
in db.rs, cargo build + tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-221","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-01-30T15:29:15.574576Z","created_by":"tayloreernisse"}]} {"id":"bd-227","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\n## Module\nsrc/cli/commands/count.rs\n\n## Clap Definition\nCount {\n #[arg(value_parser = [\"issues\", \"mrs\", \"discussions\", \"notes\"])]\n entity: String,\n \n #[arg(long, value_parser = [\"issue\", \"mr\"])]\n r#type: Option,\n}\n\n## Commands\n- gi count issues → 'Issues: N'\n- gi count discussions → 'Discussions: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes → 'Notes: N (excluding M system)'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\n## Implementation\n- Simple COUNT(*) queries\n- For notes, also count WHERE is_system = 1 for system note count\n- Filter by noteable_type when --type specified\n\nFiles: src/cli/commands/count.rs\nDone when: Counts match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:58:25.648805Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.920135Z","deleted_at":"2026-01-25T17:02:01.920129Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} @@ -101,7 +101,7 @@ {"id":"bd-31m","title":"[CP1] Test fixtures for mocked GitLab responses","description":"Create mock response files for integration tests.\n\nFixtures to create:\n- gitlab-issue.json (single issue with labels)\n- gitlab-issues-page.json (paginated list)\n- gitlab-discussion.json (single discussion with notes)\n- gitlab-discussions-page.json (paginated list)\n\nInclude edge cases:\n- Issue with labels_details\n- Issue with no labels\n- Discussion with individual_note=true\n- System notes with 
system=true\n\nFiles: tests/fixtures/mock-responses/gitlab-issue*.json, gitlab-discussion*.json\nDone when: MSW handlers can use fixtures for deterministic tests","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:43.781288Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.155480Z","deleted_at":"2026-01-25T15:21:35.155478Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-327","title":"[CP0] Project scaffold","description":"## Background\n\nThis is the foundational scaffold for the GitLab Inbox CLI tool. Every subsequent bead depends on having the correct project structure, TypeScript configuration, and tooling in place. The configuration choices here (ESM modules, strict TypeScript, Vitest for testing) set constraints for all future code.\n\n## Approach\n\nCreate a Node.js 20+ ESM project with TypeScript strict mode. Use flat ESLint config (v9+) with TypeScript plugin. Configure Vitest with coverage. 
Create the directory structure matching the PRD exactly.\n\n**package.json essentials:**\n- `\"type\": \"module\"` for ESM\n- `\"bin\": { \"gi\": \"./dist/cli/index.js\" }` for CLI entry point\n- Runtime deps: better-sqlite3, sqlite-vec, commander, zod, pino, pino-pretty, ora, chalk, cli-table3, inquirer\n- Dev deps: typescript, @types/better-sqlite3, @types/node, vitest, msw, eslint, @typescript-eslint/*\n\n**tsconfig.json:**\n- `target: ES2022`, `module: Node16`, `moduleResolution: Node16`\n- `strict: true`, `noImplicitAny: true`, `strictNullChecks: true`\n- `outDir: ./dist`, `rootDir: ./src`\n\n**vitest.config.ts:**\n- Exclude `tests/live/**` unless `GITLAB_LIVE_TESTS=1`\n- Coverage with v8 provider\n\n## Acceptance Criteria\n\n- [ ] `npm install` completes without errors\n- [ ] `npm run build` compiles TypeScript to dist/\n- [ ] `npm run test` runs vitest (0 tests is fine at this stage)\n- [ ] `npm run lint` runs ESLint with no config errors\n- [ ] All directories exist: src/cli/commands/, src/core/, src/gitlab/, src/types/, tests/unit/, tests/integration/, tests/live/, tests/fixtures/mock-responses/, migrations/\n\n## Files\n\nCREATE:\n- package.json\n- tsconfig.json\n- vitest.config.ts\n- eslint.config.js\n- .gitignore\n- src/cli/index.ts (empty placeholder with shebang)\n- src/cli/commands/.gitkeep\n- src/core/.gitkeep\n- src/gitlab/.gitkeep\n- src/types/index.ts (empty)\n- tests/unit/.gitkeep\n- tests/integration/.gitkeep\n- tests/live/.gitkeep\n- tests/fixtures/mock-responses/.gitkeep\n- migrations/.gitkeep\n\n## TDD Loop\n\nN/A - scaffold only. 
Verify with:\n\n```bash\nnpm install\nnpm run build\nnpm run lint\nnpm run test\n```\n\n## Edge Cases\n\n- Node.js version < 20 will fail on ESM features - add `engines` field\n- better-sqlite3 requires native compilation - may need python/build-essential\n- sqlite-vec installation can fail on some platforms - document fallback","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:47.955044Z","created_by":"tayloreernisse","updated_at":"2026-01-25T02:51:25.347932Z","closed_at":"2026-01-25T02:51:25.347799Z","compaction_level":0,"original_size":0} {"id":"bd-32mc","title":"OBSERV: Implement log retention cleanup at startup","description":"## Background\nLog files accumulate at ~1-10 MB/day. Without cleanup, they grow unbounded. Retention runs BEFORE subscriber init so deleted file handles aren't held open by the appender.\n\n## Approach\nAdd a cleanup function, called from main.rs before the subscriber is initialized (before current line 44):\n\n```rust\n/// Delete log files older than retention_days.\n/// Matches files named lore.YYYY-MM-DD.log in the log directory.\npub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> std::io::Result {\n if retention_days == 0 {\n return Ok(0); // 0 means file logging disabled, don't delete\n }\n let cutoff = SystemTime::now() - Duration::from_secs(u64::from(retention_days) * 86400);\n let mut deleted = 0;\n\n for entry in std::fs::read_dir(log_dir)? {\n let entry = entry?;\n let name = entry.file_name();\n let name_str = name.to_string_lossy();\n\n // Only match lore.YYYY-MM-DD.log pattern\n if !name_str.starts_with(\"lore.\") || !name_str.ends_with(\".log\") {\n continue;\n }\n\n if let Ok(metadata) = entry.metadata() {\n if let Ok(modified) = metadata.modified() {\n if modified < cutoff {\n std::fs::remove_file(entry.path())?;\n deleted += 1;\n }\n }\n }\n }\n Ok(deleted)\n}\n```\n\nPlace this function in src/core/paths.rs (next to get_log_dir) or a new src/core/log_retention.rs. 
Prefer paths.rs since it's small and related.\n\nCall from main.rs:\n```rust\nlet log_dir = get_log_dir(config.logging.log_dir.as_deref());\nlet _ = cleanup_old_logs(&log_dir, config.logging.retention_days);\n// THEN init subscriber\n```\n\nNote: Config must be loaded before cleanup runs. Current main.rs parses Cli at line 60, but config loading happens inside command handlers. This means we need to either:\n A) Load config early in main() before subscriber init (preferred)\n B) Defer cleanup to after config load\n\nSince the subscriber must also know log_dir, approach A is natural: load config -> cleanup -> init subscriber -> dispatch command.\n\n## Acceptance Criteria\n- [ ] Files matching lore.*.log older than retention_days are deleted\n- [ ] Files matching lore.*.log within retention_days are preserved\n- [ ] Non-matching files (e.g., other.txt) are never deleted\n- [ ] retention_days=0 skips cleanup entirely (no files deleted)\n- [ ] Errors on individual files don't prevent cleanup of remaining files\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/paths.rs (add cleanup_old_logs function)\n- src/main.rs (call cleanup before subscriber init)\n\n## TDD Loop\nRED:\n - test_log_retention_cleanup: create tempdir with lore.2026-01-01.log through lore.2026-02-04.log, run with retention_days=7, assert old deleted, recent preserved\n - test_log_retention_ignores_non_log_files: create other.txt alongside old log files, assert other.txt untouched\n - test_log_retention_zero_days: retention_days=0, assert nothing deleted\nGREEN: Implement cleanup_old_logs\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- SystemTime::now() precision varies by OS; use file modified time, not name parsing (simpler and more reliable)\n- read_dir on non-existent directory: get_log_dir creates it first, so this shouldn't happen. 
But handle gracefully.\n- Permissions error on individual file: log a warning, continue with remaining files (don't propagate)\n- Race condition: another process creates a file during cleanup. Not a concern -- we only delete old files.","status":"closed","priority":1,"issue_type":"task","created_at":"2026-02-04T15:53:55.627901Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:15:04.452086Z","closed_at":"2026-02-04T17:15:04.452039Z","close_reason":"Implemented cleanup_old_logs() with date-pattern matching and retention_days config, runs at startup before subscriber init","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-32mc","depends_on_id":"bd-17n","type":"blocks","created_at":"2026-02-04T15:55:19.523048Z","created_by":"tayloreernisse"},{"issue_id":"bd-32mc","depends_on_id":"bd-1k4","type":"blocks","created_at":"2026-02-04T15:55:19.583155Z","created_by":"tayloreernisse"},{"issue_id":"bd-32mc","depends_on_id":"bd-2nx","type":"parent-child","created_at":"2026-02-04T15:53:55.628795Z","created_by":"tayloreernisse"}]} -{"id":"bd-32q","title":"Implement timeline seed phase: FTS5 keyword search to entity IDs","description":"## Background\n\nThe seed phase is steps 1-2 of the timeline pipeline (spec Section 3.2): SEED + HYDRATE. 
It converts a keyword query into entity IDs via FTS5 search and collects evidence note candidates.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 1-2.\n\n## Codebase Context\n\n- FTS5 index exists: documents_fts table (migration 008)\n- documents table: id, source_type ('issue'|'merge_request'|'discussion'), source_id, project_id, created_at, content\n- discussions table: id, issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, id (note_id)\n- Safe FTS query builder: src/search/fts.rs has to_fts_query(raw, FtsQueryMode::Safe) for sanitizing user input\n- projects table: path_with_namespace\n- issues/merge_requests: iid, project_id\n\n## Approach\n\nCreate `src/core/timeline_seed.rs`:\n\n```rust\nuse crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};\nuse rusqlite::Connection;\n\npub struct SeedResult {\n pub seed_entities: Vec,\n pub evidence_notes: Vec, // NoteEvidence events\n}\n\npub fn seed_timeline(\n conn: &Connection,\n query: &str,\n project_id: Option,\n since_ms: Option,\n max_seeds: usize, // default 50\n) -> Result { ... 
}\n```\n\n### SQL for SEED + HYDRATE (entity discovery):\n```sql\nSELECT DISTINCT d.source_type, d.source_id, d.project_id,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT iid FROM issues WHERE id = d.source_id)\n WHEN 'merge_request' THEN (SELECT iid FROM merge_requests WHERE id = d.source_id)\n WHEN 'discussion' THEN NULL -- discussions map to parent entity below\n END AS iid,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT p.path_with_namespace FROM projects p JOIN issues i ON i.project_id = p.id WHERE i.id = d.source_id)\n WHEN 'merge_request' THEN (SELECT p.path_with_namespace FROM projects p JOIN merge_requests m ON m.project_id = p.id WHERE m.id = d.source_id)\n WHEN 'discussion' THEN NULL\n END AS project_path\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nWHERE documents_fts MATCH ?1\n AND (?2 IS NULL OR d.project_id = ?2)\nORDER BY rank\nLIMIT ?3\n```\n\nFor 'discussion' source_type: resolve to parent entity via discussions.issue_id or discussions.merge_request_id.\n\n### SQL for evidence notes (top 10 FTS5-matched notes):\n```sql\nSELECT n.id as note_id, n.body, n.created_at, n.author_username,\n disc.id as discussion_id,\n CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as parent_type,\n COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nJOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\nJOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0\nWHERE documents_fts MATCH ?1\nORDER BY rank\nLIMIT 10\n```\n\nEvidence notes become TimelineEvent with:\n- event_type: NoteEvidence { note_id, snippet (first 200 chars), discussion_id }\n- Use to_fts_query(query, FtsQueryMode::Safe) to sanitize user input before MATCH\n\nRegister in `src/core/mod.rs`: `pub mod timeline_seed;`\n\n## Acceptance Criteria\n\n- [ ] seed_timeline() returns entities from FTS5 search\n- [ ] Entities deduplicated (same entity from 
multiple docs appears once)\n- [ ] Discussion documents resolved to parent entity (issue or MR)\n- [ ] Evidence notes capped at 10\n- [ ] Evidence note snippets truncated to 200 chars (safe UTF-8 boundary)\n- [ ] Uses to_fts_query(query, FtsQueryMode::Safe) for input sanitization\n- [ ] --since filter works\n- [ ] -p filter works\n- [ ] Empty result for zero-match queries (not error)\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_seed.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_seed;`)\n\n## TDD Loop\n\nRED:\n- `test_seed_deduplicates_entities`\n- `test_seed_resolves_discussion_to_parent`\n- `test_seed_empty_query_returns_empty`\n- `test_seed_evidence_capped_at_10`\n- `test_seed_evidence_snippet_truncated`\n- `test_seed_respects_since_filter`\n\nTests need in-memory DB with migrations 001-014 + documents/FTS test data.\n\nGREEN: Implement FTS5 queries and deduplication.\n\nVERIFY: `cargo test --lib -- timeline_seed`\n\n## Edge Cases\n\n- FTS5 MATCH invalid syntax: to_fts_query(query, FtsQueryMode::Safe) sanitizes\n- Discussion orphans: LEFT JOIN handles deleted notes\n- UTF-8 truncation: use char_indices() to find safe 200-char boundary\n- Discussion source resolving to both issue_id and merge_request_id: prefer issue_id (shouldn't happen but be defensive)","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.615908Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:57:28.696448Z","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-32q","depends_on_id":"bd-20e","type":"blocks","created_at":"2026-02-02T21:33:37.368005Z","created_by":"tayloreernisse"},{"issue_id":"bd-32q","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.617483Z","created_by":"tayloreernisse"}]} +{"id":"bd-32q","title":"Implement 
timeline seed phase: FTS5 keyword search to entity IDs","description":"## Background\n\nThe seed phase is steps 1-2 of the timeline pipeline (spec Section 3.2): SEED + HYDRATE. It converts a keyword query into entity IDs via FTS5 search and collects evidence note candidates.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 1-2.\n\n## Codebase Context\n\n- FTS5 index exists: documents_fts table (migration 008)\n- documents table: id, source_type ('issue'|'merge_request'|'discussion'), source_id, project_id, created_at, content\n- discussions table: id, issue_id, merge_request_id\n- notes table: discussion_id, author_username, body, created_at, is_system, id (note_id)\n- Safe FTS query builder: src/search/fts.rs has to_fts_query(raw, FtsQueryMode::Safe) for sanitizing user input\n- projects table: path_with_namespace\n- issues/merge_requests: iid, project_id\n\n## Approach\n\nCreate `src/core/timeline_seed.rs`:\n\n```rust\nuse crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};\nuse rusqlite::Connection;\n\npub struct SeedResult {\n pub seed_entities: Vec,\n pub evidence_notes: Vec, // NoteEvidence events\n}\n\npub fn seed_timeline(\n conn: &Connection,\n query: &str,\n project_id: Option,\n since_ms: Option,\n max_seeds: usize, // default 50\n) -> Result { ... 
}\n```\n\n### SQL for SEED + HYDRATE (entity discovery):\n```sql\nSELECT DISTINCT d.source_type, d.source_id, d.project_id,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT iid FROM issues WHERE id = d.source_id)\n WHEN 'merge_request' THEN (SELECT iid FROM merge_requests WHERE id = d.source_id)\n WHEN 'discussion' THEN NULL -- discussions map to parent entity below\n END AS iid,\n CASE d.source_type\n WHEN 'issue' THEN (SELECT p.path_with_namespace FROM projects p JOIN issues i ON i.project_id = p.id WHERE i.id = d.source_id)\n WHEN 'merge_request' THEN (SELECT p.path_with_namespace FROM projects p JOIN merge_requests m ON m.project_id = p.id WHERE m.id = d.source_id)\n WHEN 'discussion' THEN NULL\n END AS project_path\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nWHERE documents_fts MATCH ?1\n AND (?2 IS NULL OR d.project_id = ?2)\nORDER BY rank\nLIMIT ?3\n```\n\nFor 'discussion' source_type: resolve to parent entity via discussions.issue_id or discussions.merge_request_id.\n\n### SQL for evidence notes (top 10 FTS5-matched notes):\n```sql\nSELECT n.id as note_id, n.body, n.created_at, n.author_username,\n disc.id as discussion_id,\n CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END as parent_type,\n COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id\nFROM documents_fts fts\nJOIN documents d ON d.id = fts.rowid\nJOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\nJOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0\nWHERE documents_fts MATCH ?1\nORDER BY rank\nLIMIT 10\n```\n\nEvidence notes become TimelineEvent with:\n- event_type: NoteEvidence { note_id, snippet (first 200 chars), discussion_id }\n- Use to_fts_query(query, FtsQueryMode::Safe) to sanitize user input before MATCH\n\nRegister in `src/core/mod.rs`: `pub mod timeline_seed;`\n\n## Acceptance Criteria\n\n- [ ] seed_timeline() returns entities from FTS5 search\n- [ ] Entities deduplicated (same entity from 
multiple docs appears once)\n- [ ] Discussion documents resolved to parent entity (issue or MR)\n- [ ] Evidence notes capped at 10\n- [ ] Evidence note snippets truncated to 200 chars (safe UTF-8 boundary)\n- [ ] Uses to_fts_query(query, FtsQueryMode::Safe) for input sanitization\n- [ ] --since filter works\n- [ ] -p filter works\n- [ ] Empty result for zero-match queries (not error)\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_seed.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_seed;`)\n\n## TDD Loop\n\nRED:\n- `test_seed_deduplicates_entities`\n- `test_seed_resolves_discussion_to_parent`\n- `test_seed_empty_query_returns_empty`\n- `test_seed_evidence_capped_at_10`\n- `test_seed_evidence_snippet_truncated`\n- `test_seed_respects_since_filter`\n\nTests need in-memory DB with migrations 001-014 + documents/FTS test data.\n\nGREEN: Implement FTS5 queries and deduplication.\n\nVERIFY: `cargo test --lib -- timeline_seed`\n\n## Edge Cases\n\n- FTS5 MATCH invalid syntax: to_fts_query(query, FtsQueryMode::Safe) sanitizes\n- Discussion orphans: LEFT JOIN handles deleted notes\n- UTF-8 truncation: use char_indices() to find safe 200-char boundary\n- Discussion source resolving to both issue_id and merge_request_id: prefer issue_id (shouldn't happen but be defensive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.615908Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:47:07.966488Z","closed_at":"2026-02-05T21:47:07.966437Z","close_reason":"Completed: Created src/core/timeline_seed.rs with seed_timeline() function. FTS5 search to entity IDs with discussion-to-parent resolution, entity deduplication, evidence note extraction (capped, snippet-truncated). 12 tests pass. 
All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-32q","depends_on_id":"bd-20e","type":"blocks","created_at":"2026-02-02T21:33:37.368005Z","created_by":"tayloreernisse"},{"issue_id":"bd-32q","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.617483Z","created_by":"tayloreernisse"}]} {"id":"bd-335","title":"Implement Ollama API client","description":"## Background\nThe Ollama API client provides the HTTP interface to the local Ollama embedding server. It handles health checks (is Ollama running? does the model exist?), batch embedding requests (up to 32 texts per call), and error translation to LoreError variants. This is the lowest-level embedding component — the pipeline (bd-am7) builds on top of it.\n\n## Approach\nCreate \\`src/embedding/ollama.rs\\` per PRD Section 4.2. **Uses async reqwest (not blocking).**\n\n```rust\nuse reqwest::Client; // NOTE: async Client, not reqwest::blocking\nuse serde::{Deserialize, Serialize};\nuse crate::core::error::{LoreError, Result};\n\npub struct OllamaConfig {\n pub base_url: String, // default \\\"http://localhost:11434\\\"\n pub model: String, // default \\\"nomic-embed-text\\\"\n pub timeout_secs: u64, // default 60\n}\n\nimpl Default for OllamaConfig { /* PRD defaults */ }\n\npub struct OllamaClient {\n client: Client, // async reqwest::Client\n config: OllamaConfig,\n}\n\n#[derive(Serialize)]\nstruct EmbedRequest { model: String, input: Vec }\n\n#[derive(Deserialize)]\nstruct EmbedResponse { model: String, embeddings: Vec> }\n\n#[derive(Deserialize)]\nstruct TagsResponse { models: Vec }\n\n#[derive(Deserialize)]\nstruct ModelInfo { name: String }\n\nimpl OllamaClient {\n pub fn new(config: OllamaConfig) -> Self;\n\n /// Async health check: GET /api/tags\n /// Model matched via starts_with (\\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\")\n pub async fn health_check(&self) -> Result<()>;\n\n 
/// Async batch embedding: POST /api/embed\n /// Input: Vec of texts, Response: Vec> of 768-dim embeddings\n pub async fn embed_batch(&self, texts: Vec) -> Result>>;\n}\n\n/// Quick health check without full client (async).\npub async fn check_ollama_health(base_url: &str) -> bool;\n```\n\n**Error mapping (per PRD):**\n- Connection refused/timeout -> LoreError::OllamaUnavailable { base_url, source: Some(e) }\n- Model not in /api/tags -> LoreError::OllamaModelNotFound { model }\n- Non-200 from /api/embed -> LoreError::EmbeddingFailed { document_id: 0, reason: format!(\\\"HTTP {}: {}\\\", status, body) }\n\n**Key PRD detail:** Model matching uses \\`starts_with\\` (not exact match) so \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\".\n\n## Acceptance Criteria\n- [ ] Uses async reqwest::Client (not blocking)\n- [ ] health_check() is async, detects server availability and model presence\n- [ ] Model matched via starts_with (handles \\\":latest\\\" suffix)\n- [ ] embed_batch() is async, sends POST /api/embed\n- [ ] Batch size up to 32 texts\n- [ ] Returns Vec> with 768 dimensions each\n- [ ] OllamaUnavailable error includes base_url and source error\n- [ ] OllamaModelNotFound error includes model name\n- [ ] Non-200 response mapped to EmbeddingFailed with status + body\n- [ ] Timeout: 60 seconds default (configurable via OllamaConfig)\n- [ ] \\`cargo build\\` succeeds\n\n## Files\n- \\`src/embedding/ollama.rs\\` — new file\n- \\`src/embedding/mod.rs\\` — add \\`pub mod ollama;\\` and re-exports\n\n## TDD Loop\nRED: Tests (unit tests with mock, integration needs Ollama):\n- \\`test_config_defaults\\` — verify default base_url, model, timeout\n- \\`test_health_check_model_starts_with\\` — \\\"nomic-embed-text\\\" matches \\\"nomic-embed-text:latest\\\"\n- \\`test_embed_batch_parse\\` — mock response parsed correctly\n- \\`test_connection_error_maps_to_ollama_unavailable\\`\nGREEN: Implement OllamaClient\nVERIFY: \\`cargo test ollama\\`\n\n## Edge Cases\n- 
Ollama returns model name with version tag (\\\"nomic-embed-text:latest\\\"): starts_with handles this\n- Empty texts array: send empty batch, Ollama returns empty embeddings\n- Ollama returns wrong number of embeddings (2 texts, 1 embedding): caller (pipeline) validates\n- Non-JSON response: reqwest deserialization error -> wrap appropriately","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:34.025099Z","created_by":"tayloreernisse","updated_at":"2026-01-30T16:58:17.546852Z","closed_at":"2026-01-30T16:58:17.546794Z","close_reason":"Completed: OllamaClient with async health_check (starts_with model matching), embed_batch, error mapping to LoreError variants, check_ollama_health helper, 4 tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-335","depends_on_id":"bd-ljf","type":"blocks","created_at":"2026-01-30T15:29:24.627951Z","created_by":"tayloreernisse"}]} {"id":"bd-343o","title":"Fetch and store GitLab linked issues (Related to)","description":"## Background\n\nGitLab's \"Linked items\" provides bidirectional issue linking distinct from \"closes\" and \"mentioned\" references. This data is only available via the issue links API (GET /projects/:id/issues/:iid/links).\n\n**IMPORTANT:** This bead uses migration **017** (after bd-2y79's migration 016). Coordinate numbering.\n\n## Codebase Context\n\n- entity_references table (migration 011) with:\n - reference_type CHECK: 'closes' | 'mentioned' | 'related'\n - source_method CHECK: 'api' | 'note_parse' | 'description_parse'\n- pending_dependent_fetches: job_type CHECK 'resource_events' | 'mr_closes_issues' | 'mr_diffs'\n- **CRITICAL:** Adding 'issue_links' to job_type CHECK requires recreating pending_dependent_fetches table (SQLite can't ALTER CHECK constraints). 
Migration 017 must copy data, drop, recreate with expanded CHECK, and reinsert.\n- Orchestrator pattern: enqueue_job() + drain loop with claim/complete/fail (src/ingestion/orchestrator.rs)\n- dependent_queue.rs: enqueue_job(), claim_jobs(), complete_job(), fail_job()\n- GitLab issue links API returns link_type: \"relates_to\", \"blocks\", \"is_blocked_by\"\n- entity_references reference_type only has 'closes', 'mentioned', 'related' — \"blocks\"/\"is_blocked_by\" not modeled. Store all as 'related' with link_type in a JSON payload_json field or as a separate column in a future migration.\n\n## Approach\n\n### Phase 1: API Client (src/gitlab/client.rs)\n```rust\npub async fn fetch_issue_links(\n &self,\n project_id: i64,\n issue_iid: i64,\n) -> Result> {\n // GET /projects/:id/issues/:iid/links\n // Use fetch_all_pages() + coalesce_not_found()\n}\n```\n\n### Phase 2: Types (src/gitlab/types.rs)\n```rust\n#[derive(Debug, Deserialize)]\npub struct GitLabIssueLink {\n pub id: i64,\n pub iid: i64,\n pub title: String,\n pub state: String,\n pub web_url: String,\n pub link_type: String, // \"relates_to\", \"blocks\", \"is_blocked_by\"\n pub link_created_at: Option,\n}\n```\n\n### Phase 3: Migration 017 (migrations/017_issue_links_job_type.sql)\nRecreate pending_dependent_fetches with expanded CHECK:\n```sql\nCREATE TABLE pending_dependent_fetches_new (\n id INTEGER PRIMARY KEY,\n project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,\n entity_type TEXT NOT NULL CHECK (entity_type IN ('issue', 'merge_request')),\n entity_iid INTEGER NOT NULL,\n entity_local_id INTEGER NOT NULL,\n job_type TEXT NOT NULL CHECK (job_type IN (\n 'resource_events', 'mr_closes_issues', 'mr_diffs', 'issue_links'\n )),\n payload_json TEXT,\n enqueued_at INTEGER NOT NULL,\n attempts INTEGER NOT NULL DEFAULT 0,\n last_error TEXT,\n next_retry_at INTEGER,\n locked_at INTEGER,\n UNIQUE(project_id, entity_type, entity_iid, job_type)\n);\nINSERT INTO pending_dependent_fetches_new SELECT 
* FROM pending_dependent_fetches;\nDROP TABLE pending_dependent_fetches;\nALTER TABLE pending_dependent_fetches_new RENAME TO pending_dependent_fetches;\n-- Recreate indexes from migration 011\n```\n\n### Phase 4: Ingestion (src/ingestion/issue_links.rs NEW)\n```rust\npub async fn fetch_and_store_issue_links(\n conn: &Connection,\n client: &GitLabClient,\n project_id: i64,\n issue_local_id: i64,\n issue_iid: i64,\n) -> Result {\n // 1. Fetch links from API\n // 2. Resolve target issue to local DB id (or store as unresolved)\n // 3. Insert into entity_references: reference_type='related', source_method='api'\n // 4. Create bidirectional refs: A->B and B->A\n // 5. Skip self-links\n}\n```\n\n### Phase 5: Queue Integration\n- Enqueue 'issue_links' job after issue ingestion in orchestrator\n- Add drain_issue_links() following drain_mr_closes_issues() pattern\n\n### Phase 6: Display\nIn `lore show issue 123`, add \"Related Issues\" section after closing MRs.\n\n## Acceptance Criteria\n\n- [ ] API client fetches issue links with pagination\n- [ ] Stored as entity_reference: reference_type='related', source_method='api'\n- [ ] Bidirectional: A links B creates both A->B and B->A references\n- [ ] link_type captured (relates_to, blocks, is_blocked_by) — stored as 'related' for now\n- [ ] Cross-project links stored as unresolved (target_entity_id NULL)\n- [ ] Self-links skipped\n- [ ] Migration 017 recreates pending_dependent_fetches with 'issue_links' in CHECK\n- [ ] `lore show issue 123` shows related issues section\n- [ ] `lore --robot show issue 123` includes related_issues in JSON\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- src/gitlab/client.rs (add fetch_issue_links)\n- src/gitlab/types.rs (add GitLabIssueLink)\n- src/ingestion/issue_links.rs (NEW)\n- src/ingestion/mod.rs (add pub mod issue_links)\n- src/ingestion/orchestrator.rs (enqueue + drain)\n- migrations/017_issue_links_job_type.sql (NEW — 
table recreation)\n- src/core/db.rs (add migration to MIGRATIONS array)\n- src/cli/commands/show.rs (display related issues)\n\n## TDD Loop\n\nRED:\n- test_issue_link_deserialization\n- test_store_issue_links_creates_bidirectional_references\n- test_self_link_skipped\n- test_cross_project_link_unresolved\n\nGREEN: Implement API client, ingestion, migration, display.\n\nVERIFY: cargo test --lib -- issue_links\n\n## Edge Cases\n\n- Cross-project links: target not in local DB -> unresolved reference\n- Self-links: skip\n- UNIQUE constraint prevents duplicate entity_references\n- \"blocks\"/\"is_blocked_by\" semantics not modeled in entity_references yet — store as 'related'\n- Table recreation migration: safe because pending_dependent_fetches is transient queue data\n- Migration numbering: 017 follows bd-2y79's migration 016","status":"open","priority":2,"issue_type":"feature","created_at":"2026-02-05T15:14:25.202900Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:16:28.629763Z","compaction_level":0,"original_size":0,"labels":["ISSUE"]} {"id":"bd-34ek","title":"OBSERV: Implement MetricsLayer custom tracing subscriber layer","description":"## Background\nMetricsLayer is a custom tracing subscriber layer that records span timing and structured fields, then materializes them into Vec. 
This avoids threading a mutable collector through every function signature -- spans are the single source of truth.\n\n## Approach\nAdd to src/core/metrics.rs (same file as StageTiming):\n\n```rust\nuse std::collections::HashMap;\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\nuse tracing::span::{Attributes, Id, Record};\nuse tracing::Subscriber;\nuse tracing_subscriber::layer::{Context, Layer};\nuse tracing_subscriber::registry::LookupSpan;\n\n#[derive(Debug)]\nstruct SpanData {\n name: String,\n parent_id: Option,\n start: Instant,\n fields: HashMap,\n}\n\n#[derive(Debug, Clone)]\npub struct MetricsLayer {\n spans: Arc>>,\n completed: Arc>>,\n}\n\nimpl MetricsLayer {\n pub fn new() -> Self {\n Self {\n spans: Arc::new(Mutex::new(HashMap::new())),\n completed: Arc::new(Mutex::new(Vec::new())),\n }\n }\n\n /// Extract timing tree for a completed run.\n /// Call this after the root span closes.\n pub fn extract_timings(&self) -> Vec {\n let completed = self.completed.lock().unwrap();\n // Build tree: find root entries (no parent), attach children\n // ... 
tree construction logic\n }\n}\n\nimpl Layer for MetricsLayer\nwhere\n S: Subscriber + for<'a> LookupSpan<'a>,\n{\n fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) {\n let parent_id = ctx.span(id).and_then(|s| s.parent().map(|p| p.id()));\n let mut fields = HashMap::new();\n // Visit attrs to capture initial field values\n let mut visitor = FieldVisitor(&mut fields);\n attrs.record(&mut visitor);\n\n self.spans.lock().unwrap().insert(id.into_u64(), SpanData {\n name: attrs.metadata().name().to_string(),\n parent_id,\n start: Instant::now(),\n fields,\n });\n }\n\n fn on_record(&self, id: &Id, values: &Record<'_>, _ctx: Context<'_, S>) {\n // Capture recorded fields (items_processed, items_skipped, errors)\n if let Some(data) = self.spans.lock().unwrap().get_mut(&id.into_u64()) {\n let mut visitor = FieldVisitor(&mut data.fields);\n values.record(&mut visitor);\n }\n }\n\n fn on_close(&self, id: Id, _ctx: Context<'_, S>) {\n if let Some(data) = self.spans.lock().unwrap().remove(&id.into_u64()) {\n let elapsed = data.start.elapsed();\n let timing = StageTiming {\n name: data.name,\n project: data.fields.get(\"project\").and_then(|v| v.as_str()).map(String::from),\n elapsed_ms: elapsed.as_millis() as u64,\n items_processed: data.fields.get(\"items_processed\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n items_skipped: data.fields.get(\"items_skipped\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n errors: data.fields.get(\"errors\").and_then(|v| v.as_u64()).unwrap_or(0) as usize,\n sub_stages: vec![], // Will be populated during extract_timings tree construction\n };\n self.completed.lock().unwrap().push((id.into_u64(), timing));\n }\n }\n}\n```\n\nNeed a FieldVisitor struct implementing tracing::field::Visit to capture field values.\n\nRegister in subscriber stack (src/main.rs), alongside stderr and file layers:\n```rust\nlet metrics_layer = MetricsLayer::new();\nlet metrics_handle = metrics_layer.clone(); // Clone Arc for later 
extraction\n\nregistry()\n .with(stderr_layer.with_filter(stderr_filter))\n .with(file_layer.with_filter(file_filter))\n .with(metrics_layer) // No filter -- captures all spans\n .init();\n```\n\nPass metrics_handle to command handlers so they can call extract_timings() after the pipeline completes.\n\n## Acceptance Criteria\n- [ ] MetricsLayer captures span enter/close timing\n- [ ] on_record captures items_processed, items_skipped, errors fields\n- [ ] extract_timings() returns correctly nested Vec tree\n- [ ] Parallel spans (multiple projects) both appear as sub_stages of parent\n- [ ] Thread-safe: Arc> allows concurrent span operations\n- [ ] cargo clippy --all-targets -- -D warnings passes\n\n## Files\n- src/core/metrics.rs (add MetricsLayer, FieldVisitor, tree construction)\n- src/main.rs (register MetricsLayer in subscriber stack)\n\n## TDD Loop\nRED:\n - test_metrics_layer_single_span: enter/exit one span, extract, assert one StageTiming\n - test_metrics_layer_nested_spans: parent + child, assert child in parent.sub_stages\n - test_metrics_layer_parallel_spans: two sibling spans, assert both in parent.sub_stages\n - test_metrics_layer_field_recording: record items_processed=42, assert captured\nGREEN: Implement MetricsLayer with on_new_span, on_record, on_close, extract_timings\nVERIFY: cargo test && cargo clippy --all-targets -- -D warnings\n\n## Edge Cases\n- Span ID reuse: tracing may reuse span IDs after close. Using remove on close prevents stale data.\n- Lock contention: Mutex per operation. For high-span-count scenarios, consider parking_lot::Mutex. But lore's span count is low (<100 per run), so std::sync::Mutex is fine.\n- extract_timings tree construction: iterate completed Vec, build parent->children map, then recursively construct StageTiming tree. Root entries have parent_id matching the root span or None.\n- MetricsLayer has no filter: it sees ALL spans. 
To avoid noise from dependency spans, check if span name starts with known stage names, or rely on the \"stage\" field being present.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-04T15:54:31.960669Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:25:25.523811Z","closed_at":"2026-02-04T17:25:25.523730Z","close_reason":"Implemented MetricsLayer custom tracing subscriber layer with span timing capture, rate-limit/retry event detection, tree extraction, and 12 unit tests","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-34ek","depends_on_id":"bd-1o4h","type":"blocks","created_at":"2026-02-04T15:55:19.851554Z","created_by":"tayloreernisse"},{"issue_id":"bd-34ek","depends_on_id":"bd-24j1","type":"blocks","created_at":"2026-02-04T15:55:19.905554Z","created_by":"tayloreernisse"},{"issue_id":"bd-34ek","depends_on_id":"bd-3er","type":"parent-child","created_at":"2026-02-04T15:54:31.961646Z","created_by":"tayloreernisse"}]} @@ -115,7 +115,7 @@ {"id":"bd-38q","title":"Implement dirty source tracking module","description":"## Background\nDirty source tracking drives incremental document regeneration. When entities are upserted during ingestion, they're marked dirty. The regenerator drains this queue. The key constraint: mark_dirty_tx() takes &Transaction to enforce atomic marking inside the entity upsert transaction. Uses ON CONFLICT DO UPDATE (not INSERT OR IGNORE) to reset backoff on re-queue.\n\n## Approach\nCreate \\`src/ingestion/dirty_tracker.rs\\` per PRD Section 6.1.\n\n```rust\nconst DIRTY_SOURCES_BATCH_SIZE: usize = 500;\n\n/// Mark dirty INSIDE existing transaction. 
Takes &Transaction, NOT &Connection.\n/// ON CONFLICT resets ALL backoff/error state (not INSERT OR IGNORE).\n/// This ensures fresh updates are immediately eligible, not stuck behind stale backoff.\npub fn mark_dirty_tx(\n tx: &rusqlite::Transaction<'_>,\n source_type: SourceType,\n source_id: i64,\n) -> Result<()>;\n\n/// Convenience wrapper for non-transactional contexts.\npub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()>;\n\n/// Get dirty sources ready for processing.\n/// WHERE next_attempt_at IS NULL OR next_attempt_at <= now\n/// ORDER BY attempt_count ASC, queued_at ASC (failed items deprioritized)\n/// LIMIT 500\npub fn get_dirty_sources(conn: &Connection) -> Result>;\n\n/// Clear dirty entry after successful processing.\npub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()>;\n```\n\n**PRD-specific details:**\n- get_dirty_sources ORDER BY: \\`attempt_count ASC, queued_at ASC\\` (failed items processed AFTER fresh items)\n- mark_dirty_tx ON CONFLICT resets: queued_at, attempt_count=0, last_attempt_at=NULL, last_error=NULL, next_attempt_at=NULL\n- SourceType parsed from string in query results via match on \\\"issue\\\"/\\\"merge_request\\\"/\\\"discussion\\\"\n- Invalid source_type in DB -> rusqlite::Error::FromSqlConversionFailure\n\n**Error recording is in regenerator.rs (bd-1u1)**, not dirty_tracker. 
The dirty_tracker only marks, gets, and clears.\n\n## Acceptance Criteria\n- [ ] mark_dirty_tx takes &Transaction<'_>, NOT &Connection\n- [ ] ON CONFLICT DO UPDATE resets: attempt_count=0, next_attempt_at=NULL, last_error=NULL, last_attempt_at=NULL\n- [ ] Uses ON CONFLICT DO UPDATE, NOT INSERT OR IGNORE (PRD explains why)\n- [ ] get_dirty_sources WHERE next_attempt_at IS NULL OR <= now\n- [ ] get_dirty_sources ORDER BY attempt_count ASC, queued_at ASC\n- [ ] get_dirty_sources LIMIT 500\n- [ ] get_dirty_sources returns Vec<(SourceType, i64)>\n- [ ] clear_dirty DELETEs entry\n- [ ] Queue drains completely when called in loop\n- [ ] \\`cargo test dirty_tracker\\` passes\n\n## Files\n- \\`src/ingestion/dirty_tracker.rs\\` — new file\n- \\`src/ingestion/mod.rs\\` — add \\`pub mod dirty_tracker;\\`\n\n## TDD Loop\nRED: Tests:\n- \\`test_mark_dirty_tx_inserts\\` — entry appears in dirty_sources\n- \\`test_requeue_resets_backoff\\` — mark, simulate error state, re-mark -> attempt_count=0, next_attempt_at=NULL\n- \\`test_get_respects_backoff\\` — entry with future next_attempt_at not returned\n- \\`test_get_orders_by_attempt_count\\` — fresh items before failed items\n- \\`test_batch_size_500\\` — insert 600, get returns 500\n- \\`test_clear_removes\\` — entry gone after clear\n- \\`test_drain_loop\\` — insert 1200, loop 3 times = empty\nGREEN: Implement all functions\nVERIFY: \\`cargo test dirty_tracker\\`\n\n## Edge Cases\n- Empty queue: get returns empty Vec\n- Invalid source_type string in DB: FromSqlConversionFailure error\n- Concurrent mark + get: ON CONFLICT handles race condition","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:27:09.434845Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:31:35.455315Z","closed_at":"2026-01-30T17:31:35.455127Z","close_reason":"Implemented dirty_tracker with mark_dirty_tx, get_dirty_sources, clear_dirty, record_dirty_error + 8 
tests","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-38q","depends_on_id":"bd-36p","type":"blocks","created_at":"2026-01-30T15:29:34.914038Z","created_by":"tayloreernisse"},{"issue_id":"bd-38q","depends_on_id":"bd-hrs","type":"blocks","created_at":"2026-01-30T15:29:34.961390Z","created_by":"tayloreernisse"},{"issue_id":"bd-38q","depends_on_id":"bd-mem","type":"blocks","created_at":"2026-01-30T15:29:34.995197Z","created_by":"tayloreernisse"}]} {"id":"bd-39w","title":"[CP1] Test fixtures for mocked GitLab responses","description":"## Background\n\nTest fixtures provide mocked GitLab API responses for unit and integration tests. They enable testing without a live GitLab instance and ensure consistent test data across runs.\n\n## Approach\n\n### Fixture Files\n\nCreate JSON fixtures that match GitLab API response shapes:\n\n```\ntests/fixtures/\n├── gitlab_issue.json # Single issue\n├── gitlab_issues_page.json # Array of issues (pagination test)\n├── gitlab_discussion.json # Single discussion with notes\n└── gitlab_discussions_page.json # Array of discussions\n```\n\n### gitlab_issue.json\n\n```json\n{\n \"id\": 12345,\n \"iid\": 42,\n \"project_id\": 100,\n \"title\": \"Test issue title\",\n \"description\": \"Test issue description\",\n \"state\": \"opened\",\n \"created_at\": \"2024-01-15T10:00:00.000Z\",\n \"updated_at\": \"2024-01-20T15:30:00.000Z\",\n \"closed_at\": null,\n \"author\": {\n \"id\": 1,\n \"username\": \"testuser\",\n \"name\": \"Test User\"\n },\n \"labels\": [\"bug\", \"priority::high\"],\n \"web_url\": \"https://gitlab.example.com/group/project/-/issues/42\"\n}\n```\n\n### gitlab_discussion.json\n\n```json\n{\n \"id\": \"6a9c1750b37d513a43987b574953fceb50b03ce7\",\n \"individual_note\": false,\n \"notes\": [\n {\n \"id\": 1001,\n \"type\": \"DiscussionNote\",\n \"body\": \"First comment in thread\",\n \"author\": { \"id\": 1, \"username\": \"testuser\", \"name\": \"Test User\" },\n \"created_at\": 
\"2024-01-16T09:00:00.000Z\",\n \"updated_at\": \"2024-01-16T09:00:00.000Z\",\n \"system\": false,\n \"resolvable\": true,\n \"resolved\": false,\n \"resolved_by\": null,\n \"resolved_at\": null,\n \"position\": null\n },\n {\n \"id\": 1002,\n \"type\": \"DiscussionNote\",\n \"body\": \"Reply to first comment\",\n \"author\": { \"id\": 2, \"username\": \"reviewer\", \"name\": \"Reviewer\" },\n \"created_at\": \"2024-01-16T10:00:00.000Z\",\n \"updated_at\": \"2024-01-16T10:00:00.000Z\",\n \"system\": false,\n \"resolvable\": true,\n \"resolved\": false,\n \"resolved_by\": null,\n \"resolved_at\": null,\n \"position\": null\n }\n ]\n}\n```\n\n### Helper Module\n\n```rust\n// tests/fixtures/mod.rs\n\npub fn load_fixture(name: &str) -> T {\n let path = PathBuf::from(env!(\"CARGO_MANIFEST_DIR\"))\n .join(\"tests/fixtures\")\n .join(name);\n let content = std::fs::read_to_string(&path)\n .expect(&format!(\"Failed to read fixture: {}\", name));\n serde_json::from_str(&content)\n .expect(&format!(\"Failed to parse fixture: {}\", name))\n}\n\npub fn gitlab_issue() -> GitLabIssue {\n load_fixture(\"gitlab_issue.json\")\n}\n\npub fn gitlab_issues_page() -> Vec {\n load_fixture(\"gitlab_issues_page.json\")\n}\n\npub fn gitlab_discussion() -> GitLabDiscussion {\n load_fixture(\"gitlab_discussion.json\")\n}\n```\n\n## Acceptance Criteria\n\n- [ ] gitlab_issue.json deserializes to GitLabIssue correctly\n- [ ] gitlab_issues_page.json contains 3+ issues for pagination tests\n- [ ] gitlab_discussion.json contains multi-note thread\n- [ ] gitlab_discussions_page.json contains mix of individual_note true/false\n- [ ] At least one fixture includes system: true note\n- [ ] Helper functions load fixtures without panic\n\n## Files\n\n- tests/fixtures/gitlab_issue.json (create)\n- tests/fixtures/gitlab_issues_page.json (create)\n- tests/fixtures/gitlab_discussion.json (create)\n- tests/fixtures/gitlab_discussions_page.json (create)\n- tests/fixtures/mod.rs (create)\n\n## TDD 
Loop\n\nRED:\n```rust\n#[test] fn fixture_gitlab_issue_deserializes()\n#[test] fn fixture_gitlab_discussion_deserializes()\n#[test] fn fixture_has_system_note()\n```\n\nGREEN: Create JSON fixtures and helper module\n\nVERIFY: `cargo test fixture`\n\n## Edge Cases\n\n- Include issue with empty labels array\n- Include issue with null description\n- Include system note (system: true)\n- Include individual_note: true discussion (standalone comment)\n- Timestamps must be valid ISO 8601","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-25T17:02:38.433752Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:48:08.415195Z","closed_at":"2026-01-25T22:48:08.415132Z","close_reason":"Created 4 JSON fixture files (issue, issues_page, discussion, discussions_page) with helper tests - 6 tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-39w","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.770848Z","created_by":"tayloreernisse"}]} {"id":"bd-3ae","title":"Epic: CP2 Gate A - MRs Only","description":"## Background\nGate A validates core MR ingestion works before adding complexity. Proves the cursor-based sync, pagination, and basic CLI work. 
This is the foundation - if Gate A fails, nothing else matters.\n\n## Acceptance Criteria (Pass/Fail)\n- [ ] `gi ingest --type=merge_requests` completes without error\n- [ ] `SELECT COUNT(*) FROM merge_requests` > 0\n- [ ] `gi list mrs --limit=5` shows 5 MRs with iid, title, state, author\n- [ ] `gi count mrs` shows total count matching DB query\n- [ ] MR with `state=locked` can be stored (if exists in test data)\n- [ ] Draft MR shows `draft=1` in DB and `[DRAFT]` in list output\n- [ ] `work_in_progress=true` MR shows `draft=1` (fallback works)\n- [ ] `head_sha` populated for MRs with commits\n- [ ] `references_short` and `references_full` populated\n- [ ] Re-run ingest shows \"0 new MRs\" or minimal refetch (cursor working)\n- [ ] Cursor saved at page boundary, not item boundary\n\n## Validation Script\n```bash\n#!/bin/bash\nset -e\n\nDB_PATH=\"${XDG_DATA_HOME:-$HOME/.local/share}/gitlab-inbox/db.sqlite3\"\n\necho \"=== Gate A: MRs Only ===\"\n\n# 1. Clear any existing MR data for clean test\necho \"Step 1: Reset MR cursor for clean test...\"\nsqlite3 \"$DB_PATH\" \"DELETE FROM sync_cursors WHERE resource_type = 'merge_requests';\"\n\n# 2. Run MR ingestion\necho \"Step 2: Ingest MRs...\"\ngi ingest --type=merge_requests\n\n# 3. Verify MRs exist\necho \"Step 3: Verify MR count...\"\nMR_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests;\")\necho \" MR count: $MR_COUNT\"\n[ \"$MR_COUNT\" -gt 0 ] || { echo \"FAIL: No MRs ingested\"; exit 1; }\n\n# 4. Verify list command\necho \"Step 4: Test list command...\"\ngi list mrs --limit=5\n\n# 5. Verify count command\necho \"Step 5: Test count command...\"\ngi count mrs\n\n# 6. Verify draft handling\necho \"Step 6: Check draft MRs...\"\nDRAFT_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE draft = 1;\")\necho \" Draft MR count: $DRAFT_COUNT\"\n\n# 7. 
Verify head_sha population\necho \"Step 7: Check head_sha...\"\nSHA_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE head_sha IS NOT NULL;\")\necho \" MRs with head_sha: $SHA_COUNT\"\n\n# 8. Verify references\necho \"Step 8: Check references...\"\nREF_COUNT=$(sqlite3 \"$DB_PATH\" \"SELECT COUNT(*) FROM merge_requests WHERE references_short IS NOT NULL;\")\necho \" MRs with references: $REF_COUNT\"\n\n# 9. Verify cursor saved\necho \"Step 9: Check cursor...\"\nCURSOR=$(sqlite3 \"$DB_PATH\" \"SELECT updated_at, gitlab_id FROM sync_cursors WHERE resource_type = 'merge_requests';\")\necho \" Cursor: $CURSOR\"\n[ -n \"$CURSOR\" ] || { echo \"FAIL: Cursor not saved\"; exit 1; }\n\n# 10. Re-run and verify minimal refetch\necho \"Step 10: Re-run ingest (should be minimal)...\"\ngi ingest --type=merge_requests\n# Output should show minimal or zero new MRs\n\necho \"\"\necho \"=== Gate A: PASSED ===\"\n```\n\n## Test Commands (Quick Verification)\n```bash\n# Run these in order:\ngi ingest --type=merge_requests\ngi list mrs --limit=10\ngi count mrs\n\n# Verify in DB:\nsqlite3 ~/.local/share/gitlab-inbox/db.sqlite3 \"\n SELECT \n COUNT(*) as total,\n SUM(CASE WHEN draft = 1 THEN 1 ELSE 0 END) as drafts,\n SUM(CASE WHEN head_sha IS NOT NULL THEN 1 ELSE 0 END) as with_sha,\n SUM(CASE WHEN references_short IS NOT NULL THEN 1 ELSE 0 END) as with_refs\n FROM merge_requests;\n\"\n\n# Re-run (should be no-op):\ngi ingest --type=merge_requests\n```\n\n## Dependencies\nThis gate requires these beads to be complete:\n- bd-3ir (Database migration)\n- bd-5ta (GitLab MR types)\n- bd-34o (MR transformer)\n- bd-iba (GitLab client pagination)\n- bd-ser (MR ingestion module)\n\n## Edge Cases\n- `locked` state is transitional (merge in progress); may not exist in test data\n- Some older GitLab instances may not return `head_sha` for all MRs\n- `work_in_progress` is deprecated but should still work as fallback\n- Very large projects (10k+ MRs) may take significant time on 
first sync","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-26T22:06:00.966522Z","created_by":"tayloreernisse","updated_at":"2026-01-27T00:48:21.057298Z","closed_at":"2026-01-27T00:48:21.057225Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3ae","depends_on_id":"bd-iba","type":"blocks","created_at":"2026-01-26T22:08:55.576626Z","created_by":"tayloreernisse"},{"issue_id":"bd-3ae","depends_on_id":"bd-ser","type":"blocks","created_at":"2026-01-26T22:08:55.446814Z","created_by":"tayloreernisse"}]} -{"id":"bd-3as","title":"Implement timeline event collection and chronological interleaving","description":"## Background\n\nThe event collection phase is steps 4-5 of the timeline pipeline (spec Section 3.2). It takes seed + expanded entity sets and collects all their events from resource event tables, then interleaves chronologically.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 4-5, Section 3.3 (Event Model).\n\n## Codebase Context\n\n- resource_state_events: columns include state, actor_username (not actor_gitlab_id for display), created_at, issue_id, merge_request_id, source_merge_request_iid, source_commit\n- resource_label_events: columns include action ('add'|'remove'), label_name (NULLABLE since migration 012), actor_username, created_at\n- resource_milestone_events: columns include action ('add'|'remove'), milestone_title (NULLABLE since migration 012), actor_username, created_at\n- issues table: created_at, author_username, title, web_url\n- merge_requests table: created_at, author_username, title, web_url, merged_at, updated_at\n- All timestamps are ms epoch UTC (stored as INTEGER)\n\n## Approach\n\nCreate `src/core/timeline_collect.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::timeline::{TimelineEvent, TimelineEventType, EntityRef, ExpandedEntityRef};\n\npub fn collect_events(\n conn: &Connection,\n seed_entities: &[EntityRef],\n 
expanded_entities: &[ExpandedEntityRef],\n evidence_notes: &[TimelineEvent], // from seed phase\n since_ms: Option, // --since filter\n limit: usize, // -n flag (default 100)\n) -> Result> { ... }\n```\n\n### Event Collection Per Entity\n\nFor each entity (seed + expanded), collect:\n\n1. **Creation event** (`Created`):\n ```sql\n -- Issues:\n SELECT created_at, author_username, title, web_url FROM issues WHERE id = ?1\n -- MRs:\n SELECT created_at, author_username, title, web_url FROM merge_requests WHERE id = ?1\n ```\n\n2. **State changes** (`StateChanged { state }`):\n ```sql\n SELECT state, actor_username, created_at FROM resource_state_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2) -- since filter\n ORDER BY created_at ASC\n ```\n NOTE: For MRs, a state='merged' event also produces a separate Merged variant.\n\n3. **Label changes** (`LabelAdded`/`LabelRemoved`):\n ```sql\n SELECT action, label_name, actor_username, created_at FROM resource_label_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL label_name (deleted label): use \"[deleted label]\" as fallback.\n\n4. **Milestone changes** (`MilestoneSet`/`MilestoneRemoved`):\n ```sql\n SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL milestone_title: use \"[deleted milestone]\" as fallback.\n\n5. **Merge event** (Merged, MR only):\n Derive from merge_requests.merged_at (preferred) OR resource_state_events WHERE state='merged'. 
Skip StateChanged when state='merged' — emit only the Merged variant.\n\n### Chronological Interleave\n\n```rust\nevents.sort(); // Uses Ord impl from bd-20e\nif let Some(since) = since_ms {\n events.retain(|e| e.timestamp >= since);\n}\nevents.truncate(limit);\n```\n\nRegister in `src/core/mod.rs`: `pub mod timeline_collect;`\n\n## Acceptance Criteria\n\n- [ ] Collects Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence events\n- [ ] Merged events deduplicated from StateChanged{merged} — emit only Merged variant\n- [ ] NULL label_name/milestone_title handled with fallback text\n- [ ] --since filter applied to all event types\n- [ ] Events sorted chronologically with stable tiebreak\n- [ ] Limit applied AFTER sorting\n- [ ] Evidence notes from seed phase included\n- [ ] is_seed correctly set based on entity source\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_collect.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_collect;`)\n\n## TDD Loop\n\nRED:\n- `test_collect_creation_event` - entity produces Created event\n- `test_collect_state_events` - state changes produce StateChanged events\n- `test_collect_merged_dedup` - state='merged' produces Merged not StateChanged\n- `test_collect_null_label_fallback` - NULL label_name uses fallback text\n- `test_collect_since_filter` - old events excluded\n- `test_collect_chronological_sort` - mixed entity events interleave correctly\n- `test_collect_respects_limit`\n\nTests need in-memory DB with migrations 001-014 applied.\n\nGREEN: Implement SQL queries and event assembly.\n\nVERIFY: `cargo test --lib -- timeline_collect`\n\n## Edge Cases\n\n- MR with merged_at=NULL and no state='merged' event: no Merged event emitted\n- Entity with 0 events in resource tables: only Created event returned\n- NULL actor_username: actor field is None\n- Timestamps at exact 
--since boundary: use >= (inclusive)","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.703942Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:57:11.639952Z","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-3as","depends_on_id":"bd-1ep","type":"blocks","created_at":"2026-02-02T21:33:37.618171Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.705605Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ypa","type":"blocks","created_at":"2026-02-02T21:33:37.575585Z","created_by":"tayloreernisse"}]} +{"id":"bd-3as","title":"Implement timeline event collection and chronological interleaving","description":"## Background\n\nThe event collection phase is steps 4-5 of the timeline pipeline (spec Section 3.2). It takes seed + expanded entity sets and collects all their events from resource event tables, then interleaves chronologically.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 steps 4-5, Section 3.3 (Event Model).\n\n## Codebase Context\n\n- resource_state_events: columns include state, actor_username (not actor_gitlab_id for display), created_at, issue_id, merge_request_id, source_merge_request_iid, source_commit\n- resource_label_events: columns include action ('add'|'remove'), label_name (NULLABLE since migration 012), actor_username, created_at\n- resource_milestone_events: columns include action ('add'|'remove'), milestone_title (NULLABLE since migration 012), actor_username, created_at\n- issues table: created_at, author_username, title, web_url\n- merge_requests table: created_at, author_username, title, web_url, merged_at, updated_at\n- All timestamps are ms epoch UTC (stored as INTEGER)\n\n## Approach\n\nCreate `src/core/timeline_collect.rs`:\n\n```rust\nuse rusqlite::Connection;\nuse crate::core::timeline::{TimelineEvent, 
TimelineEventType, EntityRef, ExpandedEntityRef};\n\npub fn collect_events(\n conn: &Connection,\n seed_entities: &[EntityRef],\n expanded_entities: &[ExpandedEntityRef],\n evidence_notes: &[TimelineEvent], // from seed phase\n since_ms: Option, // --since filter\n limit: usize, // -n flag (default 100)\n) -> Result> { ... }\n```\n\n### Event Collection Per Entity\n\nFor each entity (seed + expanded), collect:\n\n1. **Creation event** (`Created`):\n ```sql\n -- Issues:\n SELECT created_at, author_username, title, web_url FROM issues WHERE id = ?1\n -- MRs:\n SELECT created_at, author_username, title, web_url FROM merge_requests WHERE id = ?1\n ```\n\n2. **State changes** (`StateChanged { state }`):\n ```sql\n SELECT state, actor_username, created_at FROM resource_state_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2) -- since filter\n ORDER BY created_at ASC\n ```\n NOTE: For MRs, a state='merged' event also produces a separate Merged variant.\n\n3. **Label changes** (`LabelAdded`/`LabelRemoved`):\n ```sql\n SELECT action, label_name, actor_username, created_at FROM resource_label_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL label_name (deleted label): use \"[deleted label]\" as fallback.\n\n4. **Milestone changes** (`MilestoneSet`/`MilestoneRemoved`):\n ```sql\n SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events\n WHERE (issue_id = ?1 OR merge_request_id = ?1)\n AND (?2 IS NULL OR created_at >= ?2)\n ORDER BY created_at ASC\n ```\n Handle NULL milestone_title: use \"[deleted milestone]\" as fallback.\n\n5. **Merge event** (Merged, MR only):\n Derive from merge_requests.merged_at (preferred) OR resource_state_events WHERE state='merged'. 
Skip StateChanged when state='merged' — emit only the Merged variant.\n\n### Chronological Interleave\n\n```rust\nevents.sort(); // Uses Ord impl from bd-20e\nif let Some(since) = since_ms {\n events.retain(|e| e.timestamp >= since);\n}\nevents.truncate(limit);\n```\n\nRegister in `src/core/mod.rs`: `pub mod timeline_collect;`\n\n## Acceptance Criteria\n\n- [ ] Collects Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence events\n- [ ] Merged events deduplicated from StateChanged{merged} — emit only Merged variant\n- [ ] NULL label_name/milestone_title handled with fallback text\n- [ ] --since filter applied to all event types\n- [ ] Events sorted chronologically with stable tiebreak\n- [ ] Limit applied AFTER sorting\n- [ ] Evidence notes from seed phase included\n- [ ] is_seed correctly set based on entity source\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_collect.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_collect;`)\n\n## TDD Loop\n\nRED:\n- `test_collect_creation_event` - entity produces Created event\n- `test_collect_state_events` - state changes produce StateChanged events\n- `test_collect_merged_dedup` - state='merged' produces Merged not StateChanged\n- `test_collect_null_label_fallback` - NULL label_name uses fallback text\n- `test_collect_since_filter` - old events excluded\n- `test_collect_chronological_sort` - mixed entity events interleave correctly\n- `test_collect_respects_limit`\n\nTests need in-memory DB with migrations 001-014 applied.\n\nGREEN: Implement SQL queries and event assembly.\n\nVERIFY: `cargo test --lib -- timeline_collect`\n\n## Edge Cases\n\n- MR with merged_at=NULL and no state='merged' event: no Merged event emitted\n- Entity with 0 events in resource tables: only Created event returned\n- NULL actor_username: actor field is None\n- Timestamps at exact 
--since boundary: use >= (inclusive)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.703942Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:53:01.160429Z","closed_at":"2026-02-05T21:53:01.160380Z","close_reason":"Completed: Created src/core/timeline_collect.rs with event collection for Created, StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged, NoteEvidence. Merged dedup (state=merged skipped in favor of Merged variant). NULL label/milestone fallbacks. Since filter, chronological sort, limit. 10 tests pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-3as","depends_on_id":"bd-1ep","type":"blocks","created_at":"2026-02-02T21:33:37.618171Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.705605Z","created_by":"tayloreernisse"},{"issue_id":"bd-3as","depends_on_id":"bd-ypa","type":"blocks","created_at":"2026-02-02T21:33:37.575585Z","created_by":"tayloreernisse"}]} {"id":"bd-3bo","title":"[CP1] gi count issues/discussions/notes commands","description":"Count entities in the database.\n\nCommands:\n- gi count issues → 'Issues: N'\n- gi count discussions --type=issue → 'Issue Discussions: N'\n- gi count notes --type=issue → 'Issue Notes: N (excluding M system)'\n\nFiles: src/cli/commands/count.ts\nDone when: Counts match expected values from GitLab","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T15:20:16.190875Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156293Z","deleted_at":"2026-01-25T15:21:35.156290Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-3er","title":"OBSERV Epic: Phase 3 - Performance Metrics Collection","description":"StageTiming struct, custom MetricsLayer tracing subscriber layer, span-to-metrics extraction, 
robot JSON enrichment with meta.stages, human-readable timing summary.\n\nDepends on: Phase 2 (spans must exist to extract timing from)\nUnblocks: Phase 4 (sync history needs Vec to store)\n\nFiles: src/core/metrics.rs (new), src/cli/commands/sync.rs, src/cli/commands/ingest.rs, src/main.rs\n\nAcceptance criteria (PRD Section 6.3):\n- lore --robot sync includes meta.run_id and meta.stages array\n- Each stage has name, elapsed_ms, items_processed\n- Top-level stages have sub_stages arrays\n- Interactive sync prints timing summary table\n- Zero-value fields omitted from JSON","status":"closed","priority":2,"issue_type":"epic","created_at":"2026-02-04T15:53:27.415566Z","created_by":"tayloreernisse","updated_at":"2026-02-04T17:32:56.743477Z","closed_at":"2026-02-04T17:32:56.743430Z","close_reason":"All Phase 3 tasks complete: StageTiming struct, MetricsLayer, span field recording, robot JSON enrichment with stages, and human-readable timing summary","compaction_level":0,"original_size":0,"labels":["observability"],"dependencies":[{"issue_id":"bd-3er","depends_on_id":"bd-2ni","type":"blocks","created_at":"2026-02-04T15:55:19.101775Z","created_by":"tayloreernisse"}]} {"id":"bd-3eu","title":"Implement hybrid search with adaptive recall","description":"## Background\nHybrid search is the top-level search orchestrator that combines FTS5 lexical results with sqlite-vec semantic results via RRF ranking. It supports three modes (Lexical, Semantic, Hybrid) and implements adaptive recall (wider initial fetch when filters are applied) and graceful degradation (falls back to FTS when Ollama is unavailable). 
All modes use RRF for consistent --explain output.\n\n## Approach\nCreate `src/search/hybrid.rs` per PRD Section 5.3.\n\n**Key types:**\n```rust\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub enum SearchMode {\n Hybrid, // Vector + FTS with RRF\n Lexical, // FTS only\n Semantic, // Vector only\n}\n\nimpl SearchMode {\n pub fn from_str(s: &str) -> Option {\n match s.to_lowercase().as_str() {\n \"hybrid\" => Some(Self::Hybrid),\n \"lexical\" | \"fts\" => Some(Self::Lexical),\n \"semantic\" | \"vector\" => Some(Self::Semantic),\n _ => None,\n }\n }\n\n pub fn as_str(&self) -> &'static str {\n match self {\n Self::Hybrid => \"hybrid\",\n Self::Lexical => \"lexical\",\n Self::Semantic => \"semantic\",\n }\n }\n}\n\npub struct HybridResult {\n pub document_id: i64,\n pub score: f64, // Normalized RRF score (0-1)\n pub vector_rank: Option,\n pub fts_rank: Option,\n pub rrf_score: f64, // Raw RRF score\n}\n```\n\n**Core function (ASYNC, PRD-exact signature):**\n```rust\npub async fn search_hybrid(\n conn: &Connection,\n client: Option<&OllamaClient>, // None if Ollama unavailable\n ollama_base_url: Option<&str>, // For actionable error messages\n query: &str,\n mode: SearchMode,\n filters: &SearchFilters,\n fts_mode: FtsQueryMode,\n) -> Result<(Vec, Vec)>\n```\n\n**IMPORTANT — client is `Option<&OllamaClient>`:** This enables graceful degradation. When Ollama is unavailable, the caller passes `None` and hybrid mode falls back to FTS-only with a warning. 
The `ollama_base_url` is separate so error messages can include it even when client is None.\n\n**Adaptive recall constants (PRD Section 5.3):**\n```rust\nconst BASE_RECALL_MIN: usize = 50;\nconst FILTERED_RECALL_MIN: usize = 200;\nconst RECALL_CAP: usize = 1500;\n```\n\n**Recall formula:**\n```rust\nlet requested = filters.clamp_limit();\nlet top_k = if filters.has_any_filter() {\n (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP)\n} else {\n (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP)\n};\n```\n\n**Mode behavior:**\n- **Lexical:** FTS only -> rank_rrf with empty vector list (single-list RRF)\n- **Semantic:** Vector only -> requires client (error if None) -> rank_rrf with empty FTS list\n- **Hybrid:** Both FTS + vector -> rank_rrf with both lists\n- **Hybrid with client=None:** Graceful degradation to Lexical with warning, NOT error\n\n**Graceful degradation logic:**\n```rust\nSearchMode::Hybrid => {\n let fts_results = search_fts(conn, query, top_k, fts_mode)?;\n let fts_tuples: Vec<_> = fts_results.iter().map(|r| (r.document_id, r.rank)).collect();\n\n match client {\n Some(client) => {\n let query_embedding = client.embed_batch(vec\\![query.to_string()]).await?;\n let embedding = query_embedding.into_iter().next().unwrap();\n let vec_results = search_vector(conn, &embedding, top_k)?;\n let vec_tuples: Vec<_> = vec_results.iter().map(|r| (r.document_id, r.distance)).collect();\n let ranked = rank_rrf(&vec_tuples, &fts_tuples);\n // ... map to HybridResult\n Ok((results, warnings))\n }\n None => {\n warnings.push(\"Ollama unavailable, falling back to lexical search\".into());\n let ranked = rank_rrf(&[], &fts_tuples);\n // ... 
map to HybridResult\n Ok((results, warnings))\n }\n }\n}\n```\n\n## Acceptance Criteria\n- [ ] Function is `async` (per PRD — Ollama client methods are async)\n- [ ] Signature takes `client: Option<&OllamaClient>` (not required)\n- [ ] Signature takes `ollama_base_url: Option<&str>` for actionable error messages\n- [ ] Returns `(Vec, Vec)` — results + warnings\n- [ ] Lexical mode: FTS-only results ranked via RRF (single list)\n- [ ] Semantic mode: vector-only results ranked via RRF; error if client is None\n- [ ] Hybrid mode: both FTS + vector results merged via RRF\n- [ ] Graceful degradation: client=None in Hybrid falls back to FTS with warning (not error)\n- [ ] Adaptive recall: unfiltered max(50, limit*10), filtered max(200, limit*50), capped 1500\n- [ ] All modes produce consistent --explain output (vector_rank, fts_rank, rrf_score)\n- [ ] SearchMode::from_str accepts aliases: \"fts\" for Lexical, \"vector\" for Semantic\n- [ ] `cargo build` succeeds\n\n## Files\n- `src/search/hybrid.rs` — new file\n- `src/search/mod.rs` — add `pub use hybrid::{search_hybrid, HybridResult, SearchMode};`\n\n## TDD Loop\nRED: Tests (some integration, some unit):\n- `test_lexical_mode` — FTS results only\n- `test_semantic_mode` — vector results only\n- `test_hybrid_mode` — both lists merged\n- `test_graceful_degradation` — None client falls back to FTS with warning in warnings vec\n- `test_adaptive_recall_unfiltered` — recall = max(50, limit*10)\n- `test_adaptive_recall_filtered` — recall = max(200, limit*50)\n- `test_recall_cap` — never exceeds 1500\n- `test_search_mode_from_str` — \"hybrid\", \"lexical\", \"fts\", \"semantic\", \"vector\", invalid\nGREEN: Implement search_hybrid\nVERIFY: `cargo test hybrid`\n\n## Edge Cases\n- Both FTS and vector return zero results: empty output (not error)\n- FTS returns results but vector returns empty: RRF still works (single-list)\n- Very high limit (100) with filters: recall = min(5000, 1500) = 1500\n- Semantic mode with client=None: 
error (OllamaUnavailable), not degradation\n- Semantic mode with 0% coverage: return LoreError::EmbeddingsNotBuilt","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T15:26:50.343002Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:56:16.631748Z","closed_at":"2026-01-30T17:56:16.631682Z","close_reason":"Implemented hybrid search with 3 modes (lexical/semantic/hybrid), graceful degradation when Ollama unavailable, adaptive recall (50-1500), RRF fusion. 6 tests pass.","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-3eu","depends_on_id":"bd-1k1","type":"blocks","created_at":"2026-01-30T15:29:24.913458Z","created_by":"tayloreernisse"},{"issue_id":"bd-3eu","depends_on_id":"bd-335","type":"blocks","created_at":"2026-01-30T15:29:25.025502Z","created_by":"tayloreernisse"},{"issue_id":"bd-3eu","depends_on_id":"bd-3ez","type":"blocks","created_at":"2026-01-30T15:29:24.987809Z","created_by":"tayloreernisse"},{"issue_id":"bd-3eu","depends_on_id":"bd-bjo","type":"blocks","created_at":"2026-01-30T15:29:24.950761Z","created_by":"tayloreernisse"}]} @@ -179,6 +179,6 @@ {"id":"bd-v6i","title":"[CP1] gi ingest --type=issues command","description":"## Background\n\nThe `gi ingest --type=issues` command is the main entry point for issue ingestion. It acquires a single-flight lock, calls the orchestrator for each configured project, and outputs progress/summary to the user.\n\n## Approach\n\n### Module: src/cli/commands/ingest.rs\n\n### Clap Definition\n\n```rust\n#[derive(Args)]\npub struct IngestArgs {\n /// Resource type to ingest\n #[arg(long, value_parser = [\"issues\", \"merge_requests\"])]\n pub r#type: String,\n\n /// Filter to single project\n #[arg(long)]\n pub project: Option,\n\n /// Override stale sync lock\n #[arg(long)]\n pub force: bool,\n}\n```\n\n### Handler Function\n\n```rust\npub async fn handle_ingest(args: IngestArgs, config: &Config) -> Result<()>\n```\n\n### Logic\n\n1. 
**Acquire single-flight lock**: `acquire_sync_lock(conn, args.force)?`\n2. **Get projects to sync**:\n - If `args.project` specified, filter to that one\n - Otherwise, get all configured projects from DB\n3. **For each project**:\n - Print \"Ingesting issues for {project_path}...\"\n - Call `ingest_project_issues(conn, client, config, project_id, gitlab_project_id)`\n - Print \"{N} issues fetched, {M} new labels\"\n4. **Print discussion sync summary**:\n - \"Fetching discussions ({N} issues with updates)...\"\n - \"{N} discussions, {M} notes (excluding {K} system notes)\"\n - \"Skipped discussion sync for {N} unchanged issues.\"\n5. **Release lock**: Lock auto-released when handler returns\n\n### Output Format (matches PRD)\n\n```\nIngesting issues...\n\n group/project-one: 1,234 issues fetched, 45 new labels\n\nFetching discussions (312 issues with updates)...\n\n group/project-one: 312 issues → 1,234 discussions, 5,678 notes\n\nTotal: 1,234 issues, 1,234 discussions, 5,678 notes (excluding 1,234 system notes)\nSkipped discussion sync for 922 unchanged issues.\n```\n\n## Acceptance Criteria\n\n- [ ] Clap args parse --type, --project, --force correctly\n- [ ] Single-flight lock acquired before sync starts\n- [ ] Lock error message is clear if concurrent run attempted\n- [ ] Progress output shows per-project counts\n- [ ] Summary includes unchanged issues skipped count\n- [ ] --force flag allows overriding stale lock\n\n## Files\n\n- src/cli/commands/mod.rs (add `pub mod ingest;`)\n- src/cli/commands/ingest.rs (create)\n- src/cli/mod.rs (add Ingest variant to Commands enum)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/cli_ingest_tests.rs\n#[tokio::test] async fn ingest_issues_acquires_lock()\n#[tokio::test] async fn ingest_issues_fails_on_concurrent_run()\n#[tokio::test] async fn ingest_issues_respects_project_filter()\n#[tokio::test] async fn ingest_issues_force_overrides_stale_lock()\n```\n\nGREEN: Implement handler with lock and orchestrator calls\n\nVERIFY: `cargo 
test cli_ingest`\n\n## Edge Cases\n\n- No projects configured - return early with helpful message\n- Project filter matches nothing - error with \"project not found\"\n- Lock already held - clear error \"Sync already in progress\"\n- Ctrl-C during sync - lock should be released (via Drop or SIGINT handler)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.312565Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:56:44.090142Z","closed_at":"2026-01-25T22:56:44.090086Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-v6i","depends_on_id":"bd-ozy","type":"blocks","created_at":"2026-01-25T17:04:05.629772Z","created_by":"tayloreernisse"}]} {"id":"bd-xhz","title":"[CP1] GitLab client pagination methods","description":"## Background\n\nGitLab pagination methods enable fetching large result sets (issues, discussions) as async streams. The client uses `x-next-page` headers to determine continuation and applies cursor rewind for tuple-based incremental sync.\n\n## Approach\n\nAdd pagination methods to GitLabClient using `async-stream` crate:\n\n### Methods to Add\n\n```rust\nimpl GitLabClient {\n /// Paginate through issues for a project.\n pub fn paginate_issues(\n &self,\n gitlab_project_id: i64,\n updated_after: Option, // ms epoch cursor\n cursor_rewind_seconds: u32,\n ) -> Pin> + Send + '_>>\n\n /// Paginate through discussions for an issue.\n pub fn paginate_issue_discussions(\n &self,\n gitlab_project_id: i64,\n issue_iid: i64,\n ) -> Pin> + Send + '_>>\n\n /// Make request and return response with headers for pagination.\n async fn request_with_headers(\n &self,\n path: &str,\n params: &[(&str, String)],\n ) -> Result<(T, HeaderMap)>\n}\n```\n\n### Pagination Logic\n\n1. Start at page 1, per_page=100\n2. For issues: add scope=all, state=all, order_by=updated_at, sort=asc\n3. Apply cursor rewind: `updated_after = cursor - rewind_seconds` (clamped to 0)\n4. 
Yield each item from response\n5. Check `x-next-page` header for continuation\n6. Stop when header is empty/absent OR response is empty\n\n### Cursor Rewind\n\n```rust\nif let Some(ts) = updated_after {\n let rewind_ms = (cursor_rewind_seconds as i64) * 1000;\n let rewound = (ts - rewind_ms).max(0); // Clamp to avoid underflow\n // Convert to ISO 8601 for updated_after param\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `paginate_issues` returns Stream of GitLabIssue\n- [ ] `paginate_issues` adds scope=all, state=all, order_by=updated_at, sort=asc\n- [ ] `paginate_issues` applies cursor rewind with max(0) clamping\n- [ ] `paginate_issue_discussions` returns Stream of GitLabDiscussion\n- [ ] Both methods follow x-next-page header until empty\n- [ ] Both methods stop on empty response (fallback)\n- [ ] `request_with_headers` returns (T, HeaderMap) tuple\n\n## Files\n\n- src/gitlab/client.rs (edit - add methods)\n\n## TDD Loop\n\nRED:\n```rust\n// tests/pagination_tests.rs\n#[tokio::test] async fn fetches_all_pages_when_multiple_exist()\n#[tokio::test] async fn respects_per_page_parameter()\n#[tokio::test] async fn follows_x_next_page_header_until_empty()\n#[tokio::test] async fn falls_back_to_empty_page_stop_if_headers_missing()\n#[tokio::test] async fn applies_cursor_rewind_for_tuple_semantics()\n#[tokio::test] async fn clamps_negative_rewind_to_zero()\n```\n\nGREEN: Implement pagination methods with async-stream\n\nVERIFY: `cargo test pagination`\n\n## Edge Cases\n\n- cursor_updated_at near zero - rewind must not underflow (use max(0))\n- GitLab returns empty x-next-page - treat as end of pages\n- GitLab omits pagination headers entirely - use empty response as stop condition\n- DateTime conversion fails - omit updated_after and fetch all (safe 
fallback)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.222168Z","created_by":"tayloreernisse","updated_at":"2026-01-25T22:28:39.192876Z","closed_at":"2026-01-25T22:28:39.192815Z","close_reason":"Implemented paginate_issues and paginate_issue_discussions with async-stream, cursor rewind with max(0) clamping, x-next-page header following, 4 unit tests passing","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-xhz","depends_on_id":"bd-1np","type":"blocks","created_at":"2026-01-25T17:04:05.398212Z","created_by":"tayloreernisse"},{"issue_id":"bd-xhz","depends_on_id":"bd-2ys","type":"blocks","created_at":"2026-01-25T17:04:05.371440Z","created_by":"tayloreernisse"}]} {"id":"bd-ymd","title":"[CP1] Final validation - Gate A through D","description":"Run all tests and verify all internal gates pass.\n\n## Gate A: Issues Only (Must Pass First)\n- [ ] gi ingest --type=issues fetches all issues from configured projects\n- [ ] Issues stored with correct schema, including last_seen_at\n- [ ] Cursor-based sync is resumable (re-run fetches only new/updated)\n- [ ] Incremental cursor updates every 100 issues\n- [ ] Raw payloads stored for each issue\n- [ ] gi list issues and gi count issues work\n\n## Gate B: Labels Correct (Must Pass)\n- [ ] Labels extracted and stored (name-only)\n- [ ] Label links created correctly\n- [ ] Stale label links removed on re-sync (verified with test)\n- [ ] Label count per issue matches GitLab\n\n## Gate C: Dependent Discussion Sync (Must Pass)\n- [ ] Discussions fetched for issues with updated_at advancement\n- [ ] Notes stored with is_system flag correctly set\n- [ ] Raw payloads stored for discussions and notes\n- [ ] discussions_synced_for_updated_at watermark updated after sync\n- [ ] Unchanged issues skip discussion refetch (verified with test)\n- [ ] Bounded concurrency (dependent_concurrency respected)\n\n## Gate D: Resumability Proof (Must Pass)\n- [ ] Kill mid-run, rerun; bounded 
redo (cursor progress preserved)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] Single-flight lock prevents concurrent runs\n\n## Final Gate (Must Pass)\n- [ ] All unit tests pass (cargo test)\n- [ ] All integration tests pass (mocked with wiremock)\n- [ ] cargo clippy passes with no warnings\n- [ ] cargo fmt --check passes\n- [ ] Compiles with --release\n\n## Validation Commands\ncargo test\ncargo clippy -- -D warnings\ncargo fmt --check\ncargo build --release\n\nFiles: All CP1 files\nDone when: All gate criteria pass","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:59:26.795633Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.132613Z","deleted_at":"2026-01-25T17:02:02.132608Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} -{"id":"bd-ypa","title":"Implement timeline expand phase: BFS cross-reference expansion","description":"## Background\n\nThe expand phase is step 3 of the timeline pipeline (spec Section 3.2). 
Starting from seed entities, it performs BFS over entity_references to discover related entities not matched by keywords.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 step 3, Section 3.5 (expanded_entities JSON).\n\n## Codebase Context\n\n- entity_references table exists (migration 011) with columns: source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at\n- reference_type CHECK: `'closes' | 'mentioned' | 'related'`\n- source_method CHECK: `'api' | 'note_parse' | 'description_parse'` — use these values in provenance, NOT the spec's original values\n- Indexes: idx_entity_refs_source (source_entity_type, source_entity_id), idx_entity_refs_target (target_entity_id WHERE NOT NULL)\n\n## Approach\n\nCreate `src/core/timeline_expand.rs`:\n\n```rust\nuse std::collections::{HashSet, VecDeque};\nuse rusqlite::Connection;\nuse crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef};\n\npub struct ExpandResult {\n pub expanded_entities: Vec,\n pub unresolved_references: Vec,\n}\n\npub fn expand_timeline(\n conn: &Connection,\n seeds: &[EntityRef],\n depth: u32, // 0=no expansion, 1=default, 2+=deep\n include_mentions: bool, // --expand-mentions flag\n max_entities: usize, // cap at 100 to prevent explosion\n) -> Result { ... }\n```\n\n### BFS Algorithm\n\n```\nvisited: HashSet<(String, i64)> = seeds as set (entity_type, entity_id)\nqueue: VecDeque<(EntityRef, u32)> for multi-hop\n\nFor each seed:\n query_neighbors(conn, seed, edge_types) -> outgoing + incoming refs\n - Outgoing: SELECT target_* FROM entity_references WHERE source_entity_type=? AND source_entity_id=? AND reference_type IN (...)\n - Incoming: SELECT source_* FROM entity_references WHERE target_entity_type=? AND target_entity_id=? 
AND reference_type IN (...)\n - Unresolved (target_entity_id IS NULL): collect in UnresolvedRef, don't traverse\n - New resolved: add to expanded with provenance (via_from, via_reference_type, via_source_method)\n - If current_depth < depth: enqueue for further BFS\n```\n\n### Edge Type Filtering\n\n```rust\nfn edge_types(include_mentions: bool) -> Vec<&'static str> {\n if include_mentions {\n vec![\"closes\", \"related\", \"mentioned\"]\n } else {\n vec![\"closes\", \"related\"]\n }\n}\n```\n\n### Provenance (Critical for spec compliance)\n\nEach expanded entity needs via object per spec Section 3.5:\n- via_from: EntityRef of the entity that referenced this one\n- via_reference_type: from entity_references.reference_type column\n- via_source_method: from entity_references.source_method column (**codebase values: 'api', 'note_parse', 'description_parse'**)\n\nRegister in `src/core/mod.rs`: `pub mod timeline_expand;`\n\n## Acceptance Criteria\n\n- [ ] BFS traverses outgoing AND incoming edges in entity_references\n- [ ] Default: only \"closes\" and \"related\" edges (not \"mentioned\")\n- [ ] --expand-mentions: also traverses \"mentioned\" edges\n- [ ] depth=0: returns empty expanded list\n- [ ] max_entities cap prevents explosion (default 100)\n- [ ] Provenance: via_source_method uses codebase values (api/note_parse/description_parse), NOT spec values\n- [ ] Unresolved references (target_entity_id IS NULL) collected, not traversed\n- [ ] No duplicates: visited set by (entity_type, entity_id)\n- [ ] Self-references skipped\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_expand.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_expand;`)\n\n## TDD Loop\n\nRED: Tests in `src/core/timeline_expand.rs`:\n- `test_expand_depth_zero` - returns empty\n- `test_expand_finds_linked_entity` - seed issue -> closes -> linked MR\n- 
`test_expand_bidirectional` - starting from target also finds source\n- `test_expand_respects_max_entities`\n- `test_expand_skips_mentions_by_default`\n- `test_expand_includes_mentions_when_flagged`\n- `test_expand_collects_unresolved`\n- `test_expand_tracks_provenance` - verify via_source_method is 'api' not 'api_closes_issues'\n\nTests need in-memory DB with migrations 001-014 applied + entity_references test data.\n\nGREEN: Implement BFS.\n\nVERIFY: `cargo test --lib -- timeline_expand`\n\n## Edge Cases\n\n- Circular references: visited set prevents infinite loop\n- Entity referenced from multiple seeds: first-come provenance wins\n- Empty entity_references: returns empty, not error\n- Cross-project refs with NULL target_entity_id: add to unresolved","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.659381Z","created_by":"tayloreernisse","updated_at":"2026-02-05T19:39:40.338144Z","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-ypa","depends_on_id":"bd-32q","type":"blocks","created_at":"2026-02-02T21:33:37.448515Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:33:37.528233Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.661036Z","created_by":"tayloreernisse"}]} +{"id":"bd-ypa","title":"Implement timeline expand phase: BFS cross-reference expansion","description":"## Background\n\nThe expand phase is step 3 of the timeline pipeline (spec Section 3.2). 
Starting from seed entities, it performs BFS over entity_references to discover related entities not matched by keywords.\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 3.2 step 3, Section 3.5 (expanded_entities JSON).\n\n## Codebase Context\n\n- entity_references table exists (migration 011) with columns: source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at\n- reference_type CHECK: `'closes' | 'mentioned' | 'related'`\n- source_method CHECK: `'api' | 'note_parse' | 'description_parse'` — use these values in provenance, NOT the spec's original values\n- Indexes: idx_entity_refs_source (source_entity_type, source_entity_id), idx_entity_refs_target (target_entity_id WHERE NOT NULL)\n\n## Approach\n\nCreate `src/core/timeline_expand.rs`:\n\n```rust\nuse std::collections::{HashSet, VecDeque};\nuse rusqlite::Connection;\nuse crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef};\n\npub struct ExpandResult {\n pub expanded_entities: Vec,\n pub unresolved_references: Vec,\n}\n\npub fn expand_timeline(\n conn: &Connection,\n seeds: &[EntityRef],\n depth: u32, // 0=no expansion, 1=default, 2+=deep\n include_mentions: bool, // --expand-mentions flag\n max_entities: usize, // cap at 100 to prevent explosion\n) -> Result { ... }\n```\n\n### BFS Algorithm\n\n```\nvisited: HashSet<(String, i64)> = seeds as set (entity_type, entity_id)\nqueue: VecDeque<(EntityRef, u32)> for multi-hop\n\nFor each seed:\n query_neighbors(conn, seed, edge_types) -> outgoing + incoming refs\n - Outgoing: SELECT target_* FROM entity_references WHERE source_entity_type=? AND source_entity_id=? AND reference_type IN (...)\n - Incoming: SELECT source_* FROM entity_references WHERE target_entity_type=? AND target_entity_id=? 
AND reference_type IN (...)\n - Unresolved (target_entity_id IS NULL): collect in UnresolvedRef, don't traverse\n - New resolved: add to expanded with provenance (via_from, via_reference_type, via_source_method)\n - If current_depth < depth: enqueue for further BFS\n```\n\n### Edge Type Filtering\n\n```rust\nfn edge_types(include_mentions: bool) -> Vec<&'static str> {\n if include_mentions {\n vec![\"closes\", \"related\", \"mentioned\"]\n } else {\n vec![\"closes\", \"related\"]\n }\n}\n```\n\n### Provenance (Critical for spec compliance)\n\nEach expanded entity needs via object per spec Section 3.5:\n- via_from: EntityRef of the entity that referenced this one\n- via_reference_type: from entity_references.reference_type column\n- via_source_method: from entity_references.source_method column (**codebase values: 'api', 'note_parse', 'description_parse'**)\n\nRegister in `src/core/mod.rs`: `pub mod timeline_expand;`\n\n## Acceptance Criteria\n\n- [ ] BFS traverses outgoing AND incoming edges in entity_references\n- [ ] Default: only \"closes\" and \"related\" edges (not \"mentioned\")\n- [ ] --expand-mentions: also traverses \"mentioned\" edges\n- [ ] depth=0: returns empty expanded list\n- [ ] max_entities cap prevents explosion (default 100)\n- [ ] Provenance: via_source_method uses codebase values (api/note_parse/description_parse), NOT spec values\n- [ ] Unresolved references (target_entity_id IS NULL) collected, not traversed\n- [ ] No duplicates: visited set by (entity_type, entity_id)\n- [ ] Self-references skipped\n- [ ] Module registered in src/core/mod.rs\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/core/timeline_expand.rs` (NEW)\n- `src/core/mod.rs` (add `pub mod timeline_expand;`)\n\n## TDD Loop\n\nRED: Tests in `src/core/timeline_expand.rs`:\n- `test_expand_depth_zero` - returns empty\n- `test_expand_finds_linked_entity` - seed issue -> closes -> linked MR\n- 
`test_expand_bidirectional` - starting from target also finds source\n- `test_expand_respects_max_entities`\n- `test_expand_skips_mentions_by_default`\n- `test_expand_includes_mentions_when_flagged`\n- `test_expand_collects_unresolved`\n- `test_expand_tracks_provenance` - verify via_source_method is 'api' not 'api_closes_issues'\n\nTests need in-memory DB with migrations 001-014 applied + entity_references test data.\n\nGREEN: Implement BFS.\n\nVERIFY: `cargo test --lib -- timeline_expand`\n\n## Edge Cases\n\n- Circular references: visited set prevents infinite loop\n- Entity referenced from multiple seeds: first-come provenance wins\n- Empty entity_references: returns empty, not error\n- Cross-project refs with NULL target_entity_id: add to unresolved","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:33:08.659381Z","created_by":"tayloreernisse","updated_at":"2026-02-05T21:49:46.868460Z","closed_at":"2026-02-05T21:49:46.868410Z","close_reason":"Completed: Created src/core/timeline_expand.rs with BFS cross-reference expansion. Bidirectional traversal, depth limiting, mention filtering, max entity cap, provenance tracking, unresolved reference collection. 10 tests pass. 
All quality gates pass.","compaction_level":0,"original_size":0,"labels":["gate-3","phase-b","query"],"dependencies":[{"issue_id":"bd-ypa","depends_on_id":"bd-32q","type":"blocks","created_at":"2026-02-02T21:33:37.448515Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:33:37.528233Z","created_by":"tayloreernisse"},{"issue_id":"bd-ypa","depends_on_id":"bd-ike","type":"parent-child","created_at":"2026-02-02T21:33:08.661036Z","created_by":"tayloreernisse"}]} {"id":"bd-z0s","title":"[CP1] Final validation - Gate A through D","description":"Run all tests and verify all internal gates pass.\n\n## Gate A: Issues Only (Must Pass First)\n- [ ] gi ingest --type=issues fetches all issues from configured projects\n- [ ] Issues stored with correct schema, including last_seen_at\n- [ ] Cursor-based sync is resumable (re-run fetches only new/updated)\n- [ ] Incremental cursor updates every 100 issues\n- [ ] Raw payloads stored for each issue\n- [ ] gi list issues and gi count issues work\n\n## Gate B: Labels Correct (Must Pass)\n- [ ] Labels extracted and stored (name-only)\n- [ ] Label links created correctly\n- [ ] **Stale label links removed on re-sync** (verified with test)\n- [ ] Label count per issue matches GitLab\n\n## Gate C: Dependent Discussion Sync (Must Pass)\n- [ ] Discussions fetched for issues with updated_at advancement\n- [ ] Notes stored with is_system flag correctly set\n- [ ] Raw payloads stored for discussions and notes\n- [ ] discussions_synced_for_updated_at watermark updated after sync\n- [ ] **Unchanged issues skip discussion refetch** (verified with test)\n- [ ] Bounded concurrency (dependent_concurrency respected)\n\n## Gate D: Resumability Proof (Must Pass)\n- [ ] Kill mid-run, rerun; bounded redo (cursor progress preserved)\n- [ ] No redundant discussion refetch after crash recovery\n- [ ] Single-flight lock prevents concurrent runs\n\n## Final Gate (Must Pass)\n- [ ] All unit 
tests pass (cargo test)\n- [ ] All integration tests pass (mocked with wiremock)\n- [ ] cargo clippy passes with no warnings\n- [ ] cargo fmt --check passes\n- [ ] Compiles with --release\n\n## Validation Commands\ncargo test\ncargo clippy -- -D warnings\ncargo fmt --check\ncargo build --release\n\n## Data Integrity Checks\n- SELECT COUNT(*) FROM issues matches GitLab issue count\n- Every issue has a raw_payloads row\n- Every discussion has a raw_payloads row\n- Labels in issue_labels junction all exist in labels table\n- Re-running gi ingest --type=issues fetches 0 new items\n- After removing a label in GitLab and re-syncing, the link is removed\n\nFiles: All CP1 files\nDone when: All gate criteria pass","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-25T17:02:38.459095Z","created_by":"tayloreernisse","updated_at":"2026-01-25T23:27:09.567537Z","closed_at":"2026-01-25T23:27:09.567478Z","close_reason":"All gates pass: 71 tests, clippy clean, fmt clean, release build successful","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-z0s","depends_on_id":"bd-17v","type":"blocks","created_at":"2026-01-25T17:04:05.889114Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-2f0","type":"blocks","created_at":"2026-01-25T17:04:05.841210Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-39w","type":"blocks","created_at":"2026-01-25T17:04:05.913316Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-3n1","type":"blocks","created_at":"2026-01-25T17:04:05.817830Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-o7b","type":"blocks","created_at":"2026-01-25T17:04:05.864480Z","created_by":"tayloreernisse"},{"issue_id":"bd-z0s","depends_on_id":"bd-v6i","type":"blocks","created_at":"2026-01-25T17:04:05.794555Z","created_by":"tayloreernisse"}]} {"id":"bd-z94","title":"Implement 'lore file-history' command with human and robot output","description":"## 
Background\n\nThe file-history command is Gate 4's user-facing CLI. It answers 'which MRs touched this file, and why?'\n\n**Spec reference:** `docs/phase-b-temporal-intelligence.md` Section 4.4-4.5.\n\n## Codebase Context\n\n- CLI pattern: Commands enum in src/cli/mod.rs, handler in src/main.rs, output in src/cli/commands/\n- Project resolution: resolve_project() returns project_id or exit 18 (Ambiguous)\n- Robot mode: {ok, data, meta} envelope pattern\n- merge_requests.merged_at exists (migration 006) — order by COALESCE(merged_at, updated_at) DESC\n- discussions table: issue_id, merge_request_id\n- notes table: position_new_path for DiffNotes (used for --discussions flag)\n- mr_file_changes table: migration 016 (bd-1oo)\n- resolve_rename_chain() from bd-1yx (src/core/file_history.rs) for rename handling\n- VALID_COMMANDS array in src/main.rs (line ~448)\n\n## Approach\n\n### 1. FileHistoryArgs subcommand (`src/cli/mod.rs`):\n```rust\n/// Show MRs that touched a file, with linked issues and discussions\n#[command(name = \"file-history\")]\nFileHistory(FileHistoryArgs),\n```\n\n```rust\n#[derive(Parser, Debug)]\npub struct FileHistoryArgs {\n /// File path to trace history for\n pub path: String,\n /// Scope to a specific project (fuzzy match)\n #[arg(short = 'p', long)]\n pub project: Option,\n /// Include discussion snippets from DiffNotes on this file\n #[arg(long)]\n pub discussions: bool,\n /// Disable rename chain resolution\n #[arg(long = \"no-follow-renames\")]\n pub no_follow_renames: bool,\n /// Only show merged MRs\n #[arg(long)]\n pub merged: bool,\n /// Maximum results\n #[arg(short = 'n', long = \"limit\", default_value = \"50\")]\n pub limit: usize,\n}\n```\n\n### 2. Query logic (`src/cli/commands/file_history.rs`):\n\n1. Resolve project (exit 18 on ambiguous)\n2. Call resolve_rename_chain() unless --no-follow-renames\n3. Query mr_file_changes for all resolved paths\n4. JOIN merge_requests for MR details\n5. 
Optionally fetch DiffNote discussions on the file\n6. Order by COALESCE(merged_at, updated_at) DESC\n7. Apply --merged filter and --limit\n\n### 3. Human output:\n```\nFile History: src/auth/oauth.rs (via 3 paths, 5 MRs)\nRename chain: src/authentication/oauth.rs -> src/auth/oauth.rs\n\n !456 \"Implement OAuth2 flow\" merged @alice 2024-01-22 modified\n !489 \"Fix OAuth token expiry\" merged @bob 2024-02-15 modified\n !512 \"Refactor auth module\" merged @carol 2024-03-01 renamed\n```\n\n### 4. Robot JSON:\n```json\n{\n \"ok\": true,\n \"data\": {\n \"path\": \"src/auth/oauth.rs\",\n \"rename_chain\": [\"src/authentication/oauth.rs\", \"src/auth/oauth.rs\"],\n \"merge_requests\": [\n {\n \"iid\": 456,\n \"title\": \"Implement OAuth2 flow\",\n \"state\": \"merged\",\n \"author\": \"alice\",\n \"merged_at\": \"2024-01-22T...\",\n \"change_type\": \"modified\",\n \"discussion_count\": 12,\n \"file_discussion_count\": 4,\n \"merge_commit_sha\": \"abc123\"\n }\n ]\n },\n \"meta\": {\n \"total_mrs\": 5,\n \"renames_followed\": true,\n \"paths_searched\": 2\n }\n}\n```\n\n## Acceptance Criteria\n\n- [ ] `lore file-history src/foo.rs` works with human output\n- [ ] `lore --robot file-history src/foo.rs` works with JSON envelope\n- [ ] Rename chain displayed in human output when renames detected\n- [ ] Robot JSON includes rename_chain array\n- [ ] --no-follow-renames disables resolution (queries literal path only)\n- [ ] --merged filters to merged MRs only\n- [ ] --discussions includes DiffNote snippets from notes.position_new_path matching\n- [ ] -p for project scoping (exit 18 on ambiguous)\n- [ ] -n limits results\n- [ ] No MR history: friendly message (exit 0, not error)\n- [ ] \"file-history\" added to VALID_COMMANDS array\n- [ ] robot-docs manifest includes file-history command\n- [ ] `cargo check --all-targets` passes\n- [ ] `cargo clippy --all-targets -- -D warnings` passes\n\n## Files\n\n- `src/cli/mod.rs` (FileHistoryArgs struct + Commands::FileHistory variant)\n- 
`src/cli/commands/file_history.rs` (NEW — query + human + robot output)\n- `src/cli/commands/mod.rs` (add `pub mod file_history;` + re-exports)\n- `src/main.rs` (handler dispatch + VALID_COMMANDS + robot-docs entry)\n\n## TDD Loop\n\nNo unit tests for CLI wiring. Verify with:\n\n```bash\ncargo check --all-targets\ncargo run -- file-history --help\n```\n\n## Edge Cases\n\n- File path with spaces: clap handles quoting\n- Path not in any MR: empty result, friendly message, not error\n- MRs ordered by COALESCE(merged_at, updated_at) DESC (unmerged MRs use updated_at)\n- --discussions with no DiffNotes: empty discussion section, not error\n- rename_chain omitted from robot JSON when --no-follow-renames is set\n","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:34:09.027259Z","created_by":"tayloreernisse","updated_at":"2026-02-05T20:57:44.467745Z","compaction_level":0,"original_size":0,"labels":["cli","gate-4","phase-b"],"dependencies":[{"issue_id":"bd-z94","depends_on_id":"bd-14q","type":"parent-child","created_at":"2026-02-02T21:34:09.028633Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-1yx","type":"blocks","created_at":"2026-02-02T21:34:16.784122Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-2yo","type":"blocks","created_at":"2026-02-02T21:34:16.741201Z","created_by":"tayloreernisse"},{"issue_id":"bd-z94","depends_on_id":"bd-3ia","type":"blocks","created_at":"2026-02-02T21:34:16.824983Z","created_by":"tayloreernisse"}]} diff --git a/.beads/last-touched b/.beads/last-touched index bb61cf3..b595e60 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-2n4 +bd-3as diff --git a/migrations/016_mr_file_changes.sql b/migrations/016_mr_file_changes.sql new file mode 100644 index 0000000..4ca10fe --- /dev/null +++ b/migrations/016_mr_file_changes.sql @@ -0,0 +1,20 @@ +-- Migration 016: MR file changes table +-- Powers file-history and trace commands (Gates 4-5) + +CREATE 
TABLE mr_file_changes ( + id INTEGER PRIMARY KEY, + merge_request_id INTEGER NOT NULL REFERENCES merge_requests(id) ON DELETE CASCADE, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + old_path TEXT, + new_path TEXT NOT NULL, + change_type TEXT NOT NULL CHECK (change_type IN ('added', 'modified', 'renamed', 'deleted')), + UNIQUE(merge_request_id, new_path) +); + +CREATE INDEX idx_mfc_project_path ON mr_file_changes(project_id, new_path); +CREATE INDEX idx_mfc_project_old_path ON mr_file_changes(project_id, old_path) WHERE old_path IS NOT NULL; +CREATE INDEX idx_mfc_mr ON mr_file_changes(merge_request_id); +CREATE INDEX idx_mfc_renamed ON mr_file_changes(project_id, change_type) WHERE change_type = 'renamed'; + +INSERT INTO schema_version (version, applied_at, description) +VALUES (16, strftime('%s', 'now') * 1000, 'MR file changes table'); diff --git a/src/core/db.rs b/src/core/db.rs index 7ef61f8..d915c8c 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -44,6 +44,14 @@ const MIGRATIONS: &[(&str, &str)] = &[ "014", include_str!("../../migrations/014_sync_runs_enrichment.sql"), ), + ( + "015", + include_str!("../../migrations/015_commit_shas_and_closes_watermark.sql"), + ), + ( + "016", + include_str!("../../migrations/016_mr_file_changes.sql"), + ), ]; pub fn create_connection(db_path: &Path) -> Result { diff --git a/src/core/mod.rs b/src/core/mod.rs index bbc91b8..72cf9a2 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -14,6 +14,10 @@ pub mod project; pub mod references; pub mod sync_run; pub mod time; +pub mod timeline; +pub mod timeline_collect; +pub mod timeline_expand; +pub mod timeline_seed; pub use config::Config; pub use error::{LoreError, Result}; diff --git a/src/core/timeline.rs b/src/core/timeline.rs new file mode 100644 index 0000000..c9ee919 --- /dev/null +++ b/src/core/timeline.rs @@ -0,0 +1,253 @@ +use std::cmp::Ordering; + +use serde::Serialize; + +/// The core timeline event. 
All pipeline stages produce or consume these. +/// Spec ref: Section 3.3 "Event Model" +#[derive(Debug, Clone, Serialize)] +pub struct TimelineEvent { + pub timestamp: i64, + pub entity_type: String, + #[serde(skip)] + pub entity_id: i64, + pub entity_iid: i64, + pub project_path: String, + pub event_type: TimelineEventType, + pub summary: String, + pub actor: Option, + pub url: Option, + pub is_seed: bool, +} + +impl PartialEq for TimelineEvent { + fn eq(&self, other: &Self) -> bool { + self.timestamp == other.timestamp + && self.entity_id == other.entity_id + && self.event_type_discriminant() == other.event_type_discriminant() + } +} + +impl Eq for TimelineEvent {} + +impl PartialOrd for TimelineEvent { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for TimelineEvent { + fn cmp(&self, other: &Self) -> Ordering { + self.timestamp + .cmp(&other.timestamp) + .then_with(|| self.entity_id.cmp(&other.entity_id)) + .then_with(|| { + self.event_type_discriminant() + .cmp(&other.event_type_discriminant()) + }) + } +} + +impl TimelineEvent { + fn event_type_discriminant(&self) -> u8 { + match &self.event_type { + TimelineEventType::Created => 0, + TimelineEventType::StateChanged { .. } => 1, + TimelineEventType::LabelAdded { .. } => 2, + TimelineEventType::LabelRemoved { .. } => 3, + TimelineEventType::MilestoneSet { .. } => 4, + TimelineEventType::MilestoneRemoved { .. } => 5, + TimelineEventType::Merged => 6, + TimelineEventType::NoteEvidence { .. } => 7, + TimelineEventType::CrossReferenced { .. } => 8, + } + } +} + +/// Per spec Section 3.3. Serde tagged enum for JSON output. 
+#[derive(Debug, Clone, Serialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum TimelineEventType { + Created, + StateChanged { + state: String, + }, + LabelAdded { + label: String, + }, + LabelRemoved { + label: String, + }, + MilestoneSet { + milestone: String, + }, + MilestoneRemoved { + milestone: String, + }, + Merged, + NoteEvidence { + note_id: i64, + snippet: String, + discussion_id: Option, + }, + CrossReferenced { + target: String, + }, +} + +/// Internal entity reference used across pipeline stages. +#[derive(Debug, Clone, Serialize)] +pub struct EntityRef { + pub entity_type: String, + pub entity_id: i64, + pub entity_iid: i64, + pub project_path: String, +} + +/// An entity discovered via BFS expansion. +/// Spec ref: Section 3.5 "expanded_entities" JSON structure. +#[derive(Debug, Clone, Serialize)] +pub struct ExpandedEntityRef { + pub entity_ref: EntityRef, + pub depth: u32, + pub via_from: EntityRef, + pub via_reference_type: String, + pub via_source_method: String, +} + +/// Reference to an unsynced external entity. +/// Spec ref: Section 3.5 "unresolved_references" JSON structure. +#[derive(Debug, Clone, Serialize)] +pub struct UnresolvedRef { + pub source: EntityRef, + pub target_project: Option, + pub target_type: String, + pub target_iid: i64, + pub reference_type: String, +} + +/// Complete result from the timeline pipeline. 
+#[derive(Debug, Clone, Serialize)] +pub struct TimelineResult { + pub query: String, + pub events: Vec, + pub seed_entities: Vec, + pub expanded_entities: Vec, + pub unresolved_references: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_event(timestamp: i64, entity_id: i64, event_type: TimelineEventType) -> TimelineEvent { + TimelineEvent { + timestamp, + entity_type: "issue".to_owned(), + entity_id, + entity_iid: 1, + project_path: "group/project".to_owned(), + event_type, + summary: "test".to_owned(), + actor: None, + url: None, + is_seed: true, + } + } + + #[test] + fn test_timeline_event_sort_by_timestamp() { + let mut events = [ + make_event(3000, 1, TimelineEventType::Created), + make_event(1000, 2, TimelineEventType::Created), + make_event(2000, 3, TimelineEventType::Merged), + ]; + events.sort(); + assert_eq!(events[0].timestamp, 1000); + assert_eq!(events[1].timestamp, 2000); + assert_eq!(events[2].timestamp, 3000); + } + + #[test] + fn test_timeline_event_sort_tiebreak() { + let mut events = [ + make_event(1000, 5, TimelineEventType::Created), + make_event(1000, 2, TimelineEventType::Merged), + make_event(1000, 2, TimelineEventType::Created), + ]; + events.sort(); + // Same timestamp: sort by entity_id first, then event_type discriminant + assert_eq!(events[0].entity_id, 2); + assert!(matches!(events[0].event_type, TimelineEventType::Created)); + assert_eq!(events[1].entity_id, 2); + assert!(matches!(events[1].event_type, TimelineEventType::Merged)); + assert_eq!(events[2].entity_id, 5); + } + + #[test] + fn test_timeline_event_type_serializes_tagged() { + let event_type = TimelineEventType::StateChanged { + state: "closed".to_owned(), + }; + let json = serde_json::to_value(&event_type).unwrap(); + assert_eq!(json["kind"], "state_changed"); + assert_eq!(json["state"], "closed"); + } + + #[test] + fn test_note_evidence_has_note_id() { + let event_type = TimelineEventType::NoteEvidence { + note_id: 42, + snippet: "some text".to_owned(), + 
discussion_id: Some(7), + }; + let json = serde_json::to_value(&event_type).unwrap(); + assert_eq!(json["kind"], "note_evidence"); + assert_eq!(json["note_id"], 42); + assert_eq!(json["snippet"], "some text"); + assert_eq!(json["discussion_id"], 7); + } + + #[test] + fn test_entity_id_skipped_in_serialization() { + let event = make_event(1000, 99, TimelineEventType::Created); + let json = serde_json::to_value(&event).unwrap(); + assert!(json.get("entity_id").is_none()); + assert_eq!(json["entity_iid"], 1); + } + + #[test] + fn test_timeline_event_type_variant_count() { + // Verify all 9 variants serialize without panic + let variants: Vec = vec![ + TimelineEventType::Created, + TimelineEventType::StateChanged { + state: "closed".to_owned(), + }, + TimelineEventType::LabelAdded { + label: "bug".to_owned(), + }, + TimelineEventType::LabelRemoved { + label: "bug".to_owned(), + }, + TimelineEventType::MilestoneSet { + milestone: "v1".to_owned(), + }, + TimelineEventType::MilestoneRemoved { + milestone: "v1".to_owned(), + }, + TimelineEventType::Merged, + TimelineEventType::NoteEvidence { + note_id: 1, + snippet: "text".to_owned(), + discussion_id: None, + }, + TimelineEventType::CrossReferenced { + target: "!567".to_owned(), + }, + ]; + assert_eq!(variants.len(), 9); + for v in &variants { + serde_json::to_value(v).unwrap(); + } + } +} diff --git a/src/core/timeline_collect.rs b/src/core/timeline_collect.rs new file mode 100644 index 0000000..ad9ff1e --- /dev/null +++ b/src/core/timeline_collect.rs @@ -0,0 +1,687 @@ +use rusqlite::Connection; + +use crate::core::error::Result; +use crate::core::timeline::{EntityRef, ExpandedEntityRef, TimelineEvent, TimelineEventType}; + +/// Collect all events for seed and expanded entities, interleave chronologically. +/// +/// Steps 4-5 of the timeline pipeline: +/// 1. For each entity, collect Created, StateChanged, Label, Milestone, Merged events +/// 2. Merge in evidence notes from the seed phase +/// 3. 
Sort chronologically with stable tiebreak +/// 4. Apply --since filter and --limit +pub fn collect_events( + conn: &Connection, + seed_entities: &[EntityRef], + expanded_entities: &[ExpandedEntityRef], + evidence_notes: &[TimelineEvent], + since_ms: Option, + limit: usize, +) -> Result> { + let mut all_events: Vec = Vec::new(); + + // Collect events for seed entities + for entity in seed_entities { + collect_entity_events(conn, entity, true, &mut all_events)?; + } + + // Collect events for expanded entities + for expanded in expanded_entities { + collect_entity_events(conn, &expanded.entity_ref, false, &mut all_events)?; + } + + // Add evidence notes from seed phase + all_events.extend(evidence_notes.iter().cloned()); + + // Sort chronologically (uses Ord impl from timeline.rs) + all_events.sort(); + + // Apply --since filter + if let Some(since) = since_ms { + all_events.retain(|e| e.timestamp >= since); + } + + // Apply limit + all_events.truncate(limit); + + Ok(all_events) +} + +/// Collect all events for a single entity. +fn collect_entity_events( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + collect_creation_event(conn, entity, is_seed, events)?; + collect_state_events(conn, entity, is_seed, events)?; + collect_label_events(conn, entity, is_seed, events)?; + collect_milestone_events(conn, entity, is_seed, events)?; + collect_merged_event(conn, entity, is_seed, events)?; + Ok(()) +} + +/// Collect the Created event from the entity's own table. 
+fn collect_creation_event( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + let table = match entity.entity_type.as_str() { + "issue" => "issues", + "merge_request" => "merge_requests", + _ => return Ok(()), + }; + + let sql = + format!("SELECT created_at, author_username, title, web_url FROM {table} WHERE id = ?1"); + + let result = conn.query_row(&sql, rusqlite::params![entity.entity_id], |row| { + Ok(( + row.get::<_, Option>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, Option>(3)?, + )) + }); + + if let Ok((Some(created_at), author, title, url)) = result { + let type_label = if entity.entity_type == "issue" { + "Issue" + } else { + "MR" + }; + let title_str = title.as_deref().unwrap_or("(untitled)"); + events.push(TimelineEvent { + timestamp: created_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type: TimelineEventType::Created, + summary: format!("{type_label} #{} created: {title_str}", entity.entity_iid), + actor: author, + url, + is_seed, + }); + } + + Ok(()) +} + +/// Collect state change events. State='merged' produces Merged, not StateChanged. 
+fn collect_state_events( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + let (id_col, id_val) = entity_id_column(entity); + + let sql = format!( + "SELECT state, actor_username, created_at FROM resource_state_events + WHERE {id_col} = ?1 + ORDER BY created_at ASC" + ); + + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params![id_val], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, i64>(2)?, + )) + })?; + + for row_result in rows { + let (state, actor, created_at) = row_result?; + + // state='merged' is handled by collect_merged_event — skip here + if state == "merged" { + continue; + } + + events.push(TimelineEvent { + timestamp: created_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type: TimelineEventType::StateChanged { + state: state.clone(), + }, + summary: format!("State changed to {state}"), + actor, + url: None, + is_seed, + }); + } + + Ok(()) +} + +/// Collect label add/remove events. 
+fn collect_label_events( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + let (id_col, id_val) = entity_id_column(entity); + + let sql = format!( + "SELECT action, label_name, actor_username, created_at FROM resource_label_events + WHERE {id_col} = ?1 + ORDER BY created_at ASC" + ); + + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params![id_val], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, i64>(3)?, + )) + })?; + + for row_result in rows { + let (action, label_name, actor, created_at) = row_result?; + let label = label_name.unwrap_or_else(|| "[deleted label]".to_owned()); + + let (event_type, summary) = match action.as_str() { + "add" => ( + TimelineEventType::LabelAdded { + label: label.clone(), + }, + format!("Label added: {label}"), + ), + "remove" => ( + TimelineEventType::LabelRemoved { + label: label.clone(), + }, + format!("Label removed: {label}"), + ), + _ => continue, + }; + + events.push(TimelineEvent { + timestamp: created_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type, + summary, + actor, + url: None, + is_seed, + }); + } + + Ok(()) +} + +/// Collect milestone add/remove events. 
+fn collect_milestone_events( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + let (id_col, id_val) = entity_id_column(entity); + + let sql = format!( + "SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events + WHERE {id_col} = ?1 + ORDER BY created_at ASC" + ); + + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params![id_val], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, i64>(3)?, + )) + })?; + + for row_result in rows { + let (action, milestone_title, actor, created_at) = row_result?; + let milestone = milestone_title.unwrap_or_else(|| "[deleted milestone]".to_owned()); + + let (event_type, summary) = match action.as_str() { + "add" => ( + TimelineEventType::MilestoneSet { + milestone: milestone.clone(), + }, + format!("Milestone set: {milestone}"), + ), + "remove" => ( + TimelineEventType::MilestoneRemoved { + milestone: milestone.clone(), + }, + format!("Milestone removed: {milestone}"), + ), + _ => continue, + }; + + events.push(TimelineEvent { + timestamp: created_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type, + summary, + actor, + url: None, + is_seed, + }); + } + + Ok(()) +} + +/// Collect Merged event for MRs. Prefers merged_at from the MR table. +/// Falls back to resource_state_events WHERE state='merged' if merged_at is NULL. 
+fn collect_merged_event( + conn: &Connection, + entity: &EntityRef, + is_seed: bool, + events: &mut Vec, +) -> Result<()> { + if entity.entity_type != "merge_request" { + return Ok(()); + } + + // Try merged_at from merge_requests table first + let mr_result = conn.query_row( + "SELECT merged_at, merge_user_username, web_url FROM merge_requests WHERE id = ?1", + rusqlite::params![entity.entity_id], + |row| { + Ok(( + row.get::<_, Option>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + )) + }, + ); + + if let Ok((Some(merged_at), merge_user, url)) = mr_result { + events.push(TimelineEvent { + timestamp: merged_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type: TimelineEventType::Merged, + summary: format!("MR !{} merged", entity.entity_iid), + actor: merge_user, + url, + is_seed, + }); + return Ok(()); + } + + // Fallback: check resource_state_events for state='merged' + let fallback_result = conn.query_row( + "SELECT actor_username, created_at FROM resource_state_events + WHERE merge_request_id = ?1 AND state = 'merged' + ORDER BY created_at DESC LIMIT 1", + rusqlite::params![entity.entity_id], + |row| Ok((row.get::<_, Option>(0)?, row.get::<_, i64>(1)?)), + ); + + if let Ok((actor, created_at)) = fallback_result { + events.push(TimelineEvent { + timestamp: created_at, + entity_type: entity.entity_type.clone(), + entity_id: entity.entity_id, + entity_iid: entity.entity_iid, + project_path: entity.project_path.clone(), + event_type: TimelineEventType::Merged, + summary: format!("MR !{} merged", entity.entity_iid), + actor, + url: None, + is_seed, + }); + } + + Ok(()) +} + +/// Return the correct column name and value for querying resource event tables. 
+fn entity_id_column(entity: &EntityRef) -> (&'static str, i64) { + match entity.entity_type.as_str() { + "issue" => ("issue_id", entity.entity_id), + "merge_request" => ("merge_request_id", entity.entity_id), + _ => ("issue_id", entity.entity_id), // shouldn't happen + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::db::{create_connection, run_migrations}; + use std::path::Path; + + fn setup_test_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn + } + + fn insert_project(conn: &Connection) -> i64 { + conn.execute( + "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')", + [], + ) + .unwrap(); + conn.last_insert_rowid() + } + + fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 { + conn.execute( + "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, ?3, 'Auth bug', 'opened', 'alice', 1000, 2000, 3000, 'https://gitlab.com/group/project/-/issues/1')", + rusqlite::params![iid * 100, project_id, iid], + ) + .unwrap(); + conn.last_insert_rowid() + } + + fn insert_mr(conn: &Connection, project_id: i64, iid: i64, merged_at: Option) -> i64 { + conn.execute( + "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, merged_at, merge_user_username, web_url) VALUES (?1, ?2, ?3, 'Fix auth', 'merged', 'bob', 1000, 5000, 6000, ?4, 'charlie', 'https://gitlab.com/group/project/-/merge_requests/10')", + rusqlite::params![iid * 100, project_id, iid, merged_at], + ) + .unwrap(); + conn.last_insert_rowid() + } + + fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef { + EntityRef { + entity_type: entity_type.to_owned(), + entity_id, + entity_iid: iid, + project_path: "group/project".to_owned(), + } + } + + fn 
insert_state_event( + conn: &Connection, + project_id: i64, + issue_id: Option, + mr_id: Option, + state: &str, + created_at: i64, + ) { + let gitlab_id: i64 = rand::random::().into(); + conn.execute( + "INSERT INTO resource_state_events (gitlab_id, project_id, issue_id, merge_request_id, state, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, 'alice', ?6)", + rusqlite::params![gitlab_id, project_id, issue_id, mr_id, state, created_at], + ) + .unwrap(); + } + + fn insert_label_event( + conn: &Connection, + project_id: i64, + issue_id: Option, + mr_id: Option, + action: &str, + label_name: Option<&str>, + created_at: i64, + ) { + let gitlab_id: i64 = rand::random::().into(); + conn.execute( + "INSERT INTO resource_label_events (gitlab_id, project_id, issue_id, merge_request_id, action, label_name, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)", + rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, label_name, created_at], + ) + .unwrap(); + } + + fn insert_milestone_event( + conn: &Connection, + project_id: i64, + issue_id: Option, + mr_id: Option, + action: &str, + milestone_title: Option<&str>, + created_at: i64, + ) { + let gitlab_id: i64 = rand::random::().into(); + conn.execute( + "INSERT INTO resource_milestone_events (gitlab_id, project_id, issue_id, merge_request_id, action, milestone_title, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)", + rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, milestone_title, created_at], + ) + .unwrap(); + } + + #[test] + fn test_collect_creation_event() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + assert_eq!(events.len(), 1); + assert!(matches!(events[0].event_type, TimelineEventType::Created)); + 
assert_eq!(events[0].timestamp, 1000); + assert_eq!(events[0].actor, Some("alice".to_owned())); + assert!(events[0].is_seed); + } + + #[test] + fn test_collect_state_events() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 3000); + insert_state_event(&conn, project_id, Some(issue_id), None, "reopened", 4000); + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + // Created + 2 state changes = 3 + assert_eq!(events.len(), 3); + assert!(matches!(events[0].event_type, TimelineEventType::Created)); + assert!(matches!( + events[1].event_type, + TimelineEventType::StateChanged { ref state } if state == "closed" + )); + assert!(matches!( + events[2].event_type, + TimelineEventType::StateChanged { ref state } if state == "reopened" + )); + } + + #[test] + fn test_collect_merged_dedup() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let mr_id = insert_mr(&conn, project_id, 10, Some(5000)); + + // Also add a state event for 'merged' — this should NOT produce a StateChanged + insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000); + + let seeds = vec![make_entity_ref("merge_request", mr_id, 10)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + // Should have Created + Merged (not Created + StateChanged{merged} + Merged) + let merged_count = events + .iter() + .filter(|e| matches!(e.event_type, TimelineEventType::Merged)) + .count(); + let state_merged_count = events + .iter() + .filter(|e| matches!(&e.event_type, TimelineEventType::StateChanged { state } if state == "merged")) + .count(); + + assert_eq!(merged_count, 1); + assert_eq!(state_merged_count, 0); + } + + #[test] + fn test_collect_null_label_fallback() { + let conn = setup_test_db(); + let project_id = 
insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + insert_label_event(&conn, project_id, Some(issue_id), None, "add", None, 2000); + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + let label_event = events.iter().find(|e| { + matches!(&e.event_type, TimelineEventType::LabelAdded { label } if label == "[deleted label]") + }); + assert!(label_event.is_some()); + } + + #[test] + fn test_collect_null_milestone_fallback() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + insert_milestone_event(&conn, project_id, Some(issue_id), None, "add", None, 2000); + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + let ms_event = events.iter().find(|e| { + matches!(&e.event_type, TimelineEventType::MilestoneSet { milestone } if milestone == "[deleted milestone]") + }); + assert!(ms_event.is_some()); + } + + #[test] + fn test_collect_since_filter() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 3000); + insert_state_event(&conn, project_id, Some(issue_id), None, "reopened", 5000); + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + + // Since 4000: should exclude Created (1000) and closed (3000) + let events = collect_events(&conn, &seeds, &[], &[], Some(4000), 100).unwrap(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].timestamp, 5000); + } + + #[test] + fn test_collect_chronological_sort() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + let mr_id = insert_mr(&conn, project_id, 10, Some(4000)); + + insert_state_event(&conn, project_id, 
Some(issue_id), None, "closed", 3000); + insert_label_event( + &conn, + project_id, + None, + Some(mr_id), + "add", + Some("bug"), + 2000, + ); + + let seeds = vec![ + make_entity_ref("issue", issue_id, 1), + make_entity_ref("merge_request", mr_id, 10), + ]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + // Verify chronological order + for window in events.windows(2) { + assert!(window[0].timestamp <= window[1].timestamp); + } + } + + #[test] + fn test_collect_respects_limit() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + for i in 0..20 { + insert_state_event( + &conn, + project_id, + Some(issue_id), + None, + "closed", + 3000 + i * 100, + ); + } + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 5).unwrap(); + assert_eq!(events.len(), 5); + } + + #[test] + fn test_collect_evidence_notes_included() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + let issue_id = insert_issue(&conn, project_id, 1); + + let evidence = vec![TimelineEvent { + timestamp: 2500, + entity_type: "issue".to_owned(), + entity_id: issue_id, + entity_iid: 1, + project_path: "group/project".to_owned(), + event_type: TimelineEventType::NoteEvidence { + note_id: 42, + snippet: "relevant note".to_owned(), + discussion_id: Some(1), + }, + summary: "Note by alice".to_owned(), + actor: Some("alice".to_owned()), + url: None, + is_seed: true, + }]; + + let seeds = vec![make_entity_ref("issue", issue_id, 1)]; + let events = collect_events(&conn, &seeds, &[], &evidence, None, 100).unwrap(); + + let note_event = events.iter().find(|e| { + matches!( + &e.event_type, + TimelineEventType::NoteEvidence { note_id, .. 
} if *note_id == 42 + ) + }); + assert!(note_event.is_some()); + } + + #[test] + fn test_collect_merged_fallback_to_state_event() { + let conn = setup_test_db(); + let project_id = insert_project(&conn); + // MR with merged_at = NULL + let mr_id = insert_mr(&conn, project_id, 10, None); + + // But has a state event for 'merged' + insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000); + + let seeds = vec![make_entity_ref("merge_request", mr_id, 10)]; + let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap(); + + let merged = events + .iter() + .find(|e| matches!(e.event_type, TimelineEventType::Merged)); + assert!(merged.is_some()); + assert_eq!(merged.unwrap().timestamp, 5000); + } +} diff --git a/src/core/timeline_expand.rs b/src/core/timeline_expand.rs new file mode 100644 index 0000000..8e228d6 --- /dev/null +++ b/src/core/timeline_expand.rs @@ -0,0 +1,592 @@ +use std::collections::{HashSet, VecDeque}; + +use rusqlite::Connection; + +use crate::core::error::Result; +use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef}; + +/// Result of the expand phase. +pub struct ExpandResult { + pub expanded_entities: Vec, + pub unresolved_references: Vec, +} + +/// Run the EXPAND phase of the timeline pipeline (BFS over entity_references). +/// +/// Starting from seed entities, traverses cross-references (both outgoing and incoming) +/// to discover related entities. Collects provenance (who referenced whom, how). 
+pub fn expand_timeline( + conn: &Connection, + seeds: &[EntityRef], + depth: u32, + include_mentions: bool, + max_entities: usize, +) -> Result { + if depth == 0 || seeds.is_empty() { + return Ok(ExpandResult { + expanded_entities: Vec::new(), + unresolved_references: Vec::new(), + }); + } + + let edge_types = if include_mentions { + vec!["closes", "related", "mentioned"] + } else { + vec!["closes", "related"] + }; + + let mut visited: HashSet<(String, i64)> = seeds + .iter() + .map(|s| (s.entity_type.clone(), s.entity_id)) + .collect(); + + let mut queue: VecDeque<(EntityRef, u32)> = seeds.iter().map(|s| (s.clone(), 0)).collect(); + + let mut expanded = Vec::new(); + let mut unresolved = Vec::new(); + + while let Some((current, current_depth)) = queue.pop_front() { + if expanded.len() >= max_entities { + break; + } + + let neighbors = find_neighbors(conn, ¤t, &edge_types)?; + + for neighbor in neighbors { + match neighbor { + Neighbor::Resolved { + entity_ref, + reference_type, + source_method, + } => { + let key = (entity_ref.entity_type.clone(), entity_ref.entity_id); + if !visited.insert(key) { + continue; + } + + expanded.push(ExpandedEntityRef { + entity_ref: entity_ref.clone(), + depth: current_depth + 1, + via_from: current.clone(), + via_reference_type: reference_type, + via_source_method: source_method, + }); + + if expanded.len() >= max_entities { + break; + } + + if current_depth + 1 < depth { + queue.push_back((entity_ref, current_depth + 1)); + } + } + Neighbor::Unresolved(unresolved_ref) => { + unresolved.push(unresolved_ref); + } + } + } + } + + Ok(ExpandResult { + expanded_entities: expanded, + unresolved_references: unresolved, + }) +} + +enum Neighbor { + Resolved { + entity_ref: EntityRef, + reference_type: String, + source_method: String, + }, + Unresolved(UnresolvedRef), +} + +/// Find all neighbors (outgoing + incoming) for an entity in entity_references. 
+///
+/// Outgoing edges (entity is the source) are appended first, followed by incoming edges
+/// (entity is the target).
+fn find_neighbors(
+    conn: &Connection,
+    entity: &EntityRef,
+    edge_types: &[&str],
+) -> Result<Vec<Neighbor>> {
+    let mut neighbors = Vec::new();
+
+    find_outgoing(conn, entity, edge_types, &mut neighbors)?;
+    find_incoming(conn, entity, edge_types, &mut neighbors)?;
+
+    Ok(neighbors)
+}
+
+/// Build the `?3, ?4, ...` placeholder list for a `reference_type IN (...)` clause.
+///
+/// Slots `?1` and `?2` are taken by the entity type and id, so edge types start at `?3`.
+fn edge_type_placeholders(edge_type_count: usize) -> String {
+    (0..edge_type_count)
+        .map(|i| format!("?{}", i + 3))
+        .collect::<Vec<_>>()
+        .join(", ")
+}
+
+/// Assemble the positional parameters: entity type, entity id, then every edge type.
+///
+/// Values are bound by reference, so nothing is cloned or boxed per query.
+fn reference_params<'a>(
+    entity: &'a EntityRef,
+    edge_types: &'a [&'a str],
+) -> Vec<&'a dyn rusqlite::types::ToSql> {
+    let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
+    params.push(&entity.entity_type);
+    params.push(&entity.entity_id);
+    params.extend(
+        edge_types
+            .iter()
+            .map(|edge_type| edge_type as &dyn rusqlite::types::ToSql),
+    );
+    params
+}
+
+/// Find outgoing references: current entity is the source.
+///
+/// Rows with a target id are resolved to an `EntityRef`; a target id that no longer resolves
+/// is dropped. Rows without a target id are reported as `Neighbor::Unresolved`.
+fn find_outgoing(
+    conn: &Connection,
+    entity: &EntityRef,
+    edge_types: &[&str],
+    neighbors: &mut Vec<Neighbor>,
+) -> Result<()> {
+    let placeholders = edge_type_placeholders(edge_types.len());
+
+    let sql = format!(
+        "SELECT target_entity_type, target_entity_id, target_project_path, target_entity_iid,
+                reference_type, source_method
+         FROM entity_references
+         WHERE source_entity_type = ?1
+           AND source_entity_id = ?2
+           AND reference_type IN ({placeholders})"
+    );
+
+    let params = reference_params(entity, edge_types);
+
+    let mut stmt = conn.prepare(&sql)?;
+    let rows = stmt.query_map(params.as_slice(), |row| {
+        Ok((
+            row.get::<_, String>(0)?,         // target_entity_type
+            row.get::<_, Option<i64>>(1)?,    // target_entity_id
+            row.get::<_, Option<String>>(2)?, // target_project_path
+            row.get::<_, Option<i64>>(3)?,    // target_entity_iid
+            row.get::<_, String>(4)?,         // reference_type
+            row.get::<_, String>(5)?,         // source_method
+        ))
+    })?;
+
+    for row_result in rows {
+        let (target_type, target_id, target_project_path, target_iid, ref_type, source_method) =
+            row_result?;
+
+        match target_id {
+            Some(tid) => {
+                if let Some(resolved) = resolve_entity_ref(conn, &target_type, tid)? {
+                    neighbors.push(Neighbor::Resolved {
+                        entity_ref: resolved,
+                        reference_type: ref_type,
+                        source_method,
+                    });
+                }
+            }
+            None => {
+                neighbors.push(Neighbor::Unresolved(UnresolvedRef {
+                    source: entity.clone(),
+                    target_project: target_project_path,
+                    target_type,
+                    // A missing iid is reported as 0.
+                    target_iid: target_iid.unwrap_or(0),
+                    reference_type: ref_type,
+                }));
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Find incoming references: current entity is the target.
+///
+/// Sources that cannot be resolved to an `EntityRef` are dropped.
+fn find_incoming(
+    conn: &Connection,
+    entity: &EntityRef,
+    edge_types: &[&str],
+    neighbors: &mut Vec<Neighbor>,
+) -> Result<()> {
+    let placeholders = edge_type_placeholders(edge_types.len());
+
+    let sql = format!(
+        "SELECT source_entity_type, source_entity_id, reference_type, source_method
+         FROM entity_references
+         WHERE target_entity_type = ?1
+           AND target_entity_id = ?2
+           AND reference_type IN ({placeholders})"
+    );
+
+    let params = reference_params(entity, edge_types);
+
+    let mut stmt = conn.prepare(&sql)?;
+    let rows = stmt.query_map(params.as_slice(), |row| {
+        Ok((
+            row.get::<_, String>(0)?, // source_entity_type
+            row.get::<_, i64>(1)?,    // source_entity_id
+            row.get::<_, String>(2)?, // reference_type
+            row.get::<_, String>(3)?, // source_method
+        ))
+    })?;
+
+    for row_result in rows {
+        let (source_type, source_id, ref_type, source_method) = row_result?;
+
+        if let Some(resolved) = resolve_entity_ref(conn, &source_type, source_id)? {
+            neighbors.push(Neighbor::Resolved {
+                entity_ref: resolved,
+                reference_type: ref_type,
+                source_method,
+            });
+        }
+    }
+
+    Ok(())
+}
+
+/// Resolve an entity ID to a full EntityRef with iid and project_path.
+///
+/// Returns `Ok(None)` when `entity_type` is neither `issue` nor `merge_request`, or when no
+/// row with that id exists.
+fn resolve_entity_ref(
+    conn: &Connection,
+    entity_type: &str,
+    entity_id: i64,
+) -> Result<Option<EntityRef>> {
+    let table = match entity_type {
+        "issue" => "issues",
+        "merge_request" => "merge_requests",
+        _ => return Ok(None),
+    };
+
+    // `table` comes from the fixed match above, never from caller input, so interpolating it
+    // into the SQL text is safe. The id itself is bound as a parameter.
+    let sql = format!(
+        "SELECT e.iid, p.path_with_namespace
+         FROM {table} e
+         JOIN projects p ON p.id = e.project_id
+         WHERE e.id = ?1"
+    );
+
+    let result = conn.query_row(&sql, rusqlite::params![entity_id], |row| {
+        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
+    });
+
+    match result {
+        Ok((iid, project_path)) => Ok(Some(EntityRef {
+            entity_type: entity_type.to_owned(),
+            entity_id,
+            entity_iid: iid,
+            project_path,
+        })),
+        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
+        Err(e) => Err(e.into()),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::core::db::{create_connection, run_migrations};
+    use std::path::Path;
+
+    /// Fresh in-memory database with all migrations applied.
+    fn setup_test_db() -> Connection {
+        let conn = create_connection(Path::new(":memory:")).unwrap();
+        run_migrations(&conn).unwrap();
+        conn
+    }
+
+    fn insert_project(conn: &Connection) -> i64 {
+        conn.execute(
+            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')",
+            [],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
+        conn.execute(
+            "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test', 'opened', 'alice', 1000, 2000, 3000)",
+            rusqlite::params![iid * 100, project_id, iid],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
+        conn.execute(
+            "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
+            rusqlite::params![iid * 100, project_id, iid],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    // One argument per inserted column keeps call sites explicit in tests.
+    #[allow(clippy::too_many_arguments)]
+    fn insert_ref(
+        conn: &Connection,
+        project_id: i64,
+        source_type: &str,
+        source_id: i64,
+        target_type: &str,
+        target_id: Option<i64>,
+        ref_type: &str,
+        source_method: &str,
+    ) {
+        conn.execute(
+            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1000)",
+            rusqlite::params![project_id, source_type, source_id, target_type, target_id, ref_type, source_method],
+        )
+        .unwrap();
+    }
+
+    fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
+        EntityRef {
+            entity_type: entity_type.to_owned(),
+            entity_id,
+            entity_iid: iid,
+            project_path: "group/project".to_owned(),
+        }
+    }
+
+    #[test]
+    fn test_expand_depth_zero() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+
+        let result = expand_timeline(&conn, &seeds, 0, false, 100).unwrap();
+        assert!(result.expanded_entities.is_empty());
+        assert!(result.unresolved_references.is_empty());
+    }
+
+    #[test]
+    fn test_expand_finds_linked_entity() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        // MR closes issue
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "closes",
+            "api",
+        );
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+
+        assert_eq!(result.expanded_entities.len(), 1);
+        assert_eq!(
+            result.expanded_entities[0].entity_ref.entity_type,
+            "merge_request"
+        );
+        assert_eq!(result.expanded_entities[0].entity_ref.entity_iid, 10);
+        assert_eq!(result.expanded_entities[0].depth, 1);
+    }
+
+    #[test]
+    fn test_expand_bidirectional() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        // MR closes issue (MR is source, issue is target)
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "closes",
+            "api",
+        );
+
+        // Starting from MR should find the issue (outgoing)
+        let seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+
+        assert_eq!(result.expanded_entities.len(), 1);
+        assert_eq!(result.expanded_entities[0].entity_ref.entity_type, "issue");
+
+        // Starting from the issue should find the MR (incoming)
+        let issue_seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let reverse = expand_timeline(&conn, &issue_seeds, 1, false, 100).unwrap();
+
+        assert_eq!(reverse.expanded_entities.len(), 1);
+        assert_eq!(
+            reverse.expanded_entities[0].entity_ref.entity_type,
+            "merge_request"
+        );
+    }
+
+    #[test]
+    fn test_expand_respects_max_entities() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+
+        // Create 10 MRs that all close this issue
+        for i in 2..=11 {
+            let mr_id = insert_mr(&conn, project_id, i);
+            insert_ref(
+                &conn,
+                project_id,
+                "merge_request",
+                mr_id,
+                "issue",
+                Some(issue_id),
+                "closes",
+                "api",
+            );
+        }
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 3).unwrap();
+
+        assert!(result.expanded_entities.len() <= 3);
+    }
+
+    #[test]
+    fn test_expand_skips_mentions_by_default() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        // MR mentions issue (should be skipped by default)
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "mentioned",
+            "note_parse",
+        );
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+        assert!(result.expanded_entities.is_empty());
+    }
+
+    #[test]
+    fn test_expand_includes_mentions_when_flagged() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        // MR mentions issue
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "mentioned",
+            "note_parse",
+        );
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, true, 100).unwrap();
+        assert_eq!(result.expanded_entities.len(), 1);
+    }
+
+    #[test]
+    fn test_expand_collects_unresolved() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+
+        // Unresolved cross-project reference
+        conn.execute(
+            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at) VALUES (?1, 'issue', ?2, 'issue', NULL, 'other/repo', 42, 'closes', 'description_parse', 1000)",
+            rusqlite::params![project_id, issue_id],
+        )
+        .unwrap();
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+
+        assert!(result.expanded_entities.is_empty());
+        assert_eq!(result.unresolved_references.len(), 1);
+        assert_eq!(
+            result.unresolved_references[0].target_project,
+            Some("other/repo".to_owned())
+        );
+        assert_eq!(result.unresolved_references[0].target_iid, 42);
+    }
+
+    #[test]
+    fn test_expand_tracks_provenance() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "closes",
+            "api",
+        );
+
+        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+
+        assert_eq!(result.expanded_entities.len(), 1);
+        let expanded = &result.expanded_entities[0];
+        assert_eq!(expanded.via_reference_type, "closes");
+        assert_eq!(expanded.via_source_method, "api");
+        assert_eq!(expanded.via_from.entity_type, "issue");
+        assert_eq!(expanded.via_from.entity_id, issue_id);
+    }
+
+    #[test]
+    fn test_expand_no_duplicates() {
+        let conn = setup_test_db();
+        let project_id = insert_project(&conn);
+        let issue_id = insert_issue(&conn, project_id, 1);
+        let mr_id = insert_mr(&conn, project_id, 10);
+
+        // Two references from MR to same issue (different methods)
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "closes",
+            "api",
+        );
+        insert_ref(
+            &conn,
+            project_id,
+            "merge_request",
+            mr_id,
+            "issue",
+            Some(issue_id),
+            "related",
+            "note_parse",
+        );
+
+        let seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
+        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
+
+        // Should only appear once (first-come wins)
+        assert_eq!(result.expanded_entities.len(), 1);
+    }
+
+    #[test]
+    fn test_expand_empty_seeds() {
+        let conn = setup_test_db();
+        let result = expand_timeline(&conn, &[], 1, false, 100).unwrap();
+        assert!(result.expanded_entities.is_empty());
+    }
+}
diff --git a/src/core/timeline_seed.rs b/src/core/timeline_seed.rs
new file mode 100644
index 0000000..45f1907
--- /dev/null
+++ b/src/core/timeline_seed.rs
@@ -0,0 +1,573 @@
+use std::collections::HashSet;
+
+use rusqlite::Connection;
+
+use crate::core::error::Result;
+use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};
+use crate::search::{FtsQueryMode, to_fts_query};
+
+/// Result of the seed + hydrate phases.
+pub struct SeedResult {
+    pub seed_entities: Vec<EntityRef>,
+    pub evidence_notes: Vec<TimelineEvent>,
+}
+
+/// Run the SEED + HYDRATE phases of the timeline pipeline.
+///
+/// 1.
+///    SEED: FTS5 keyword search over documents -> matched document IDs
+/// 2. HYDRATE: Map document IDs -> source entities + top matched notes as evidence
+///
+/// Discussion documents are resolved to their parent entity (issue or MR).
+/// Entities are deduplicated. Evidence notes are capped at `max_evidence`.
+///
+/// Returns an empty result without touching the database when `query` produces an empty
+/// FTS expression.
+pub fn seed_timeline(
+    conn: &Connection,
+    query: &str,
+    project_id: Option<i64>,
+    since_ms: Option<i64>,
+    max_seeds: usize,
+    max_evidence: usize,
+) -> Result<SeedResult> {
+    let fts_query = to_fts_query(query, FtsQueryMode::Safe);
+    if fts_query.is_empty() {
+        return Ok(SeedResult {
+            seed_entities: Vec::new(),
+            evidence_notes: Vec::new(),
+        });
+    }
+
+    let seed_entities = find_seed_entities(conn, &fts_query, project_id, since_ms, max_seeds)?;
+    let evidence_notes = find_evidence_notes(conn, &fts_query, project_id, since_ms, max_evidence)?;
+
+    Ok(SeedResult {
+        seed_entities,
+        evidence_notes,
+    })
+}
+
+/// Find seed entities via FTS5 search, resolving discussions to their parent entity.
+///
+/// Up to `max_seeds * 3` documents are fetched because several documents can map to the same
+/// entity; iteration stops as soon as `max_seeds` distinct entities have been resolved.
+fn find_seed_entities(
+    conn: &Connection,
+    fts_query: &str,
+    project_id: Option<i64>,
+    since_ms: Option<i64>,
+    max_seeds: usize,
+) -> Result<Vec<EntityRef>> {
+    let sql = r"
+        SELECT d.source_type, d.source_id, d.project_id,
+               disc.issue_id, disc.merge_request_id
+        FROM documents_fts
+        JOIN documents d ON d.id = documents_fts.rowid
+        LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
+        WHERE documents_fts MATCH ?1
+          AND (?2 IS NULL OR d.project_id = ?2)
+          AND (?3 IS NULL OR d.updated_at >= ?3)
+        ORDER BY rank
+        LIMIT ?4
+    ";
+
+    // Saturate and clamp rather than `* 3` + `as i64`, which can overflow or wrap negative
+    // for very large caps.
+    let fetch_limit = i64::try_from(max_seeds.saturating_mul(3)).unwrap_or(i64::MAX);
+
+    let mut stmt = conn.prepare(sql)?;
+    let rows = stmt.query_map(
+        rusqlite::params![fts_query, project_id, since_ms, fetch_limit],
+        |row| {
+            Ok((
+                row.get::<_, String>(0)?,      // source_type
+                row.get::<_, i64>(1)?,         // source_id
+                row.get::<_, i64>(2)?,         // project_id
+                row.get::<_, Option<i64>>(3)?, // discussion's issue_id
+                row.get::<_, Option<i64>>(4)?, // discussion's merge_request_id
+            ))
+        },
+    )?;
+
+    let mut seen = HashSet::new();
+    let mut entities = Vec::new();
+
+    for row_result in rows {
+        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id) = row_result?;
+
+        let (entity_type, entity_id) = match source_type.as_str() {
+            "issue" => ("issue".to_owned(), source_id),
+            "merge_request" => ("merge_request".to_owned(), source_id),
+            "discussion" => {
+                if let Some(issue_id) = disc_issue_id {
+                    ("issue".to_owned(), issue_id)
+                } else if let Some(mr_id) = disc_mr_id {
+                    ("merge_request".to_owned(), mr_id)
+                } else {
+                    continue; // orphaned discussion
+                }
+            }
+            _ => continue,
+        };
+
+        let key = (entity_type.clone(), entity_id);
+        if !seen.insert(key) {
+            continue;
+        }
+
+        if let Some(entity_ref) = resolve_entity(conn, &entity_type, entity_id, proj_id)? {
+            entities.push(entity_ref);
+        }
+
+        if entities.len() >= max_seeds {
+            break;
+        }
+    }
+
+    Ok(entities)
+}
+
+/// Resolve an entity ID to a full EntityRef with iid and project_path.
+///
+/// The lookup is scoped to `project_id`; `Ok(None)` is returned for unknown entity types and
+/// for ids that do not exist in that project.
+fn resolve_entity(
+    conn: &Connection,
+    entity_type: &str,
+    entity_id: i64,
+    project_id: i64,
+) -> Result<Option<EntityRef>> {
+    let table = match entity_type {
+        "issue" => "issues",
+        "merge_request" => "merge_requests",
+        _ => return Ok(None),
+    };
+
+    // `table` comes from the fixed match above, so interpolating it is safe.
+    let sql = format!(
+        "SELECT e.iid, p.path_with_namespace
+         FROM {table} e
+         JOIN projects p ON p.id = e.project_id
+         WHERE e.id = ?1 AND e.project_id = ?2"
+    );
+
+    let result = conn.query_row(&sql, rusqlite::params![entity_id, project_id], |row| {
+        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
+    });
+
+    match result {
+        Ok((iid, project_path)) => Ok(Some(EntityRef {
+            entity_type: entity_type.to_owned(),
+            entity_id,
+            entity_iid: iid,
+            project_path,
+        })),
+        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
+        Err(e) => Err(e.into()),
+    }
+}
+
+/// Find evidence notes: FTS5-matched discussion notes that provide context.
+///
+/// Only non-system notes of matched discussion documents are returned. Each note becomes a
+/// `NoteEvidence` event whose snippet is the note body truncated to 200 characters. Notes
+/// whose discussion has no parent, or whose parent cannot be resolved, are skipped.
+fn find_evidence_notes(
+    conn: &Connection,
+    fts_query: &str,
+    project_id: Option<i64>,
+    since_ms: Option<i64>,
+    max_evidence: usize,
+) -> Result<Vec<TimelineEvent>> {
+    let sql = r"
+        SELECT n.id AS note_id, n.body, n.created_at, n.author_username,
+               disc.id AS discussion_id,
+               CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END AS parent_type,
+               COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id,
+               d.project_id
+        FROM documents_fts
+        JOIN documents d ON d.id = documents_fts.rowid
+        JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
+        JOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0
+        WHERE documents_fts MATCH ?1
+          AND (?2 IS NULL OR d.project_id = ?2)
+          AND (?3 IS NULL OR d.updated_at >= ?3)
+        ORDER BY rank
+        LIMIT ?4
+    ";
+
+    // Clamp instead of `as`-casting so an oversized cap cannot wrap into a negative LIMIT.
+    let limit = i64::try_from(max_evidence).unwrap_or(i64::MAX);
+
+    let mut stmt = conn.prepare(sql)?;
+    let rows = stmt.query_map(
+        rusqlite::params![fts_query, project_id, since_ms, limit],
+        |row| {
+            Ok((
+                row.get::<_, i64>(0)?,            // note_id
+                row.get::<_, Option<String>>(1)?, // body
+                row.get::<_, i64>(2)?,            // created_at
+                row.get::<_, Option<String>>(3)?, // author
+                row.get::<_, i64>(4)?,            // discussion_id
+                row.get::<_, String>(5)?,         // parent_type
+                row.get::<_, Option<i64>>(6)?,    // parent_entity_id (NULL if orphaned)
+                row.get::<_, i64>(7)?,            // project_id
+            ))
+        },
+    )?;
+
+    let mut events = Vec::new();
+
+    for row_result in rows {
+        let (
+            note_id,
+            body,
+            created_at,
+            author,
+            discussion_id,
+            parent_type,
+            parent_entity_id,
+            proj_id,
+        ) = row_result?;
+
+        // A discussion with neither an issue nor an MR parent yields NULL here. Skip it, as
+        // find_seed_entities does, rather than failing the whole query on a type error.
+        let parent_entity_id = match parent_entity_id {
+            Some(id) => id,
+            None => continue,
+        };
+
+        let (iid, project_path) =
+            match resolve_entity(conn, &parent_type, parent_entity_id, proj_id)? {
+                Some(entity) => (entity.entity_iid, entity.project_path),
+                None => continue,
+            };
+
+        let snippet = truncate_to_chars(body.as_deref().unwrap_or(""), 200);
+
+        events.push(TimelineEvent {
+            timestamp: created_at,
+            entity_type: parent_type,
+            entity_id: parent_entity_id,
+            entity_iid: iid,
+            project_path,
+            event_type: TimelineEventType::NoteEvidence {
+                note_id,
+                snippet,
+                discussion_id: Some(discussion_id),
+            },
+            summary: format!("Note by {}", author.as_deref().unwrap_or("unknown")),
+            actor: author,
+            url: None,
+            is_seed: true,
+        });
+    }
+
+    Ok(events)
+}
+
+/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
+///
+/// Strings that already fit are returned unchanged.
+fn truncate_to_chars(s: &str, max_chars: usize) -> String {
+    // The byte offset of the character at index `max_chars` is exactly where the first
+    // `max_chars` characters end; if there is no such character the whole string fits.
+    match s.char_indices().nth(max_chars) {
+        Some((byte_end, _)) => s[..byte_end].to_owned(),
+        None => s.to_owned(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::core::db::{create_connection, run_migrations};
+    use std::path::Path;
+    use std::sync::atomic::{AtomicI64, Ordering};
+
+    /// Monotonic counter used to mint unique GitLab-side identifiers for fixtures, so rows
+    /// can never collide and test runs are deterministic.
+    static NEXT_FIXTURE_ID: AtomicI64 = AtomicI64::new(1);
+
+    fn next_fixture_id() -> i64 {
+        NEXT_FIXTURE_ID.fetch_add(1, Ordering::Relaxed)
+    }
+
+    /// Fresh in-memory database with all migrations applied.
+    fn setup_test_db() -> Connection {
+        let conn = create_connection(Path::new(":memory:")).unwrap();
+        run_migrations(&conn).unwrap();
+        conn
+    }
+
+    fn insert_test_project(conn: &Connection) -> i64 {
+        conn.execute(
+            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')",
+            [],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_test_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
+        conn.execute(
+            "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)",
+            rusqlite::params![iid * 100, project_id, iid],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_test_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
+        conn.execute(
+            "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
+            rusqlite::params![iid * 100, project_id, iid],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_document(
+        conn: &Connection,
+        source_type: &str,
+        source_id: i64,
+        project_id: i64,
+        content: &str,
+    ) -> i64 {
+        conn.execute(
+            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES (?1, ?2, ?3, ?4, ?5)",
+            rusqlite::params![source_type, source_id, project_id, content, format!("hash_{source_id}")],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_discussion(
+        conn: &Connection,
+        project_id: i64,
+        issue_id: Option<i64>,
+        mr_id: Option<i64>,
+    ) -> i64 {
+        let noteable_type = if issue_id.is_some() {
+            "Issue"
+        } else {
+            "MergeRequest"
+        };
+        conn.execute(
+            "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
+            rusqlite::params![format!("disc_{}", next_fixture_id()), project_id, issue_id, mr_id, noteable_type],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    fn insert_note(
+        conn: &Connection,
+        discussion_id: i64,
+        project_id: i64,
+        body: &str,
+        is_system: bool,
+    ) -> i64 {
+        let gitlab_id = next_fixture_id();
+        conn.execute(
+            "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, 'alice', ?5, 5000, 5000, 5000)",
+            rusqlite::params![gitlab_id, discussion_id, project_id, is_system as i32, body],
+        )
+        .unwrap();
+        conn.last_insert_rowid()
+    }
+
+    #[test]
+    fn test_seed_empty_query_returns_empty() {
+        let conn = setup_test_db();
+        let result = seed_timeline(&conn, "", None, None, 50, 10).unwrap();
+        assert!(result.seed_entities.is_empty());
+        assert!(result.evidence_notes.is_empty());
+    }
+
+    #[test]
+    fn test_seed_no_matches_returns_empty() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 1);
+        insert_document(
+            &conn,
+            "issue",
+            issue_id,
+            project_id,
+            "unrelated content here",
+        );
+
+        let result = seed_timeline(&conn, "nonexistent_xyzzy_query", None, None, 50, 10).unwrap();
+        assert!(result.seed_entities.is_empty());
+    }
+
+    #[test]
+    fn test_seed_finds_issue() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 42);
+        insert_document(
+            &conn,
+            "issue",
+            issue_id,
+            project_id,
+            "authentication error in login flow",
+        );
+
+        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
+        assert_eq!(result.seed_entities.len(), 1);
+        assert_eq!(result.seed_entities[0].entity_type, "issue");
+        assert_eq!(result.seed_entities[0].entity_iid, 42);
+        assert_eq!(result.seed_entities[0].project_path, "group/project");
+    }
+
+    #[test]
+    fn test_seed_finds_mr() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let mr_id = insert_test_mr(&conn, project_id, 99);
+        insert_document(
+            &conn,
+            "merge_request",
+            mr_id,
+            project_id,
+            "fix authentication bug",
+        );
+
+        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
+        assert_eq!(result.seed_entities.len(), 1);
+        assert_eq!(result.seed_entities[0].entity_type, "merge_request");
+        assert_eq!(result.seed_entities[0].entity_iid, 99);
+    }
+
+    #[test]
+    fn test_seed_deduplicates_entities() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 10);
+
+        // Two documents referencing the same issue
+        insert_document(
+            &conn,
+            "issue",
+            issue_id,
+            project_id,
+            "authentication error first doc",
+        );
+        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+        insert_document(
+            &conn,
+            "discussion",
+            disc_id,
+            project_id,
+            "authentication error second doc",
+        );
+
+        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
+        // Should deduplicate: both map to the same issue
+        assert_eq!(result.seed_entities.len(), 1);
+        assert_eq!(result.seed_entities[0].entity_iid, 10);
+    }
+
+    #[test]
+    fn test_seed_resolves_discussion_to_parent() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 7);
+        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+        insert_document(
+            &conn,
+            "discussion",
+            disc_id,
+            project_id,
+            "deployment pipeline failed",
+        );
+
+        let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
+        assert_eq!(result.seed_entities.len(), 1);
+        assert_eq!(result.seed_entities[0].entity_type, "issue");
+        assert_eq!(result.seed_entities[0].entity_iid, 7);
+    }
+
+    #[test]
+    fn test_seed_evidence_capped() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 1);
+
+        // Create 15 discussion documents with notes about "deployment"
+        for i in 0..15 {
+            let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+            insert_document(
+                &conn,
+                "discussion",
+                disc_id,
+                project_id,
+                &format!("deployment issue number {i}"),
+            );
+            insert_note(
+                &conn,
+                disc_id,
+                project_id,
+                &format!("deployment note {i}"),
+                false,
+            );
+        }
+
+        let result = seed_timeline(&conn, "deployment", None, None, 50, 5).unwrap();
+        assert!(result.evidence_notes.len() <= 5);
+    }
+
+    #[test]
+    fn test_seed_evidence_snippet_truncated() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+        let issue_id = insert_test_issue(&conn, project_id, 1);
+        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+        insert_document(
+            &conn,
+            "discussion",
+            disc_id,
+            project_id,
+            "deployment configuration",
+        );
+
+        let long_body = "x".repeat(500);
+        insert_note(&conn, disc_id, project_id, &long_body, false);
+
+        let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
+        assert!(!result.evidence_notes.is_empty());
+        if let TimelineEventType::NoteEvidence { snippet, .. } =
+            &result.evidence_notes[0].event_type
+        {
+            assert!(snippet.chars().count() <= 200);
+        } else {
+            panic!("Expected NoteEvidence");
+        }
+    }
+
+    #[test]
+    fn test_seed_respects_project_filter() {
+        let conn = setup_test_db();
+        let project_id = insert_test_project(&conn);
+
+        // Insert a second project
+        conn.execute(
+            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (2, 'other/repo', 'https://gitlab.com/other/repo')",
+            [],
+        )
+        .unwrap();
+        let project2_id = conn.last_insert_rowid();
+
+        let issue1_id = insert_test_issue(&conn, project_id, 1);
+        insert_document(
+            &conn,
+            "issue",
+            issue1_id,
+            project_id,
+            "authentication error",
+        );
+
+        let issue2_id = insert_test_issue(&conn, project2_id, 2);
+        insert_document(
+            &conn,
+            "issue",
+            issue2_id,
+            project2_id,
+            "authentication error",
+        );
+
+        // Filter to project 1 only
+        let result =
+            seed_timeline(&conn, "authentication", Some(project_id), None, 50, 10).unwrap();
+        assert_eq!(result.seed_entities.len(), 1);
+        assert_eq!(result.seed_entities[0].project_path, "group/project");
+    }
+
+    #[test]
+    fn test_truncate_to_chars_short() {
+        assert_eq!(truncate_to_chars("hello", 200), "hello");
+    }
+
+    #[test]
+    fn test_truncate_to_chars_exact_boundary() {
+        assert_eq!(truncate_to_chars("abc", 3), "abc");
+        assert_eq!(truncate_to_chars("abc", 0), "");
+    }
+
+    #[test]
+    fn test_truncate_to_chars_long() {
+        let long = "a".repeat(300);
+        let result = truncate_to_chars(&long, 200);
+        assert_eq!(result.chars().count(), 200);
+    }
+
+    #[test]
+    fn test_truncate_to_chars_multibyte() {
+        let s = "\u{1F600}".repeat(300); // emoji
+        let result = truncate_to_chars(&s, 200);
+        assert_eq!(result.chars().count(), 200);
+        // Verify valid UTF-8
+        assert!(std::str::from_utf8(result.as_bytes()).is_ok());
+    }
+}