From 128008578aaeeb1adcaabbab89bb918d1c498091 Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Tue, 3 Feb 2026 12:51:49 -0500 Subject: [PATCH] feat(events): Wire resource event fetching into sync pipeline (bd-1ep) Enqueue resource_events jobs for all issues/MRs after discussion sync, then drain the queue by fetching state/label/milestone events from GitLab API and storing them via transaction-based wrappers. Adds progress events, count tracking through orchestrator->ingest->sync result chain, and respects fetch_resource_events config flag. Includes clippy fixes across codebase from parallel agent work. Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 2 +- .beads/last-touched | 2 +- src/cli/commands/embed.rs | 7 +- src/cli/commands/generate_docs.rs | 13 +- src/cli/commands/ingest.rs | 50 +- src/cli/commands/mod.rs | 10 +- src/cli/commands/search.rs | 44 +- src/cli/commands/show.rs | 10 +- src/cli/commands/stats.rs | 76 +-- src/cli/commands/sync.rs | 92 +++- src/cli/mod.rs | 55 ++- src/core/backoff.rs | 5 +- src/core/db.rs | 20 +- src/core/dependent_queue.rs | 37 +- src/core/error.rs | 6 +- src/core/events_db.rs | 5 +- src/core/project.rs | 55 ++- src/documents/extractor.rs | 645 +++++++++++++++++++++----- src/documents/mod.rs | 10 +- src/documents/regenerator.rs | 66 ++- src/documents/truncation.rs | 11 +- src/embedding/chunking.rs | 17 +- src/embedding/mod.rs | 6 +- src/embedding/ollama.rs | 30 +- src/embedding/pipeline.rs | 37 +- src/gitlab/client.rs | 58 +-- src/gitlab/transformers/discussion.rs | 6 +- src/ingestion/dirty_tracker.rs | 72 ++- src/ingestion/discussion_queue.rs | 58 ++- src/ingestion/discussions.rs | 3 +- src/ingestion/issues.rs | 14 +- src/ingestion/merge_requests.rs | 2 +- src/ingestion/mod.rs | 2 +- src/ingestion/mr_discussions.rs | 12 +- src/ingestion/orchestrator.rs | 434 ++++++++++++++++- src/main.rs | 71 +-- src/search/filters.rs | 5 +- src/search/fts.rs | 14 +- src/search/hybrid.rs | 76 ++- src/search/mod.rs | 12 +- src/search/rrf.rs | 31 +- src/search/vector.rs | 13 +- tests/embedding.rs | 85 +++- tests/fts_search.rs | 197 ++++++-- tests/golden_query_tests.rs | 98 ++-- tests/hybrid_search.rs | 58 ++- tests/migration_tests.rs | 2 +- 47 files changed, 1981 insertions(+), 653 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 3b4cf17..2e373eb 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -8,7 +8,7 @@ {"id":"bd-18t","title":"Implement discussion truncation logic","description":"## Background\nDiscussion threads can contain dozens of notes spanning thousands of characters. The truncation module ensures discussion documents stay within a 32k character limit (suitable for embedding chunking) by dropping middle notes while preserving first and last notes for context. A separate hard safety cap of 2MB applies to ALL document types for pathological content (pasted logs, base64 blobs). Issue/MR documents are NOT truncated by the discussion logic — only the hard cap applies.\n\n## Approach\nCreate `src/documents/truncation.rs` per PRD Section 2.3:\n\n```rust\npub const MAX_DISCUSSION_CHARS: usize = 32_000;\npub const MAX_DOCUMENT_CHARS_HARD: usize = 2_000_000;\n\npub struct NoteContent {\n pub author: String,\n pub date: String,\n pub body: String,\n}\n\npub struct TruncationResult {\n pub content: String,\n pub is_truncated: bool,\n pub reason: Option,\n}\n\npub enum TruncationReason {\n TokenLimitMiddleDrop,\n SingleNoteOversized,\n FirstLastOversized,\n HardCapOversized,\n}\n```\n\n**Core functions:**\n- `truncate_discussion(notes: &[NoteContent], max_chars: usize) -> TruncationResult`\n- `truncate_utf8(s: &str, max_bytes: usize) -> &str` (shared with fts.rs)\n- `truncate_hard_cap(content: &str) -> TruncationResult` (for any doc type)\n\n**Algorithm for truncate_discussion:**\n1. Format all notes as `@author (date):\\nbody\\n\\n`\n2. If total <= max_chars: return as-is\n3. If single note: truncate at UTF-8 boundary, append `[truncated]`, reason = SingleNoteOversized\n4. Binary search: find max N where first N notes + last 1 note + marker fit within max_chars\n5. If first + last > max_chars: keep only first (truncated), reason = FirstLastOversized\n6. Otherwise: first N + marker + last M, reason = TokenLimitMiddleDrop\n\n**Marker format:** `\\n\\n[... N notes omitted for length ...]\\n\\n`\n\n## Acceptance Criteria\n- [ ] Discussion with total < 32k chars returns untruncated\n- [ ] Discussion > 32k chars: middle notes dropped, first + last preserved\n- [ ] Truncation marker shows correct count of omitted notes\n- [ ] Single note > 32k chars: truncated at UTF-8-safe boundary with `[truncated]` appended\n- [ ] First + last note > 32k: only first note kept (truncated if needed)\n- [ ] Hard cap (2MB) truncates any document type at UTF-8-safe boundary\n- [ ] `truncate_utf8` never panics on multi-byte codepoints (emoji, CJK, accented chars)\n- [ ] `TruncationReason::as_str()` returns DB-compatible strings matching CHECK constraint\n\n## Files\n- `src/documents/truncation.rs` — new file\n- `src/documents/mod.rs` — add `pub use truncation::{truncate_discussion, truncate_hard_cap, TruncationResult, NoteContent};`\n\n## TDD Loop\nRED: Tests in `#[cfg(test)] mod tests`:\n- `test_no_truncation_under_limit` — 3 short notes, all fit\n- `test_middle_notes_dropped` — 10 notes totaling > 32k, first+last preserved\n- `test_single_note_oversized` — one note of 50k chars, truncated safely\n- `test_first_last_oversized` — first=20k, last=20k, only first kept\n- `test_one_note_total` — single note under limit: no truncation\n- `test_utf8_boundary_safety` — content with emoji/CJK at truncation point\n- `test_hard_cap` — 3MB content truncated to 2MB\n- `test_marker_count_correct` — marker says \"[... 5 notes omitted ...]\" when 5 dropped\nGREEN: Implement truncation logic\nVERIFY: `cargo test truncation`\n\n## Edge Cases\n- Empty notes list: return empty content, not truncated\n- All notes are empty strings: total = 0, no truncation\n- Note body contains only multi-byte characters: truncate_utf8 walks backward to find safe boundary\n- Note body with trailing newlines: formatted output should not have excessive blank lines","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-30T15:25:45.597167Z","created_by":"tayloreernisse","updated_at":"2026-01-30T17:21:32.256569Z","closed_at":"2026-01-30T17:21:32.256507Z","close_reason":"Completed: truncate_discussion, truncate_hard_cap, truncate_utf8, TruncationReason with as_str(), 12 tests pass","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-18t","depends_on_id":"bd-36p","type":"blocks","created_at":"2026-01-30T15:29:15.947679Z","created_by":"tayloreernisse"}]} {"id":"bd-1cb","title":"[CP0] gi doctor command - health checks","description":"## Background\n\ndoctor is the primary diagnostic command. It checks all system components and reports their status. Supports JSON output for scripting and CI integration. Must degrade gracefully - warn about optional components (Ollama) without failing.\n\nReference: docs/prd/checkpoint-0.md section \"gi doctor\"\n\n## Approach\n\n**src/cli/commands/doctor.ts:**\n\nPerforms 5 checks:\n1. **Config**: Load and validate config file\n2. **Database**: Open DB, verify pragmas, check schema version\n3. **GitLab**: Auth with token, verify connectivity\n4. **Projects**: Count configured vs resolved in DB\n5. **Ollama**: Ping embedding endpoint (optional - warn if unavailable)\n\n**DoctorResult interface:**\n```typescript\ninterface DoctorResult {\n success: boolean; // All required checks passed\n checks: {\n config: { status: 'ok' | 'error'; path?: string; error?: string };\n database: { status: 'ok' | 'error'; path?: string; schemaVersion?: number; error?: string };\n gitlab: { status: 'ok' | 'error'; url?: string; username?: string; error?: string };\n projects: { status: 'ok' | 'error'; configured?: number; resolved?: number; error?: string };\n ollama: { status: 'ok' | 'warning' | 'error'; url?: string; model?: string; error?: string };\n };\n}\n```\n\n**Human-readable output (default):**\n```\ngi doctor\n\n Config ✓ Loaded from ~/.config/gi/config.json\n Database ✓ ~/.local/share/gi/data.db (schema v1)\n GitLab ✓ https://gitlab.example.com (authenticated as @johndoe)\n Projects ✓ 2 configured, 2 resolved\n Ollama ⚠ Not running (semantic search unavailable)\n\nStatus: Ready (lexical search available, semantic search requires Ollama)\n```\n\n**JSON output (--json flag):**\nOutputs DoctorResult as JSON to stdout\n\n## Acceptance Criteria\n\n- [ ] Config check: shows path and validation status\n- [ ] Database check: shows path, schema version, pragma verification\n- [ ] GitLab check: shows URL and authenticated username\n- [ ] Projects check: shows configured count and resolved count\n- [ ] Ollama check: warns if not running, doesn't fail overall\n- [ ] success=true only if config, database, gitlab, projects all ok\n- [ ] --json outputs valid JSON matching DoctorResult interface\n- [ ] Exit 0 if success=true, exit 1 if any required check fails\n- [ ] Colors and symbols in human output (✓, ⚠, ✗)\n\n## Files\n\nCREATE:\n- src/cli/commands/doctor.ts\n- src/types/doctor.ts (DoctorResult interface)\n\n## TDD Loop\n\nN/A - diagnostic command, verify with manual testing:\n\n```bash\n# All good\ngi doctor\n\n# JSON output\ngi doctor --json | jq .\n\n# With missing Ollama\n# (just don't run Ollama - should show warning)\n\n# With bad config\nmv ~/.config/gi/config.json ~/.config/gi/config.json.bak\ngi doctor # should show config error\n```\n\n## Edge Cases\n\n- Ollama timeout should be short (2s) - don't block on slow network\n- Ollama 404 (wrong model) vs connection refused (not running)\n- Database file exists but wrong schema version\n- Projects in config but not in database (init not run)\n- Token valid for user but project access revoked","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:51.435540Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:30:24.921206Z","closed_at":"2026-01-25T03:30:24.921041Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1cb","depends_on_id":"bd-13b","type":"blocks","created_at":"2026-01-24T16:13:10.427307Z","created_by":"tayloreernisse"},{"issue_id":"bd-1cb","depends_on_id":"bd-1l1","type":"blocks","created_at":"2026-01-24T16:13:10.478469Z","created_by":"tayloreernisse"},{"issue_id":"bd-1cb","depends_on_id":"bd-3ng","type":"blocks","created_at":"2026-01-24T16:13:10.461940Z","created_by":"tayloreernisse"},{"issue_id":"bd-1cb","depends_on_id":"bd-epj","type":"blocks","created_at":"2026-01-24T16:13:10.443612Z","created_by":"tayloreernisse"}]} {"id":"bd-1d5","title":"[CP1] GitLab client pagination methods","description":"Add async generator methods for paginated GitLab API calls.\n\nMethods to add to src/gitlab/client.ts:\n- paginateIssues(gitlabProjectId, updatedAfter?) → AsyncGenerator\n- paginateIssueDiscussions(gitlabProjectId, issueIid) → AsyncGenerator\n- requestWithHeaders(path) → { data: T, headers: Headers }\n\nImplementation:\n- Use scope=all, state=all for issues\n- Order by updated_at ASC\n- Follow X-Next-Page header until empty/absent\n- Apply cursor rewind (subtract cursorRewindSeconds) for tuple semantics\n- Fall back to empty-page detection if headers missing\n\nFiles: src/gitlab/client.ts\nTests: tests/unit/pagination.test.ts\nDone when: Pagination handles multiple pages and respects cursors","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T15:19:43.069869Z","created_by":"tayloreernisse","updated_at":"2026-01-25T15:21:35.156881Z","deleted_at":"2026-01-25T15:21:35.156877Z","deleted_by":"tayloreernisse","delete_reason":"delete","original_type":"task","compaction_level":0,"original_size":0} -{"id":"bd-1ep","title":"Wire resource event fetching into sync pipeline","description":"## Background\nAfter issue/MR primary ingestion and discussion fetch, changed entities need resource_events jobs enqueued and drained. This is the integration point that connects the queue (bd-tir), API client (bd-sqw), DB upserts (bd-1uc), and config flag (bd-2e8).\n\n## Approach\nModify the sync pipeline to add two new phases after discussion sync:\n\n**Phase 1 — Enqueue during ingestion:**\nIn src/ingestion/orchestrator.rs, after each entity upsert (issue or MR), call:\n```rust\nif config.sync.fetch_resource_events {\n enqueue_job(conn, project_id, \"issue\", iid, local_id, \"resource_events\", None)?;\n}\n// For MRs, also enqueue mr_closes_issues (always) and mr_diffs (when fetchMrFileChanges)\n```\n\nThe \"changed entity\" detection uses the existing dirty tracker: if an entity was inserted or updated during this sync run, it gets enqueued. On --full sync, all entities are enqueued.\n\n**Phase 2 — Drain dependent queue:**\nAdd a new drain step in src/cli/commands/sync.rs (or new src/core/drain.rs), called after discussion sync:\n```rust\npub async fn drain_dependent_queue(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n progress: Option,\n) -> Result\n```\n\nFlow:\n1. reclaim_stale_locks(conn, config.sync.stale_lock_minutes)\n2. Loop: claim_jobs(conn, \"resource_events\", batch_size=10)\n3. For each job:\n a. Fetch 3 event types via client (fetch_issue_state_events etc.)\n b. Store via upsert functions (upsert_state_events etc.)\n c. complete_job(conn, job.id) on success\n d. fail_job(conn, job.id, error_msg) on failure\n4. Report progress: \"Fetching resource events... [N/M]\"\n5. Repeat until no more claimable jobs\n\n**Progress reporting:**\nAdd new ProgressEvent variants:\n```rust\nResourceEventsFetchStart { total: usize },\nResourceEventsFetchProgress { completed: usize, total: usize },\nResourceEventsFetchComplete { fetched: usize, failed: usize },\n```\n\n## Acceptance Criteria\n- [ ] Full sync enqueues resource_events jobs for all issues and MRs\n- [ ] Incremental sync only enqueues for entities changed since last sync\n- [ ] --no-events prevents enqueueing resource_events jobs\n- [ ] Drain step fetches all 3 event types per entity\n- [ ] Successful fetches stored and job completed\n- [ ] Failed fetches recorded with error, job retried on next sync\n- [ ] Stale locks reclaimed at drain start\n- [ ] Progress displayed: \"Fetching resource events... [N/M]\"\n- [ ] Robot mode progress suppressed (quiet mode)\n\n## Files\n- src/ingestion/orchestrator.rs (add enqueue calls during upsert)\n- src/cli/commands/sync.rs (add drain step after discussions)\n- src/core/drain.rs (new, optional — or inline in sync.rs)\n\n## TDD Loop\nRED: tests/sync_pipeline_tests.rs (or extend existing):\n- `test_sync_enqueues_resource_events_for_changed_entities` - mock sync, verify jobs enqueued\n- `test_sync_no_events_flag_skips_enqueue` - verify no jobs when flag false\n- `test_drain_completes_jobs_on_success` - mock API responses, verify jobs deleted\n- `test_drain_fails_jobs_on_error` - mock API failure, verify job attempts incremented\n\nNote: Full pipeline integration tests may need mock HTTP server. Start with unit tests on enqueue/drain logic using the real DB with mock API responses.\n\nGREEN: Implement enqueue hooks + drain step\n\nVERIFY: `cargo test sync -- --nocapture && cargo build`\n\n## Edge Cases\n- Entity deleted between enqueue and drain: API returns 404, fail_job with \"entity not found\" (retry won't help but backoff caps it)\n- Rate limiting during drain: GitLabRateLimited error should fail_job with retry (transient)\n- Network error during drain: GitLabNetworkError should fail_job with retry\n- Multiple sync runs competing: locked_at prevents double-processing; stale lock reclaim handles crashes\n- Drain should have a max iterations guard to prevent infinite loop if jobs keep failing and being retried within the same run","status":"open","priority":2,"issue_type":"task","created_at":"2026-02-02T21:31:57.334527Z","created_by":"tayloreernisse","updated_at":"2026-02-02T21:43:04.365143Z","compaction_level":0,"original_size":0,"labels":["gate-1","phase-b","pipeline"],"dependencies":[{"issue_id":"bd-1ep","depends_on_id":"bd-1uc","type":"blocks","created_at":"2026-02-02T21:32:06.225837Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-2e8","type":"blocks","created_at":"2026-02-02T21:32:06.142442Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:57.335847Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-sqw","type":"blocks","created_at":"2026-02-02T21:32:06.183287Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-tir","type":"blocks","created_at":"2026-02-02T21:32:06.267800Z","created_by":"tayloreernisse"}]} +{"id":"bd-1ep","title":"Wire resource event fetching into sync pipeline","description":"## Background\nAfter issue/MR primary ingestion and discussion fetch, changed entities need resource_events jobs enqueued and drained. This is the integration point that connects the queue (bd-tir), API client (bd-sqw), DB upserts (bd-1uc), and config flag (bd-2e8).\n\n## Approach\nModify the sync pipeline to add two new phases after discussion sync:\n\n**Phase 1 — Enqueue during ingestion:**\nIn src/ingestion/orchestrator.rs, after each entity upsert (issue or MR), call:\n```rust\nif config.sync.fetch_resource_events {\n enqueue_job(conn, project_id, \"issue\", iid, local_id, \"resource_events\", None)?;\n}\n// For MRs, also enqueue mr_closes_issues (always) and mr_diffs (when fetchMrFileChanges)\n```\n\nThe \"changed entity\" detection uses the existing dirty tracker: if an entity was inserted or updated during this sync run, it gets enqueued. On --full sync, all entities are enqueued.\n\n**Phase 2 — Drain dependent queue:**\nAdd a new drain step in src/cli/commands/sync.rs (or new src/core/drain.rs), called after discussion sync:\n```rust\npub async fn drain_dependent_queue(\n conn: &Connection,\n client: &GitLabClient,\n config: &Config,\n progress: Option,\n) -> Result\n```\n\nFlow:\n1. reclaim_stale_locks(conn, config.sync.stale_lock_minutes)\n2. Loop: claim_jobs(conn, \"resource_events\", batch_size=10)\n3. For each job:\n a. Fetch 3 event types via client (fetch_issue_state_events etc.)\n b. Store via upsert functions (upsert_state_events etc.)\n c. complete_job(conn, job.id) on success\n d. fail_job(conn, job.id, error_msg) on failure\n4. Report progress: \"Fetching resource events... [N/M]\"\n5. Repeat until no more claimable jobs\n\n**Progress reporting:**\nAdd new ProgressEvent variants:\n```rust\nResourceEventsFetchStart { total: usize },\nResourceEventsFetchProgress { completed: usize, total: usize },\nResourceEventsFetchComplete { fetched: usize, failed: usize },\n```\n\n## Acceptance Criteria\n- [ ] Full sync enqueues resource_events jobs for all issues and MRs\n- [ ] Incremental sync only enqueues for entities changed since last sync\n- [ ] --no-events prevents enqueueing resource_events jobs\n- [ ] Drain step fetches all 3 event types per entity\n- [ ] Successful fetches stored and job completed\n- [ ] Failed fetches recorded with error, job retried on next sync\n- [ ] Stale locks reclaimed at drain start\n- [ ] Progress displayed: \"Fetching resource events... [N/M]\"\n- [ ] Robot mode progress suppressed (quiet mode)\n\n## Files\n- src/ingestion/orchestrator.rs (add enqueue calls during upsert)\n- src/cli/commands/sync.rs (add drain step after discussions)\n- src/core/drain.rs (new, optional — or inline in sync.rs)\n\n## TDD Loop\nRED: tests/sync_pipeline_tests.rs (or extend existing):\n- `test_sync_enqueues_resource_events_for_changed_entities` - mock sync, verify jobs enqueued\n- `test_sync_no_events_flag_skips_enqueue` - verify no jobs when flag false\n- `test_drain_completes_jobs_on_success` - mock API responses, verify jobs deleted\n- `test_drain_fails_jobs_on_error` - mock API failure, verify job attempts incremented\n\nNote: Full pipeline integration tests may need mock HTTP server. Start with unit tests on enqueue/drain logic using the real DB with mock API responses.\n\nGREEN: Implement enqueue hooks + drain step\n\nVERIFY: `cargo test sync -- --nocapture && cargo build`\n\n## Edge Cases\n- Entity deleted between enqueue and drain: API returns 404, fail_job with \"entity not found\" (retry won't help but backoff caps it)\n- Rate limiting during drain: GitLabRateLimited error should fail_job with retry (transient)\n- Network error during drain: GitLabNetworkError should fail_job with retry\n- Multiple sync runs competing: locked_at prevents double-processing; stale lock reclaim handles crashes\n- Drain should have a max iterations guard to prevent infinite loop if jobs keep failing and being retried within the same run","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-02T21:31:57.334527Z","created_by":"tayloreernisse","updated_at":"2026-02-03T17:46:51.336138Z","closed_at":"2026-02-03T17:46:51.336077Z","close_reason":"Implemented: enqueue + drain resource events in orchestrator, wired counts through ingest→sync pipeline, added progress events, 4 new tests, all 209 tests pass","compaction_level":0,"original_size":0,"labels":["gate-1","phase-b","pipeline"],"dependencies":[{"issue_id":"bd-1ep","depends_on_id":"bd-1uc","type":"blocks","created_at":"2026-02-02T21:32:06.225837Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-2e8","type":"blocks","created_at":"2026-02-02T21:32:06.142442Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-2zl","type":"parent-child","created_at":"2026-02-02T21:31:57.335847Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-sqw","type":"blocks","created_at":"2026-02-02T21:32:06.183287Z","created_by":"tayloreernisse"},{"issue_id":"bd-1ep","depends_on_id":"bd-tir","type":"blocks","created_at":"2026-02-02T21:32:06.267800Z","created_by":"tayloreernisse"}]} {"id":"bd-1fn","title":"[CP1] Integration tests for discussion watermark","description":"Integration tests verifying discussion sync watermark behavior.\n\n## Tests (tests/discussion_watermark_tests.rs)\n\n- skips_discussion_fetch_when_updated_at_unchanged\n- fetches_discussions_when_updated_at_advanced\n- updates_watermark_after_successful_discussion_sync\n- does_not_update_watermark_on_discussion_sync_failure\n\n## Test Scenario\n1. Ingest issue with updated_at = T1\n2. Verify discussions_synced_for_updated_at = T1\n3. Re-run ingest with same issue (updated_at = T1)\n4. Verify NO discussion API calls made (watermark prevents)\n5. Simulate issue update (updated_at = T2)\n6. Re-run ingest\n7. Verify discussion API calls made for T2\n8. Verify watermark updated to T2\n\n## Why This Matters\nDiscussion API is expensive (1 call per issue). Watermark ensures\nwe only refetch when issue actually changed, even with cursor rewind.\n\nFiles: tests/discussion_watermark_tests.rs\nDone when: Watermark correctly prevents redundant discussion refetch","status":"tombstone","priority":3,"issue_type":"task","created_at":"2026-01-25T16:59:11.362495Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:02.086158Z","deleted_at":"2026-01-25T17:02:02.086154Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} {"id":"bd-1gu","title":"[CP0] gi auth-test command","description":"## Background\n\nauth-test is a quick diagnostic command to verify GitLab connectivity. Used for troubleshooting and CI pipelines. Simpler than doctor because it only checks auth, not full system health.\n\nReference: docs/prd/checkpoint-0.md section \"gi auth-test\"\n\n## Approach\n\n**src/cli/commands/auth-test.ts:**\n```typescript\nimport { Command } from 'commander';\nimport { loadConfig } from '../../core/config';\nimport { GitLabClient } from '../../gitlab/client';\nimport { TokenNotSetError } from '../../core/errors';\n\nexport const authTestCommand = new Command('auth-test')\n .description('Verify GitLab authentication')\n .action(async (options, command) => {\n const globalOpts = command.optsWithGlobals();\n \n // 1. Load config\n const config = loadConfig(globalOpts.config);\n \n // 2. Get token from environment\n const token = process.env[config.gitlab.tokenEnvVar];\n if (!token) {\n throw new TokenNotSetError(config.gitlab.tokenEnvVar);\n }\n \n // 3. Create client and test auth\n const client = new GitLabClient({\n baseUrl: config.gitlab.baseUrl,\n token,\n });\n \n // 4. Get current user\n const user = await client.getCurrentUser();\n \n // 5. Output success\n console.log(`Authenticated as @${user.username} (${user.name})`);\n console.log(`GitLab: ${config.gitlab.baseUrl}`);\n });\n```\n\n**Output format:**\n```\nAuthenticated as @johndoe (John Doe)\nGitLab: https://gitlab.example.com\n```\n\n## Acceptance Criteria\n\n- [ ] Loads config from default or --config path\n- [ ] Gets token from configured env var (default GITLAB_TOKEN)\n- [ ] Throws TokenNotSetError if env var not set\n- [ ] Calls GET /api/v4/user to verify auth\n- [ ] Prints username and display name on success\n- [ ] Exit 0 on success\n- [ ] Exit 1 on auth failure (GitLabAuthError)\n- [ ] Exit 1 if config not found (ConfigNotFoundError)\n\n## Files\n\nCREATE:\n- src/cli/commands/auth-test.ts\n\n## TDD Loop\n\nN/A - simple command, verify manually and with integration test in init.test.ts\n\n```bash\n# Manual verification\nexport GITLAB_TOKEN=\"valid-token\"\ngi auth-test\n\n# With invalid token\nexport GITLAB_TOKEN=\"invalid\"\ngi auth-test # should exit 1\n```\n\n## Edge Cases\n\n- Config exists but token env var not set - clear error message\n- Token exists but wrong scopes - GitLabAuthError (401)\n- Network unreachable - GitLabNetworkError\n- Token with extra whitespace - should trim","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-24T16:09:51.135580Z","created_by":"tayloreernisse","updated_at":"2026-01-25T03:28:16.369542Z","closed_at":"2026-01-25T03:28:16.369481Z","close_reason":"done","compaction_level":0,"original_size":0,"dependencies":[{"issue_id":"bd-1gu","depends_on_id":"bd-13b","type":"blocks","created_at":"2026-01-24T16:13:10.058655Z","created_by":"tayloreernisse"},{"issue_id":"bd-1gu","depends_on_id":"bd-1l1","type":"blocks","created_at":"2026-01-24T16:13:10.077581Z","created_by":"tayloreernisse"}]} {"id":"bd-1hj","title":"[CP1] Ingestion orchestrator","description":"Coordinate issue + dependent discussion sync with bounded concurrency.\n\n## Module\nsrc/ingestion/orchestrator.rs\n\n## Canonical Pattern (CP1)\n\nWhen gi ingest --type=issues runs:\n\n1. **Ingest issues** - cursor-based with incremental cursor updates per page\n2. **Collect touched issues** - record IssueForDiscussionSync for each issue passing cursor filter\n3. **Filter for discussion sync** - enqueue issues where:\n issue.updated_at > issues.discussions_synced_for_updated_at\n4. **Execute discussion sync** - with bounded concurrency (dependent_concurrency from config)\n5. **Update watermark** - after each issue's discussions successfully ingested\n\n## Concurrency Notes\n\nRuntime decision: Use single-threaded Tokio runtime (flavor = \"current_thread\")\n- rusqlite::Connection is !Send, conflicts with multi-threaded runtimes\n- Single-threaded avoids Send bounds entirely\n- Use tokio::task::spawn_local + LocalSet for concurrent discussion fetches\n- Keeps code simple; can upgrade to channel-based DB writer in CP2 if needed\n\n## Configuration Used\n- config.sync.dependent_concurrency - limits parallel discussion requests\n- config.sync.cursor_rewind_seconds - safety margin for cursor\n\n## Progress Reporting\n- Show total issues fetched\n- Show issues needing discussion sync\n- Show discussion/note counts per project\n\nFiles: src/ingestion/orchestrator.rs\nTests: Integration tests with mocked GitLab\nDone when: Full issue + discussion ingestion orchestrated correctly","status":"tombstone","priority":2,"issue_type":"task","created_at":"2026-01-25T16:57:57.325679Z","created_by":"tayloreernisse","updated_at":"2026-01-25T17:02:01.851047Z","deleted_at":"2026-01-25T17:02:01.851043Z","deleted_by":"tayloreernisse","delete_reason":"recreating with correct deps","original_type":"task","compaction_level":0,"original_size":0} diff --git a/.beads/last-touched b/.beads/last-touched index 83946d7..e022aee 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -bd-1m8 +bd-1ep diff --git a/src/cli/commands/embed.rs b/src/cli/commands/embed.rs index 32ccfd5..330c70d 100644 --- a/src/cli/commands/embed.rs +++ b/src/cli/commands/embed.rs @@ -3,12 +3,12 @@ use console::style; use serde::Serialize; +use crate::Config; use crate::core::db::create_connection; use crate::core::error::Result; use crate::core::paths::get_db_path; use crate::embedding::ollama::{OllamaClient, OllamaConfig}; use crate::embedding::pipeline::embed_documents; -use crate::Config; /// Result of the embed command. #[derive(Debug, Default, Serialize)] @@ -69,10 +69,7 @@ pub async fn run_embed( /// Print human-readable output. pub fn print_embed(result: &EmbedCommandResult) { - println!( - "{} Embedding complete", - style("done").green().bold(), - ); + println!("{} Embedding complete", style("done").green().bold(),); println!(" Embedded: {}", result.embedded); if result.failed > 0 { println!(" Failed: {}", style(result.failed).red()); diff --git a/src/cli/commands/generate_docs.rs b/src/cli/commands/generate_docs.rs index eb6aa3a..166dc27 100644 --- a/src/cli/commands/generate_docs.rs +++ b/src/cli/commands/generate_docs.rs @@ -5,12 +5,12 @@ use rusqlite::Connection; use serde::Serialize; use tracing::info; +use crate::Config; use crate::core::db::create_connection; use crate::core::error::Result; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::documents::{SourceType, regenerate_dirty_documents}; -use crate::Config; const FULL_MODE_CHUNK_SIZE: i64 = 2000; @@ -134,7 +134,11 @@ fn seed_dirty( /// Print human-readable output. pub fn print_generate_docs(result: &GenerateDocsResult) { - let mode = if result.full_mode { "full" } else { "incremental" }; + let mode = if result.full_mode { + "full" + } else { + "incremental" + }; println!( "{} Document generation complete ({})", style("done").green().bold(), @@ -147,10 +151,7 @@ pub fn print_generate_docs(result: &GenerateDocsResult) { println!(" Regenerated: {}", result.regenerated); println!(" Unchanged: {}", result.unchanged); if result.errored > 0 { - println!( - " Errored: {}", - style(result.errored).red() - ); + println!(" Errored: {}", style(result.errored).red()); } } diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 1c9cd0f..02a0296 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -39,6 +39,9 @@ pub struct IngestResult { pub labels_created: usize, pub discussions_fetched: usize, pub notes_upserted: usize, + // Resource events + pub resource_events_fetched: usize, + pub resource_events_failed: usize, } /// Controls what interactive UI elements `run_ingest` displays. @@ -57,17 +60,26 @@ pub struct IngestDisplay { impl IngestDisplay { /// Interactive mode: everything visible. pub fn interactive() -> Self { - Self { show_progress: true, show_text: true } + Self { + show_progress: true, + show_text: true, + } } /// Robot/JSON mode: everything hidden. pub fn silent() -> Self { - Self { show_progress: false, show_text: false } + Self { + show_progress: false, + show_text: false, + } } /// Progress only (used by sync in interactive mode). pub fn progress_only() -> Self { - Self { show_progress: true, show_text: false } + Self { + show_progress: true, + show_text: false, + } } } @@ -105,9 +117,10 @@ pub async fn run_ingest( lock.acquire(force)?; // Get token from environment - let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { - env_var: config.gitlab.token_env_var.clone(), - })?; + let token = + std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { + env_var: config.gitlab.token_env_var.clone(), + })?; // Create GitLab client let client = GitLabClient::new(&config.gitlab.base_url, &token, None); @@ -199,7 +212,9 @@ pub async fn run_ingest( let b = ProgressBar::new(0); b.set_style( ProgressStyle::default_bar() - .template(" {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}") + .template( + " {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}", + ) .unwrap() .progress_chars("=> "), ); @@ -237,6 +252,23 @@ pub async fn run_ingest( ProgressEvent::MrDiscussionSyncComplete => { disc_bar_clone.finish_and_clear(); } + ProgressEvent::ResourceEventsFetchStarted { total } => { + disc_bar_clone.set_length(total as u64); + disc_bar_clone.set_position(0); + disc_bar_clone.set_style( + ProgressStyle::default_bar() + .template(" {spinner:.blue} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}") + .unwrap() + .progress_chars("=> "), + ); + disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100)); + } + ProgressEvent::ResourceEventFetched { current, total: _ } => { + disc_bar_clone.set_position(current as u64); + } + ProgressEvent::ResourceEventsFetchComplete { .. } => { + disc_bar_clone.finish_and_clear(); + } _ => {} }) }; @@ -269,6 +301,8 @@ pub async fn run_ingest( total.notes_upserted += result.notes_upserted; total.issues_synced_discussions += result.issues_synced_discussions; total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync; + total.resource_events_fetched += result.resource_events_fetched; + total.resource_events_failed += result.resource_events_failed; } else { let result = ingest_project_merge_requests_with_progress( &conn, @@ -301,6 +335,8 @@ pub async fn run_ingest( total.diffnotes_count += result.diffnotes_count; total.mrs_synced_discussions += result.mrs_synced_discussions; total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync; + total.resource_events_fetched += result.resource_events_fetched; + total.resource_events_failed += result.resource_events_failed; } } diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 98ba1a4..8bb3735 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -22,19 +22,19 @@ pub use count::{ pub use doctor::{print_doctor_results, run_doctor}; pub use embed::{print_embed, print_embed_json, run_embed}; pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs}; -pub use stats::{print_stats, print_stats_json, run_stats}; -pub use search::{ - print_search_results, print_search_results_json, run_search, SearchCliFilters, SearchResponse, -}; pub use ingest::{IngestDisplay, print_ingest_summary, print_ingest_summary_json, run_ingest}; pub use init::{InitInputs, InitOptions, InitResult, run_init}; pub use list::{ ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs, }; -pub use sync::{print_sync, print_sync_json, run_sync, SyncOptions, SyncResult}; +pub use search::{ + SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search, +}; pub use show::{ print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue, run_show_mr, }; +pub use stats::{print_stats, print_stats_json, run_stats}; +pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync}; pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status}; diff --git a/src/cli/commands/search.rs b/src/cli/commands/search.rs index 2eb69c7..63004d5 100644 --- a/src/cli/commands/search.rs +++ b/src/cli/commands/search.rs @@ -3,6 +3,7 @@ use console::style; use serde::Serialize; +use crate::Config; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; use crate::core::paths::get_db_path; @@ -10,10 +11,9 @@ use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, parse_since}; use crate::documents::SourceType; use crate::search::{ - apply_filters, get_result_snippet, rank_rrf, search_fts, FtsQueryMode, PathFilter, - SearchFilters, + FtsQueryMode, PathFilter, SearchFilters, apply_filters, get_result_snippet, rank_rrf, + search_fts, }; -use crate::Config; /// Display-ready search result with all fields hydrated. #[derive(Debug, Serialize)] @@ -86,9 +86,7 @@ pub fn run_search( mode: "lexical".to_string(), total_results: 0, results: vec![], - warnings: vec![ - "No documents indexed. Run 'lore generate-docs' first.".to_string() - ], + warnings: vec!["No documents indexed. Run 'lore generate-docs' first.".to_string()], }); } @@ -151,9 +149,9 @@ pub fn run_search( // Adaptive recall: wider initial fetch when filters applied let requested = filters.clamp_limit(); let top_k = if filters.has_any_filter() { - (requested * 50).max(200).min(1500) + (requested * 50).clamp(200, 1500) } else { - (requested * 10).max(50).min(1500) + (requested * 10).clamp(50, 1500) }; // FTS search @@ -190,10 +188,8 @@ pub fn run_search( let hydrated = hydrate_results(&conn, &filtered_ids)?; // Build display results preserving filter order - let rrf_map: std::collections::HashMap = ranked - .iter() - .map(|r| (r.document_id, r)) - .collect(); + let rrf_map: std::collections::HashMap = + ranked.iter().map(|r| (r.document_id, r)).collect(); let mut results: Vec = Vec::with_capacity(hydrated.len()); for row in &hydrated { @@ -256,16 +252,13 @@ struct HydratedRow { /// /// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key. /// Labels and paths fetched via correlated json_group_array subqueries. -fn hydrate_results( - conn: &rusqlite::Connection, - document_ids: &[i64], -) -> Result> { +fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result> { if document_ids.is_empty() { return Ok(Vec::new()); } - let ids_json = serde_json::to_string(document_ids) - .map_err(|e| LoreError::Other(e.to_string()))?; + let ids_json = + serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?; let sql = r#" SELECT d.id, d.source_type, d.title, d.url, d.author_username, @@ -325,10 +318,7 @@ pub fn print_search_results(response: &SearchResponse) { } if response.results.is_empty() { - println!( - "No results found for '{}'", - style(&response.query).bold() - ); + println!("No results found for '{}'", style(&response.query).bold()); return; } @@ -371,17 +361,11 @@ pub fn print_search_results(response: &SearchResponse) { ); if !result.labels.is_empty() { - println!( - " Labels: {}", - result.labels.join(", ") - ); + println!(" Labels: {}", result.labels.join(", ")); } // Strip HTML tags from snippet for terminal display - let clean_snippet = result - .snippet - .replace("", "") - .replace("", ""); + let clean_snippet = result.snippet.replace("", "").replace("", ""); println!(" {}", style(clean_snippet).dim()); if let Some(ref explain) = result.explain { diff --git a/src/cli/commands/show.rs b/src/cli/commands/show.rs index bbdff2b..8855b95 100644 --- a/src/cli/commands/show.rs +++ b/src/cli/commands/show.rs @@ -154,10 +154,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu FROM issues i JOIN projects p ON i.project_id = p.id WHERE i.iid = ? AND i.project_id = ?", - vec![ - Box::new(iid), - Box::new(project_id), - ], + vec![Box::new(iid), Box::new(project_id)], ) } None => ( @@ -346,10 +343,7 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result< FROM merge_requests m JOIN projects p ON m.project_id = p.id WHERE m.iid = ? AND m.project_id = ?", - vec![ - Box::new(iid), - Box::new(project_id), - ], + vec![Box::new(iid), Box::new(project_id)], ) } None => ( diff --git a/src/cli/commands/stats.rs b/src/cli/commands/stats.rs index d16c1bc..2303ad4 100644 --- a/src/cli/commands/stats.rs +++ b/src/cli/commands/stats.rs @@ -4,10 +4,10 @@ use console::style; use rusqlite::Connection; use serde::Serialize; +use crate::Config; use crate::core::db::create_connection; use crate::core::error::Result; use crate::core::paths::get_db_path; -use crate::Config; /// Result of the stats command. #[derive(Debug, Default, Serialize)] @@ -75,11 +75,7 @@ pub struct RepairResult { } /// Run the stats command. -pub fn run_stats( - config: &Config, - check: bool, - repair: bool, -) -> Result { +pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; @@ -87,14 +83,22 @@ pub fn run_stats( // Document counts result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?; - result.documents.issues = - count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?; - result.documents.merge_requests = - count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?; - result.documents.discussions = - count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?; - result.documents.truncated = - count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?; + result.documents.issues = count_query( + &conn, + "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'", + )?; + result.documents.merge_requests = count_query( + &conn, + "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'", + )?; + result.documents.discussions = count_query( + &conn, + "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'", + )?; + result.documents.truncated = count_query( + &conn, + "SELECT COUNT(*) FROM documents WHERE is_truncated = 1", + )?; // Embedding stats — skip gracefully if table doesn't exist (Gate A only) if table_exists(&conn, "embedding_metadata") { @@ -119,10 +123,14 @@ pub fn run_stats( result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?; // Queue stats - result.queues.dirty_sources = - count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?; - result.queues.dirty_sources_failed = - count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?; + result.queues.dirty_sources = count_query( + &conn, + "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL", + )?; + result.queues.dirty_sources_failed = count_query( + &conn, + "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL", + )?; if table_exists(&conn, "pending_discussion_fetches") { result.queues.pending_discussion_fetches = count_query( @@ -151,6 +159,7 @@ pub fn run_stats( } // Integrity check + #[allow(clippy::field_reassign_with_default)] if check { let mut integrity = IntegrityResult::default(); @@ -276,9 +285,7 @@ pub fn run_stats( } fn count_query(conn: &Connection, sql: &str) -> Result { - let count: i64 = conn - .query_row(sql, [], |row| row.get(0)) - .unwrap_or(0); + let count: i64 = conn.query_row(sql, [], |row| row.get(0)).unwrap_or(0); Ok(count) } @@ -300,7 +307,10 @@ pub fn print_stats(result: &StatsResult) { println!(" Merge Requests: {}", result.documents.merge_requests); println!(" Discussions: {}", result.documents.discussions); if result.documents.truncated > 0 { - println!(" Truncated: {}", style(result.documents.truncated).yellow()); + println!( + " Truncated: {}", + style(result.documents.truncated).yellow() + ); } println!(); @@ -318,13 +328,13 @@ pub fn print_stats(result: &StatsResult) { println!(); println!("{}", style("Queues").cyan().bold()); - println!(" Dirty sources: {} pending, {} failed", - result.queues.dirty_sources, - result.queues.dirty_sources_failed + println!( + " Dirty sources: {} pending, {} failed", + result.queues.dirty_sources, result.queues.dirty_sources_failed ); - println!(" Discussion fetch: {} pending, {} failed", - result.queues.pending_discussion_fetches, - result.queues.pending_discussion_fetches_failed + println!( + " Discussion fetch: {} pending, {} failed", + result.queues.pending_discussion_fetches, result.queues.pending_discussion_fetches_failed ); if result.queues.pending_dependent_fetches > 0 || result.queues.pending_dependent_fetches_failed > 0 @@ -431,10 +441,12 @@ pub fn print_stats_json(result: &StatsResult) { let output = StatsJsonOutput { ok: true, data: StatsResult { - documents: DocumentStats { ..*&result.documents }, - embeddings: EmbeddingStats { ..*&result.embeddings }, - fts: FtsStats { ..*&result.fts }, - queues: QueueStats { ..*&result.queues }, + documents: DocumentStats { ..result.documents }, + embeddings: EmbeddingStats { + ..result.embeddings + }, + fts: FtsStats { ..result.fts }, + queues: QueueStats { ..result.queues }, integrity: result.integrity.as_ref().map(|i| IntegrityResult { ok: i.ok, fts_doc_mismatch: i.fts_doc_mismatch, diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs index 1bd0a85..d42bcf3 100644 --- a/src/cli/commands/sync.rs +++ b/src/cli/commands/sync.rs @@ -29,6 +29,8 @@ pub struct SyncResult { pub issues_updated: usize, pub mrs_updated: usize, pub discussions_fetched: usize, + pub resource_events_fetched: usize, + pub resource_events_failed: usize, pub documents_regenerated: usize, pub documents_embedded: usize, } @@ -70,26 +72,61 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result Result { @@ -112,11 +154,7 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result Result Result 0 || result.resource_events_failed > 0 { + println!( + " Resource events fetched: {}", + result.resource_events_fetched + ); + if result.resource_events_failed > 0 { + println!( + " Resource events failed: {}", + result.resource_events_failed + ); + } + } + println!( + " Documents regenerated: {}", + result.documents_regenerated + ); + println!(" Documents embedded: {}", result.documents_embedded); + println!(" Elapsed: {:.1}s", elapsed.as_secs_f64()); } /// JSON output for sync. diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 8186d72..5e6e416 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -213,7 +213,12 @@ pub struct IssuesArgs { pub iid: Option, /// Maximum results - #[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")] + #[arg( + short = 'n', + long = "limit", + default_value = "50", + help_heading = "Output" + )] pub limit: usize, /// Filter by state (opened, closed, all) @@ -249,7 +254,11 @@ pub struct IssuesArgs { pub due_before: Option, /// Show only issues with a due date - #[arg(long = "has-due", help_heading = "Filters", overrides_with = "no_has_due")] + #[arg( + long = "has-due", + help_heading = "Filters", + overrides_with = "no_has_due" + )] pub has_due: bool, #[arg(long = "no-has-due", hide = true, overrides_with = "has_due")] @@ -267,7 +276,12 @@ pub struct IssuesArgs { pub no_asc: bool, /// Open first matching item in browser - #[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")] + #[arg( + short = 'o', + long, + help_heading = "Actions", + overrides_with = "no_open" + )] pub open: bool, #[arg(long = "no-open", hide = true, overrides_with = "open")] @@ -281,7 +295,12 @@ pub struct MrsArgs { pub iid: Option, /// Maximum results - #[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")] + #[arg( + short = 'n', + long = "limit", + default_value = "50", + help_heading = "Output" + )] pub limit: usize, /// Filter by state (opened, merged, closed, locked, all) @@ -313,11 +332,21 @@ pub struct MrsArgs { pub since: Option, /// Show only draft MRs - #[arg(short = 'd', long, conflicts_with = "no_draft", help_heading = "Filters")] + #[arg( + short = 'd', + long, + conflicts_with = "no_draft", + help_heading = "Filters" + )] pub draft: bool, /// Exclude draft MRs - #[arg(short = 'D', long = "no-draft", conflicts_with = "draft", help_heading = "Filters")] + #[arg( + short = 'D', + long = "no-draft", + conflicts_with = "draft", + help_heading = "Filters" + )] pub no_draft: bool, /// Filter by target branch @@ -340,7 +369,12 @@ pub struct MrsArgs { pub no_asc: bool, /// Open first matching item in browser - #[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")] + #[arg( + short = 'o', + long, + help_heading = "Actions", + overrides_with = "no_open" + )] pub open: bool, #[arg(long = "no-open", hide = true, overrides_with = "open")] @@ -427,7 +461,12 @@ pub struct SearchArgs { pub updated_after: Option, /// Maximum results (default 20, max 100) - #[arg(short = 'n', long = "limit", default_value = "20", help_heading = "Output")] + #[arg( + short = 'n', + long = "limit", + default_value = "20", + help_heading = "Output" + )] pub limit: usize, /// Show ranking explanation per result diff --git a/src/core/backoff.rs b/src/core/backoff.rs index 1ee035d..d1f0bd3 100644 --- a/src/core/backoff.rs +++ b/src/core/backoff.rs @@ -86,7 +86,10 @@ mod tests { let result = compute_next_attempt_at(now, 1); let delay = result - now; // attempt 1: base = 2000ms, with jitter: 1800-2200ms - assert!(delay >= 1800 && delay <= 2200, "first retry delay: {delay}ms"); + assert!( + (1800..=2200).contains(&delay), + "first retry delay: {delay}ms" + ); } #[test] diff --git a/src/core/db.rs b/src/core/db.rs index 2c6fc3c..bee0fa4 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -31,22 +31,10 @@ const MIGRATIONS: &[(&str, &str)] = &[ "006", include_str!("../../migrations/006_merge_requests.sql"), ), - ( - "007", - include_str!("../../migrations/007_documents.sql"), - ), - ( - "008", - include_str!("../../migrations/008_fts5.sql"), - ), - ( - "009", - include_str!("../../migrations/009_embeddings.sql"), - ), - ( - "010", - include_str!("../../migrations/010_chunk_config.sql"), - ), + ("007", include_str!("../../migrations/007_documents.sql")), + ("008", include_str!("../../migrations/008_fts5.sql")), + ("009", include_str!("../../migrations/009_embeddings.sql")), + ("010", include_str!("../../migrations/010_chunk_config.sql")), ( "011", include_str!("../../migrations/011_resource_events.sql"), diff --git a/src/core/dependent_queue.rs b/src/core/dependent_queue.rs index 0b8ea44..8ffad0e 100644 --- a/src/core/dependent_queue.rs +++ b/src/core/dependent_queue.rs @@ -40,7 +40,15 @@ pub fn enqueue_job( "INSERT OR IGNORE INTO pending_dependent_fetches (project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - rusqlite::params![project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, now], + rusqlite::params![ + project_id, + entity_type, + entity_iid, + entity_local_id, + job_type, + payload_json, + now + ], )?; Ok(changes > 0) @@ -69,21 +77,18 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul )?; let jobs: Vec = select_stmt - .query_map( - rusqlite::params![job_type, now, batch_size as i64], - |row| { - Ok(PendingJob { - id: row.get(0)?, - project_id: row.get(1)?, - entity_type: row.get(2)?, - entity_iid: row.get(3)?, - entity_local_id: row.get(4)?, - job_type: row.get(5)?, - payload_json: row.get(6)?, - attempts: row.get(7)?, - }) - }, - )? + .query_map(rusqlite::params![job_type, now, batch_size as i64], |row| { + Ok(PendingJob { + id: row.get(0)?, + project_id: row.get(1)?, + entity_type: row.get(2)?, + entity_iid: row.get(3)?, + entity_local_id: row.get(4)?, + job_type: row.get(5)?, + payload_json: row.get(6)?, + attempts: row.get(7)?, + }) + })? .collect::, _>>()?; // Lock the claimed jobs diff --git a/src/core/error.rs b/src/core/error.rs index 17795db..8e42e5f 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -222,9 +222,9 @@ impl LoreError { "Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore doctor\n lore reset --yes", ), Self::Http(_) => Some("Check network connection"), - Self::NotFound(_) => Some( - "Verify the entity exists.\n\n Example:\n lore issues\n lore mrs", - ), + Self::NotFound(_) => { + Some("Verify the entity exists.\n\n Example:\n lore issues\n lore mrs") + } Self::Ambiguous(_) => Some( "Use -p to choose a specific project.\n\n Example:\n lore issues 42 -p group/project-a\n lore mrs 99 -p group/project-b", ), diff --git a/src/core/events_db.rs b/src/core/events_db.rs index cefd830..0e5ee5b 100644 --- a/src/core/events_db.rs +++ b/src/core/events_db.rs @@ -150,7 +150,10 @@ pub fn upsert_milestone_events( /// Resolve entity type string to (issue_id, merge_request_id) pair. /// Exactly one is Some, the other is None. -fn resolve_entity_ids(entity_type: &str, entity_local_id: i64) -> Result<(Option, Option)> { +fn resolve_entity_ids( + entity_type: &str, + entity_local_id: i64, +) -> Result<(Option, Option)> { match entity_type { "issue" => Ok((Some(entity_local_id), None)), "merge_request" => Ok((None, Some(entity_local_id))), diff --git a/src/core/project.rs b/src/core/project.rs index 033d050..828693f 100644 --- a/src/core/project.rs +++ b/src/core/project.rs @@ -33,7 +33,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { let mut suffix_stmt = conn.prepare( "SELECT id, path_with_namespace FROM projects WHERE path_with_namespace LIKE '%/' || ?1 - OR path_with_namespace = ?1" + OR path_with_namespace = ?1", )?; let suffix_matches: Vec<(i64, String)> = suffix_stmt .query_map(rusqlite::params![project_str], |row| { @@ -48,7 +48,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { return Err(LoreError::Ambiguous(format!( "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}", project_str, - matching.iter().map(|p| format!(" {}", p)).collect::>().join("\n"), + matching + .iter() + .map(|p| format!(" {}", p)) + .collect::>() + .join("\n"), matching[0] ))); } @@ -58,7 +62,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { // Step 4: Case-insensitive substring match (unambiguous) let mut substr_stmt = conn.prepare( "SELECT id, path_with_namespace FROM projects - WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'" + WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'", )?; let substr_matches: Vec<(i64, String)> = substr_stmt .query_map(rusqlite::params![project_str], |row| { @@ -73,7 +77,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { return Err(LoreError::Ambiguous(format!( "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}", project_str, - matching.iter().map(|p| format!(" {}", p)).collect::>().join("\n"), + matching + .iter() + .map(|p| format!(" {}", p)) + .collect::>() + .join("\n"), matching[0] ))); } @@ -81,9 +89,8 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { } // Step 5: No match — list available projects - let mut all_stmt = conn.prepare( - "SELECT path_with_namespace FROM projects ORDER BY path_with_namespace" - )?; + let mut all_stmt = + conn.prepare("SELECT path_with_namespace FROM projects ORDER BY path_with_namespace")?; let all_projects: Vec = all_stmt .query_map([], |row| row.get(0))? .collect::, _>>()?; @@ -98,7 +105,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { Err(LoreError::Other(format!( "Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}", project_str, - all_projects.iter().map(|p| format!(" {}", p)).collect::>().join("\n"), + all_projects + .iter() + .map(|p| format!(" {}", p)) + .collect::>() + .join("\n"), all_projects[0] ))) } @@ -109,7 +120,8 @@ mod tests { fn setup_db() -> Connection { let conn = Connection::open_in_memory().unwrap(); - conn.execute_batch(" + conn.execute_batch( + " CREATE TABLE projects ( id INTEGER PRIMARY KEY, gitlab_project_id INTEGER UNIQUE NOT NULL, @@ -121,7 +133,9 @@ mod tests { raw_payload_id INTEGER ); CREATE INDEX idx_projects_path ON projects(path_with_namespace); - ").unwrap(); + ", + ) + .unwrap(); conn } @@ -129,7 +143,8 @@ mod tests { conn.execute( "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)", rusqlite::params![id, id * 100, path], - ).unwrap(); + ) + .unwrap(); } #[test] @@ -164,7 +179,11 @@ mod tests { insert_project(&conn, 2, "frontend/auth-service"); let err = resolve_project(&conn, "auth-service").unwrap_err(); let msg = err.to_string(); - assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg); + assert!( + msg.contains("ambiguous"), + "Expected ambiguous error, got: {}", + msg + ); assert!(msg.contains("backend/auth-service")); assert!(msg.contains("frontend/auth-service")); } @@ -195,7 +214,11 @@ mod tests { // "code" matches both projects let err = resolve_project(&conn, "code").unwrap_err(); let msg = err.to_string(); - assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg); + assert!( + msg.contains("ambiguous"), + "Expected ambiguous error, got: {}", + msg + ); assert!(msg.contains("vs/python-code")); assert!(msg.contains("vs/typescript-code")); } @@ -217,7 +240,11 @@ mod tests { insert_project(&conn, 1, "backend/auth-service"); let err = resolve_project(&conn, "nonexistent").unwrap_err(); let msg = err.to_string(); - assert!(msg.contains("not found"), "Expected not found error, got: {}", msg); + assert!( + msg.contains("not found"), + "Expected not found error, got: {}", + msg + ); assert!(msg.contains("backend/auth-service")); } diff --git a/src/documents/extractor.rs b/src/documents/extractor.rs index 8b3b04e..14d9358 100644 --- a/src/documents/extractor.rs +++ b/src/documents/extractor.rs @@ -4,10 +4,10 @@ use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::collections::BTreeSet; -use crate::core::error::Result; use super::truncation::{ - truncate_discussion, truncate_hard_cap, NoteContent, MAX_DISCUSSION_BYTES, + MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap, }; +use crate::core::error::Result; /// Source type for documents. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -98,22 +98,34 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result