Compare commits
7 Commits
0fe3737035
...
fd0a40b181
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd0a40b181 | ||
|
|
b2811b5e45 | ||
|
|
2d2e470621 | ||
|
|
23efb15599 | ||
|
|
a45c37c7e4 | ||
|
|
8657e10822 | ||
|
|
7fdeafa330 |
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
|
||||
bd-1tv8
|
||||
bd-8con
|
||||
|
||||
58
migrations/028_discussions_mr_fk.sql
Normal file
58
migrations/028_discussions_mr_fk.sql
Normal file
@@ -0,0 +1,58 @@
|
||||
-- Migration 028: Add FK constraint on discussions.merge_request_id
|
||||
-- Schema version: 28
|
||||
-- Fixes missing foreign key that causes orphaned discussions when MRs are deleted
|
||||
|
||||
-- SQLite doesn't support ALTER TABLE ADD CONSTRAINT, so we must recreate the table.
|
||||
|
||||
-- Step 1: Create new table with the FK constraint
|
||||
CREATE TABLE discussions_new (
|
||||
id INTEGER PRIMARY KEY,
|
||||
gitlab_discussion_id TEXT NOT NULL,
|
||||
project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
||||
issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
|
||||
merge_request_id INTEGER REFERENCES merge_requests(id) ON DELETE CASCADE, -- FK was missing!
|
||||
noteable_type TEXT NOT NULL CHECK (noteable_type IN ('Issue', 'MergeRequest')),
|
||||
individual_note INTEGER NOT NULL DEFAULT 0,
|
||||
first_note_at INTEGER,
|
||||
last_note_at INTEGER,
|
||||
last_seen_at INTEGER NOT NULL,
|
||||
resolvable INTEGER NOT NULL DEFAULT 0,
|
||||
resolved INTEGER NOT NULL DEFAULT 0,
|
||||
raw_payload_id INTEGER REFERENCES raw_payloads(id), -- Added in migration 004
|
||||
CHECK (
|
||||
(noteable_type = 'Issue' AND issue_id IS NOT NULL AND merge_request_id IS NULL) OR
|
||||
(noteable_type = 'MergeRequest' AND merge_request_id IS NOT NULL AND issue_id IS NULL)
|
||||
)
|
||||
);
|
||||
|
||||
-- Step 2: Copy data (only rows with valid FK references to avoid constraint violations)
|
||||
INSERT INTO discussions_new
|
||||
SELECT d.* FROM discussions d
|
||||
WHERE (d.merge_request_id IS NULL OR EXISTS (SELECT 1 FROM merge_requests m WHERE m.id = d.merge_request_id));
|
||||
|
||||
-- Step 3: Drop old table and rename
|
||||
DROP TABLE discussions;
|
||||
ALTER TABLE discussions_new RENAME TO discussions;
|
||||
|
||||
-- Step 4: Recreate ALL indexes that were on the discussions table
|
||||
-- From migration 002 (original table)
|
||||
CREATE UNIQUE INDEX uq_discussions_project_discussion_id ON discussions(project_id, gitlab_discussion_id);
|
||||
CREATE INDEX idx_discussions_issue ON discussions(issue_id);
|
||||
CREATE INDEX idx_discussions_mr ON discussions(merge_request_id);
|
||||
CREATE INDEX idx_discussions_last_note ON discussions(last_note_at);
|
||||
-- From migration 003 (orphan detection)
|
||||
CREATE INDEX idx_discussions_last_seen ON discussions(last_seen_at);
|
||||
-- From migration 006 (MR indexes)
|
||||
CREATE INDEX idx_discussions_mr_id ON discussions(merge_request_id);
|
||||
CREATE INDEX idx_discussions_mr_resolved ON discussions(merge_request_id, resolved, resolvable);
|
||||
-- From migration 017 (who command indexes)
|
||||
CREATE INDEX idx_discussions_unresolved_recent ON discussions(project_id, last_note_at) WHERE resolvable = 1 AND resolved = 0;
|
||||
CREATE INDEX idx_discussions_unresolved_recent_global ON discussions(last_note_at) WHERE resolvable = 1 AND resolved = 0;
|
||||
-- From migration 019 (list performance)
|
||||
CREATE INDEX idx_discussions_issue_resolved ON discussions(issue_id, resolvable, resolved);
|
||||
-- From migration 022 (notes query optimization)
|
||||
CREATE INDEX idx_discussions_issue_id ON discussions(issue_id);
|
||||
|
||||
-- Record migration
|
||||
INSERT INTO schema_version (version, applied_at, description)
|
||||
VALUES (28, strftime('%s', 'now') * 1000, 'Add FK constraint on discussions.merge_request_id');
|
||||
@@ -2,166 +2,445 @@
|
||||
plan: true
|
||||
title: "GitLab TODOs Integration"
|
||||
status: proposed
|
||||
iteration: 1
|
||||
target_iterations: 3
|
||||
iteration: 4
|
||||
target_iterations: 4
|
||||
beads_revision: 1
|
||||
related_plans: []
|
||||
created: 2026-02-23
|
||||
updated: 2026-02-23
|
||||
updated: 2026-02-26
|
||||
audit_revision: 4
|
||||
---
|
||||
|
||||
# GitLab TODOs Integration
|
||||
|
||||
## Summary
|
||||
|
||||
Add GitLab TODO support to lore. Todos are fetched during sync, stored locally, and surfaced through:
|
||||
1. A new `--todos` section in `lore me`
|
||||
2. Enrichment of the activity feed in `lore me`
|
||||
3. A standalone `lore todos` command
|
||||
Add GitLab TODO support to lore. Todos are fetched during sync, stored locally, and surfaced through a standalone `lore todos` command and integration into the `lore me` dashboard.
|
||||
|
||||
**Scope:** Read-only. No mark-as-done operations.
|
||||
|
||||
---
|
||||
|
||||
## Design Decisions (from interview)
|
||||
## Workflows
|
||||
|
||||
| Decision | Choice |
|
||||
|----------|--------|
|
||||
| Write operations | **Read-only** — no mark-as-done |
|
||||
| Storage | **Persist locally** in SQLite |
|
||||
| Integration | Three-way: activity enrichment + `--todos` flag + `lore todos` |
|
||||
| Action types | Core only: assigned, mentioned, directly_addressed, approval_required, build_failed, unmergeable |
|
||||
| Niche actions | Skip display (but store): merge_train_removed, member_access_requested, marked |
|
||||
| Project filter | **Always account-wide** — `--project` does NOT filter todos |
|
||||
| Sync timing | During normal `lore sync` |
|
||||
| Non-synced projects | Include with `[external]` indicator |
|
||||
| Attention state | **Separate signal** — todos don't boost attention |
|
||||
| Summary header | Include pending todo count |
|
||||
| Grouping | By action type: Assignments \| Mentions \| Approvals \| Build Issues |
|
||||
| History | **Pending only** — done todos not tracked |
|
||||
| `lore todos` filters | **None** — show all pending, simple |
|
||||
| Robot mode | Yes, standard envelope |
|
||||
| Target types | All GitLab supports (Issue, MR, Epic, Commit, etc.) |
|
||||
### Workflow 1: Morning Triage (Human)
|
||||
|
||||
---
|
||||
1. User runs `lore me` to see personal dashboard
|
||||
2. Summary header shows "5 pending todos" alongside issue/MR counts
|
||||
3. Todos section groups items: 2 Assignments, 2 Mentions, 1 Approval Required
|
||||
4. User scans Assignments — sees issue #42 assigned by @manager
|
||||
5. User runs `lore todos` for full detail with body snippets
|
||||
6. User clicks target URL to address highest-priority item
|
||||
7. After marking done in GitLab, next `lore sync` removes it locally
|
||||
|
||||
## Out of Scope
|
||||
### Workflow 2: Agent Polling (Robot Mode)
|
||||
|
||||
- Write operations (mark as done)
|
||||
- Done todo history tracking
|
||||
- Filters on `lore todos` command
|
||||
- Todo-based attention state boosting
|
||||
- Notification settings API integration (deferred to separate plan)
|
||||
1. Agent runs `lore --robot health` as pre-flight check
|
||||
2. Agent runs `lore --robot me --fields minimal` for dashboard
|
||||
3. Agent extracts `pending_todo_count` from summary — if 0, skip todos
|
||||
4. If count > 0, agent runs `lore --robot todos`
|
||||
5. Agent iterates `data.todos[]`, filtering by `action` type
|
||||
6. Agent prioritizes `approval_required` and `build_failed` for immediate attention
|
||||
7. Agent logs external todos (`is_external: true`) for manual review
|
||||
|
||||
### Workflow 3: Cross-Project Visibility
|
||||
|
||||
1. User is mentioned in a project they don't sync (e.g., company-wide repo)
|
||||
2. `lore sync` fetches the todo anyway (account-wide fetch)
|
||||
3. `lore todos` shows item with `[external]` indicator and project path
|
||||
4. User can still click target URL to view in GitLab
|
||||
5. Target title may be unavailable — graceful fallback to "Untitled"
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
### AC-1: Database Schema
|
||||
Behavioral contract. Each AC is a single testable statement.
|
||||
|
||||
- [ ] **AC-1.1:** Create `todos` table with columns:
|
||||
- `id` INTEGER PRIMARY KEY
|
||||
- `gitlab_todo_id` INTEGER NOT NULL UNIQUE
|
||||
- `project_id` INTEGER REFERENCES projects(id) ON DELETE SET NULL (nullable for non-synced)
|
||||
- `target_type` TEXT NOT NULL (Issue, MergeRequest, Commit, Epic, etc.)
|
||||
- `target_id` INTEGER (GitLab ID of target entity)
|
||||
- `target_iid` INTEGER (IID for issues/MRs, nullable)
|
||||
- `target_url` TEXT NOT NULL
|
||||
- `target_title` TEXT
|
||||
- `action_name` TEXT NOT NULL (assigned, mentioned, etc.)
|
||||
- `author_id` INTEGER
|
||||
- `author_username` TEXT
|
||||
- `body` TEXT (the todo message/snippet)
|
||||
- `state` TEXT NOT NULL (pending)
|
||||
- `created_at` INTEGER NOT NULL (epoch ms)
|
||||
- `updated_at` INTEGER NOT NULL (epoch ms)
|
||||
- `synced_at` INTEGER NOT NULL (epoch ms)
|
||||
- `project_path` TEXT (for display even if project not synced)
|
||||
- [ ] **AC-1.2:** Create index `idx_todos_state_action` on `(state, action_name)`
|
||||
- [ ] **AC-1.3:** Create index `idx_todos_target` on `(target_type, target_id)`
|
||||
- [ ] **AC-1.4:** Create index `idx_todos_created` on `(created_at DESC)`
|
||||
- [ ] **AC-1.5:** Migration increments schema version
|
||||
### Storage
|
||||
|
||||
### AC-2: GitLab API Client
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-1 | Todos are persisted locally in SQLite |
|
||||
| AC-2 | Each todo is uniquely identified by its GitLab todo ID |
|
||||
| AC-3 | Todos from non-synced projects are stored with their project path |
|
||||
|
||||
- [ ] **AC-2.1:** Add `fetch_todos()` method to GitLab client
|
||||
- [ ] **AC-2.2:** Fetch only `state=pending` todos
|
||||
- [ ] **AC-2.3:** Handle pagination (use existing pagination pattern)
|
||||
- [ ] **AC-2.4:** Parse all target types GitLab returns
|
||||
- [ ] **AC-2.5:** Extract project path from `target_url` for non-synced projects
|
||||
### Sync
|
||||
|
||||
### AC-3: Sync Pipeline
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-4 | `lore sync` fetches all pending todos from GitLab |
|
||||
| AC-5 | Sync fetches todos account-wide, not per-project |
|
||||
| AC-6 | Todos marked done in GitLab are removed locally on next sync |
|
||||
| AC-7 | Transient sync errors do not delete valid local todos |
|
||||
| AC-8 | `lore sync --no-todos` skips todo fetching |
|
||||
| AC-9 | Sync logs todo statistics (fetched, inserted, updated, deleted) |
|
||||
|
||||
- [ ] **AC-3.1:** Add todos sync step to `lore sync` pipeline
|
||||
- [ ] **AC-3.2:** Sync todos AFTER issues/MRs (ordering consistency)
|
||||
- [ ] **AC-3.3:** Snapshot semantics: fetch all pending, upsert, delete missing (= marked done elsewhere)
|
||||
- [ ] **AC-3.4:** Track `synced_at` timestamp
|
||||
- [ ] **AC-3.5:** Log todo sync stats: fetched, inserted, updated, deleted
|
||||
- [ ] **AC-3.6:** Add `--no-todos` flag to skip todo sync
|
||||
### `lore todos` Command
|
||||
|
||||
### AC-4: Action Type Handling
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-10 | `lore todos` displays all pending todos |
|
||||
| AC-11 | Todos are grouped by action type: Assignments, Mentions, Approvals, Build Issues |
|
||||
| AC-12 | Each todo shows: target title, project path, author, age |
|
||||
| AC-13 | Non-synced project todos display `[external]` indicator |
|
||||
| AC-14 | `lore todos --limit N` limits output to N todos |
|
||||
| AC-15 | `lore --robot todos` returns JSON with standard `{ok, data, meta}` envelope |
|
||||
| AC-16 | `lore --robot todos --fields minimal` returns reduced field set |
|
||||
| AC-17 | `todo` and `td` are recognized as aliases for `todos` |
|
||||
|
||||
- [ ] **AC-4.1:** Store ALL action types from GitLab
|
||||
- [ ] **AC-4.2:** Display only core actions:
|
||||
- `assigned` — assigned to issue/MR
|
||||
- `mentioned` — @mentioned in comment
|
||||
- `directly_addressed` — @mentioned at start of comment
|
||||
- `approval_required` — approval needed on MR
|
||||
- `build_failed` — CI failed on your MR
|
||||
- `unmergeable` — merge conflicts on your MR
|
||||
- [ ] **AC-4.3:** Skip display (but store) niche actions: `merge_train_removed`, `member_access_requested`, `marked`
|
||||
### `lore me` Integration
|
||||
|
||||
### AC-5: `lore todos` Command
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-18 | `lore me` summary includes pending todo count |
|
||||
| AC-19 | `lore me` includes a todos section in the full dashboard |
|
||||
| AC-20 | `lore me --todos` shows only the todos section |
|
||||
| AC-21 | Todos are NOT filtered by `--project` flag (always account-wide) |
|
||||
| AC-22 | Warning is displayed if `--project` is passed with `--todos` |
|
||||
| AC-23 | Todo events appear in the activity feed for local entities |
|
||||
|
||||
- [ ] **AC-5.1:** New subcommand `lore todos` (alias: `todo`)
|
||||
- [ ] **AC-5.2:** Display all pending todos, no filters
|
||||
- [ ] **AC-5.3:** Group by action type: Assignments | Mentions | Approvals | Build Issues
|
||||
- [ ] **AC-5.4:** Per-todo display: target title, project path, author, age, action
|
||||
- [ ] **AC-5.5:** Flag non-synced project todos with `[external]` indicator
|
||||
- [ ] **AC-5.6:** Human-readable output with colors/icons
|
||||
- [ ] **AC-5.7:** Robot mode: standard `{ok, data, meta}` envelope
|
||||
### Action Types
|
||||
|
||||
### AC-6: `lore me --todos` Section
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-24 | Core actions are displayed: assigned, mentioned, directly_addressed, approval_required, build_failed, unmergeable |
|
||||
| AC-25 | Niche actions are stored but not displayed: merge_train_removed, member_access_requested, marked |
|
||||
|
||||
- [ ] **AC-6.1:** Add `--todos` flag to `MeArgs`
|
||||
- [ ] **AC-6.2:** When no section flags: show todos in full dashboard
|
||||
- [ ] **AC-6.3:** When `--todos` flag only: show only todos section
|
||||
- [ ] **AC-6.4:** Todos section grouped by action type
|
||||
- [ ] **AC-6.5:** Todos NOT filtered by `--project` (always account-wide)
|
||||
- [ ] **AC-6.6:** Robot mode includes `todos` array in dashboard response
|
||||
### Attention State
|
||||
|
||||
### AC-7: `lore me` Summary Header
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-26 | Todos do not affect attention state calculation |
|
||||
| AC-27 | Todos do not appear in "since last check" cursor-based inbox |
|
||||
|
||||
- [ ] **AC-7.1:** Add `pending_todo_count` to `MeSummary` struct
|
||||
- [ ] **AC-7.2:** Display todo count in summary line (human mode)
|
||||
- [ ] **AC-7.3:** Include `pending_todo_count` in robot mode summary
|
||||
### Error Handling
|
||||
|
||||
### AC-8: Activity Feed Enrichment
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-28 | 403 Forbidden on todos API logs warning and continues sync |
|
||||
| AC-29 | 429 Rate Limited respects Retry-After header |
|
||||
| AC-30 | Malformed todo JSON logs warning, skips that item, and disables purge for that sync |
|
||||
|
||||
- [ ] **AC-8.1:** Todos with local issue/MR target appear in activity feed
|
||||
- [ ] **AC-8.2:** New `ActivityEventType::Todo` variant
|
||||
- [ ] **AC-8.3:** Todo events show: action type, author, target in summary
|
||||
- [ ] **AC-8.4:** Sorted chronologically with other activity events
|
||||
- [ ] **AC-8.5:** Respect `--since` filter on todo `created_at`
|
||||
### Documentation
|
||||
|
||||
### AC-9: Non-Synced Project Handling
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-31 | `lore todos` appears in CLI help |
|
||||
| AC-32 | `lore robot-docs` includes todos schema |
|
||||
| AC-33 | CLAUDE.md documents the todos command |
|
||||
|
||||
- [ ] **AC-9.1:** Store todos even if target project not in config
|
||||
- [ ] **AC-9.2:** Display `[external]` indicator for non-synced project todos
|
||||
- [ ] **AC-9.3:** Show project path (extracted from target URL)
|
||||
- [ ] **AC-9.4:** Graceful fallback when target title unavailable
|
||||
### Quality
|
||||
|
||||
### AC-10: Attention State
|
||||
| ID | Behavior |
|
||||
|----|----------|
|
||||
| AC-34 | All quality gates pass: check, clippy, fmt, test |
|
||||
|
||||
- [ ] **AC-10.1:** Attention state calculation remains note-based (unchanged)
|
||||
- [ ] **AC-10.2:** Todos are separate signal, do not affect attention state
|
||||
- [ ] **AC-10.3:** Document this design decision in code comments
|
||||
---
|
||||
|
||||
### AC-11: Robot Mode Schema
|
||||
## Architecture
|
||||
|
||||
- [ ] **AC-11.1:** `lore todos --robot` returns:
|
||||
```json
|
||||
{
|
||||
Designed to fulfill the acceptance criteria above.
|
||||
|
||||
### Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── gitlab/
|
||||
│ ├── client.rs # fetch_todos() method (AC-4, AC-5)
|
||||
│ └── types.rs # GitLabTodo struct
|
||||
├── ingestion/
|
||||
│ └── todos.rs # sync_todos(), purge-safe deletion (AC-6, AC-7)
|
||||
├── cli/commands/
|
||||
│ ├── todos.rs # lore todos command (AC-10-17)
|
||||
│ └── me/
|
||||
│ ├── types.rs # MeTodo, extend MeSummary (AC-18)
|
||||
│ └── queries.rs # query_todos() (AC-19, AC-23)
|
||||
└── core/
|
||||
└── db.rs # Migration 028 (AC-1, AC-2, AC-3)
|
||||
```
|
||||
|
||||
### Data Flow
|
||||
|
||||
```
|
||||
GitLab API Local SQLite CLI Output
|
||||
─────────── ──────────── ──────────
|
||||
GET /api/v4/todos → todos table → lore todos
|
||||
(account-wide) (purge-safe sync) lore me --todos
|
||||
```
|
||||
|
||||
### Key Design Decisions
|
||||
|
||||
| Decision | Rationale | ACs |
|
||||
|----------|-----------|-----|
|
||||
| Account-wide fetch | GitLab todos API is user-scoped, not project-scoped | AC-5, AC-21 |
|
||||
| Purge-safe deletion | Transient errors should not delete valid data | AC-7 |
|
||||
| Separate from attention | Todos are notifications, not engagement signals | AC-26, AC-27 |
|
||||
| Store all actions, display core | Future-proofs for new action types | AC-24, AC-25 |
|
||||
|
||||
### Existing Code to Extend
|
||||
|
||||
| Type | Location | Extension |
|
||||
|------|----------|-----------|
|
||||
| `MeSummary` | `src/cli/commands/me/types.rs` | Add `pending_todo_count` field |
|
||||
| `ActivityEventType` | `src/cli/commands/me/types.rs` | Add `Todo` variant |
|
||||
| `MeDashboard` | `src/cli/commands/me/types.rs` | Add `todos: Vec<MeTodo>` field |
|
||||
| `SyncArgs` | `src/cli/mod.rs` | Add `--no-todos` flag |
|
||||
| `MeArgs` | `src/cli/mod.rs` | Add `--todos` flag |
|
||||
|
||||
---
|
||||
|
||||
## Implementation Specifications
|
||||
|
||||
Each IMP section details HOW to fulfill specific ACs.
|
||||
|
||||
### IMP-1: Database Schema
|
||||
|
||||
**Fulfills:** AC-1, AC-2, AC-3
|
||||
|
||||
**Migration 028:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE todos (
|
||||
id INTEGER PRIMARY KEY,
|
||||
gitlab_todo_id INTEGER NOT NULL UNIQUE,
|
||||
project_id INTEGER REFERENCES projects(id) ON DELETE SET NULL,
|
||||
gitlab_project_id INTEGER,
|
||||
target_type TEXT NOT NULL,
|
||||
target_id TEXT,
|
||||
target_iid INTEGER,
|
||||
target_url TEXT NOT NULL,
|
||||
target_title TEXT,
|
||||
action_name TEXT NOT NULL,
|
||||
author_id INTEGER,
|
||||
author_username TEXT,
|
||||
body TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
synced_at INTEGER NOT NULL,
|
||||
sync_generation INTEGER NOT NULL DEFAULT 0,
|
||||
project_path TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX idx_todos_action_created ON todos(action_name, created_at DESC);
|
||||
CREATE INDEX idx_todos_target ON todos(target_type, target_id);
|
||||
CREATE INDEX idx_todos_created ON todos(created_at DESC);
|
||||
CREATE INDEX idx_todos_sync_gen ON todos(sync_generation);
|
||||
CREATE INDEX idx_todos_gitlab_project ON todos(gitlab_project_id);
|
||||
CREATE INDEX idx_todos_target_lookup ON todos(target_type, project_id, target_iid);
|
||||
```
|
||||
|
||||
**Notes:**
|
||||
- `project_id` nullable for non-synced projects (AC-3)
|
||||
- `gitlab_project_id` nullable — TODO targets include non-project entities (Namespace, etc.)
|
||||
- No `state` column — we only store pending todos
|
||||
- `sync_generation` enables two-generation grace purge (AC-7)
|
||||
|
||||
---
|
||||
|
||||
### IMP-2: GitLab API Client
|
||||
|
||||
**Fulfills:** AC-4, AC-5
|
||||
|
||||
**Endpoint:** `GET /api/v4/todos?state=pending`
|
||||
|
||||
**Types to add in `src/gitlab/types.rs`:**
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GitLabTodo {
|
||||
pub id: i64,
|
||||
pub project: Option<GitLabTodoProject>,
|
||||
pub author: Option<GitLabTodoAuthor>,
|
||||
pub action_name: String,
|
||||
pub target_type: String,
|
||||
pub target: Option<GitLabTodoTarget>,
|
||||
pub target_url: String,
|
||||
pub body: Option<String>,
|
||||
pub state: String,
|
||||
pub created_at: String,
|
||||
pub updated_at: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GitLabTodoProject {
|
||||
pub id: i64,
|
||||
pub path_with_namespace: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GitLabTodoTarget {
|
||||
pub id: serde_json::Value, // i64 or String (commit SHA)
|
||||
pub iid: Option<i64>,
|
||||
pub title: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GitLabTodoAuthor {
|
||||
pub id: i64,
|
||||
pub username: String,
|
||||
}
|
||||
```
|
||||
|
||||
**Client method in `src/gitlab/client.rs`:**
|
||||
|
||||
```rust
|
||||
pub fn fetch_todos(&self) -> impl Stream<Item = Result<GitLabTodo>> {
|
||||
self.paginate("/api/v4/todos?state=pending")
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### IMP-3: Sync Pipeline Integration
|
||||
|
||||
**Fulfills:** AC-4, AC-5, AC-6, AC-7, AC-8, AC-9
|
||||
|
||||
**New file: `src/ingestion/todos.rs`**
|
||||
|
||||
**Sync position:** Account-wide step after per-project sync and status enrichment.
|
||||
|
||||
```
|
||||
Sync order:
|
||||
1. Issues (per project)
|
||||
2. MRs (per project)
|
||||
3. Status enrichment (account-wide GraphQL)
|
||||
4. Todos (account-wide REST) ← NEW
|
||||
```
|
||||
|
||||
**Purge-safe deletion pattern:**
|
||||
|
||||
```rust
|
||||
pub struct TodoSyncResult {
|
||||
pub fetched: usize,
|
||||
pub upserted: usize,
|
||||
pub deleted: usize,
|
||||
pub generation: i64,
|
||||
pub purge_allowed: bool,
|
||||
}
|
||||
|
||||
pub fn sync_todos(conn: &Connection, client: &GitLabClient) -> Result<TodoSyncResult> {
|
||||
// 1. Get next generation
|
||||
let generation: i64 = conn.query_row(
|
||||
"SELECT COALESCE(MAX(sync_generation), 0) + 1 FROM todos",
|
||||
[], |r| r.get(0)
|
||||
)?;
|
||||
|
||||
let mut fetched = 0;
|
||||
let mut purge_allowed = true;
|
||||
|
||||
// 2. Fetch and upsert all todos
|
||||
for result in client.fetch_todos()? {
|
||||
match result {
|
||||
Ok(todo) => {
|
||||
upsert_todo_guarded(conn, &todo, generation)?;
|
||||
fetched += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
// Malformed JSON: log warning, skip item, disable purge
|
||||
warn!("Skipping malformed todo: {e}");
|
||||
purge_allowed = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Two-generation grace purge: delete only if missing for 2+ consecutive syncs
|
||||
// This protects against pagination drift (new todos inserted during traversal)
|
||||
let deleted = if purge_allowed {
|
||||
conn.execute("DELETE FROM todos WHERE sync_generation < ? - 1", [generation])?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Ok(TodoSyncResult { fetched, upserted: fetched, deleted, generation, purge_allowed })
|
||||
}
|
||||
```
|
||||
|
||||
**Concurrent-safe upsert:**
|
||||
|
||||
```sql
|
||||
INSERT INTO todos (..., sync_generation) VALUES (?, ..., ?)
|
||||
ON CONFLICT(gitlab_todo_id) DO UPDATE SET
|
||||
...,
|
||||
sync_generation = excluded.sync_generation,
|
||||
synced_at = excluded.synced_at
|
||||
WHERE excluded.sync_generation >= todos.sync_generation;
|
||||
```
|
||||
|
||||
**"Success" for purge (all must be true):**
|
||||
- Every page fetch completed without error
|
||||
- Every todo JSON decoded successfully (any decode failure sets `purge_allowed=false`)
|
||||
- Pagination traversal completed (not interrupted)
|
||||
- Response was not 401/403
|
||||
- Zero todos IS valid for purge when above conditions met
|
||||
|
||||
**Two-generation grace purge:**
|
||||
Todos are deleted only if missing for 2 consecutive successful syncs (`sync_generation < current - 1`).
|
||||
This protects against false deletions from pagination drift (new todos inserted during traversal).
|
||||
|
||||
---
|
||||
|
||||
### IMP-4: Project Path Extraction
|
||||
|
||||
**Fulfills:** AC-3, AC-13
|
||||
|
||||
```rust
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
pub fn extract_project_path(url: &str) -> Option<&str> {
|
||||
static RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"https?://[^/]+/(.+?)/-/(?:issues|merge_requests|epics|commits)/")
|
||||
.expect("valid regex")
|
||||
});
|
||||
|
||||
RE.captures(url)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str())
|
||||
}
|
||||
```
|
||||
|
||||
**Usage:** Prefer `project.path_with_namespace` from API when available. Fall back to URL extraction for external projects.
|
||||
|
||||
---
|
||||
|
||||
### IMP-5: `lore todos` Command
|
||||
|
||||
**Fulfills:** AC-10, AC-11, AC-12, AC-13, AC-14, AC-15, AC-16, AC-17
|
||||
|
||||
**New file: `src/cli/commands/todos.rs`**
|
||||
|
||||
**Args:**
|
||||
|
||||
```rust
|
||||
#[derive(Parser)]
|
||||
#[command(alias = "todo")]
|
||||
pub struct TodosArgs {
|
||||
#[arg(short = 'n', long)]
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
```
|
||||
|
||||
**Autocorrect aliases in `src/cli/mod.rs`:**
|
||||
|
||||
```rust
|
||||
("td", "todos"),
|
||||
("todo", "todos"),
|
||||
```
|
||||
|
||||
**Action type grouping:**
|
||||
|
||||
| Group | Actions |
|
||||
|-------|---------|
|
||||
| Assignments | `assigned` |
|
||||
| Mentions | `mentioned`, `directly_addressed` |
|
||||
| Approvals | `approval_required` |
|
||||
| Build Issues | `build_failed`, `unmergeable` |
|
||||
|
||||
**Robot mode schema:**
|
||||
|
||||
```json
|
||||
{
|
||||
"ok": true,
|
||||
"data": {
|
||||
"todos": [{
|
||||
@@ -184,91 +463,186 @@ Add GitLab TODO support to lore. Todos are fetched during sync, stored locally,
|
||||
"mentioned": 5,
|
||||
"approval_required": 1,
|
||||
"build_failed": 0,
|
||||
"unmergeable": 0
|
||||
"unmergeable": 0,
|
||||
"other": 0
|
||||
}
|
||||
},
|
||||
"meta": {"elapsed_ms": 42}
|
||||
}
|
||||
```
|
||||
- [ ] **AC-11.2:** `lore me --robot` includes `todos` and `pending_todo_count` in response
|
||||
- [ ] **AC-11.3:** Support `--fields minimal` for token efficiency
|
||||
}
|
||||
```
|
||||
|
||||
### AC-12: Documentation
|
||||
|
||||
- [ ] **AC-12.1:** Update CLAUDE.md with `lore todos` command reference
|
||||
- [ ] **AC-12.2:** Update `lore robot-docs` manifest with todos schema
|
||||
- [ ] **AC-12.3:** Add todos to CLI help output
|
||||
|
||||
### AC-13: Quality Gates
|
||||
|
||||
- [ ] **AC-13.1:** `cargo check --all-targets` passes
|
||||
- [ ] **AC-13.2:** `cargo clippy --all-targets -- -D warnings` passes
|
||||
- [ ] **AC-13.3:** `cargo fmt --check` passes
|
||||
- [ ] **AC-13.4:** `cargo test` passes with new tests
|
||||
**Minimal fields:** `gitlab_todo_id`, `action`, `target_type`, `target_iid`, `project_path`, `is_external`
|
||||
|
||||
---
|
||||
|
||||
## Technical Notes
|
||||
### IMP-6: `lore me` Integration
|
||||
|
||||
### GitLab API Endpoint
|
||||
**Fulfills:** AC-18, AC-19, AC-20, AC-21, AC-22, AC-23
|
||||
|
||||
```
|
||||
GET /api/v4/todos?state=pending
|
||||
**Types to add/extend in `src/cli/commands/me/types.rs`:**
|
||||
|
||||
```rust
|
||||
// EXTEND
|
||||
pub struct MeSummary {
|
||||
// ... existing fields ...
|
||||
pub pending_todo_count: usize, // ADD
|
||||
}
|
||||
|
||||
// EXTEND
|
||||
pub enum ActivityEventType {
|
||||
// ... existing variants ...
|
||||
Todo, // ADD
|
||||
}
|
||||
|
||||
// EXTEND
|
||||
pub struct MeDashboard {
|
||||
// ... existing fields ...
|
||||
pub todos: Vec<MeTodo>, // ADD
|
||||
}
|
||||
|
||||
// NEW
|
||||
pub struct MeTodo {
|
||||
pub id: i64,
|
||||
pub gitlab_todo_id: i64,
|
||||
pub action: String,
|
||||
pub target_type: String,
|
||||
pub target_iid: Option<i64>,
|
||||
pub target_title: Option<String>,
|
||||
pub target_url: String,
|
||||
pub project_path: String,
|
||||
pub author_username: Option<String>,
|
||||
pub body: Option<String>,
|
||||
pub created_at: i64,
|
||||
pub is_external: bool,
|
||||
}
|
||||
```
|
||||
|
||||
Response fields: id, project, author, action_name, target_type, target, target_url, body, state, created_at, updated_at
|
||||
**Warning for `--project` with `--todos` (AC-22):**
|
||||
|
||||
### Sync Deletion Strategy
|
||||
|
||||
Snapshot semantics: a todo disappearing from API response means it was marked done elsewhere. Delete from local DB to stay in sync.
|
||||
|
||||
### Project Path Extraction
|
||||
|
||||
For non-synced projects, extract path from `target_url`:
|
||||
```
|
||||
https://gitlab.com/group/subgroup/repo/-/issues/42
|
||||
^^^^^^^^^^^^^^^^^ extract this
|
||||
```rust
|
||||
if args.todos && args.project.is_some() {
|
||||
eprintln!("Warning: Todos are account-wide; project filter not applied");
|
||||
}
|
||||
```
|
||||
|
||||
### Action Type Grouping
|
||||
---
|
||||
|
||||
| Group | Actions |
|
||||
|-------|---------|
|
||||
| Assignments | `assigned` |
|
||||
| Mentions | `mentioned`, `directly_addressed` |
|
||||
| Approvals | `approval_required` |
|
||||
| Build Issues | `build_failed`, `unmergeable` |
|
||||
### IMP-7: Error Handling
|
||||
|
||||
**Fulfills:** AC-28, AC-29, AC-30
|
||||
|
||||
| Error | Behavior |
|
||||
|-------|----------|
|
||||
| 403 Forbidden | Log warning, skip todo sync, continue with other entities |
|
||||
| 429 Rate Limited | Respect `Retry-After` header using existing retry policy |
|
||||
| Malformed JSON | Log warning with todo ID, skip item, set `purge_allowed=false`, continue batch |
|
||||
|
||||
**Rationale for purge disable on malformed JSON:** If we can't decode a todo, we don't know its `gitlab_todo_id`. Without that, we might accidentally purge a valid todo that was simply malformed in transit. Disabling purge for that sync is the safe choice.
|
||||
|
||||
---
|
||||
|
||||
### IMP-8: Test Fixtures
|
||||
|
||||
**Fulfills:** AC-34
|
||||
|
||||
**Location:** `tests/fixtures/todos/`
|
||||
|
||||
**`todos_pending.json`:**
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": 102,
|
||||
"project": {"id": 2, "path_with_namespace": "diaspora/client"},
|
||||
"author": {"id": 1, "username": "admin"},
|
||||
"action_name": "mentioned",
|
||||
"target_type": "Issue",
|
||||
"target": {"id": 11, "iid": 4, "title": "Inventory system"},
|
||||
"target_url": "https://gitlab.example.com/diaspora/client/-/issues/4",
|
||||
"body": "@user please review",
|
||||
"state": "pending",
|
||||
"created_at": "2026-02-20T10:00:00.000Z",
|
||||
"updated_at": "2026-02-20T10:00:00.000Z"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
**`todos_empty.json`:** `[]`
|
||||
|
||||
**`todos_commit_target.json`:** (target.id is string SHA)
|
||||
|
||||
**`todos_niche_actions.json`:** (merge_train_removed, etc.)
|
||||
|
||||
---
|
||||
|
||||
## Rollout Slices
|
||||
|
||||
### Dependency Graph
|
||||
|
||||
```
|
||||
Slice A ──────► Slice B ──────┬──────► Slice C
|
||||
(Schema) (Sync) │ (`lore todos`)
|
||||
│
|
||||
└──────► Slice D
|
||||
(`lore me`)
|
||||
|
||||
Slice C ───┬───► Slice E
|
||||
Slice D ───┘ (Polish)
|
||||
```
|
||||
|
||||
### Slice A: Schema + Client
|
||||
- Migration 028
|
||||
- `GitLabTodo` type
|
||||
- `fetch_todos()` client method
|
||||
- Unit tests for deserialization
|
||||
|
||||
**ACs:** AC-1, AC-2, AC-3, AC-4, AC-5
|
||||
**IMPs:** IMP-1, IMP-2, IMP-4
|
||||
**Deliverable:** Migration + client method + deserialization tests pass
|
||||
|
||||
### Slice B: Sync Integration
|
||||
- `src/ingestion/todos.rs`
|
||||
- Integrate into `lore sync`
|
||||
- `--no-todos` flag
|
||||
- Sync stats
|
||||
|
||||
**ACs:** AC-6, AC-7, AC-8, AC-9, AC-28, AC-29, AC-30
|
||||
**IMPs:** IMP-3, IMP-7
|
||||
**Deliverable:** `lore sync` fetches todos; `--no-todos` works
|
||||
|
||||
### Slice C: `lore todos` Command
|
||||
- CLI args + dispatch
|
||||
- Human + robot rendering
|
||||
- Autocorrect aliases
|
||||
|
||||
**ACs:** AC-10, AC-11, AC-12, AC-13, AC-14, AC-15, AC-16, AC-17, AC-24, AC-25
|
||||
**IMPs:** IMP-5
|
||||
**Deliverable:** `lore todos` and `lore --robot todos` work
|
||||
|
||||
### Slice D: `lore me` Integration
|
||||
- `--todos` flag
|
||||
- Summary count
|
||||
- Activity feed enrichment
|
||||
|
||||
**ACs:** AC-18, AC-19, AC-20, AC-21, AC-22, AC-23, AC-26, AC-27
|
||||
**IMPs:** IMP-6
|
||||
**Deliverable:** `lore me --todos` works; summary shows count
|
||||
|
||||
### Slice E: Polish
|
||||
- Edge case tests
|
||||
- Documentation updates
|
||||
- `robot-docs` manifest
|
||||
|
||||
**ACs:** AC-31, AC-32, AC-33, AC-34
|
||||
**IMPs:** IMP-8
|
||||
**Deliverable:** Docs updated; all quality gates pass
|
||||
|
||||
---
|
||||
|
||||
## Design Decisions
|
||||
|
||||
| Decision | Choice | Rationale |
|
||||
|----------|--------|-----------|
|
||||
| Write operations | Read-only | Complexity; glab handles writes |
|
||||
| Storage | SQLite | Consistent with existing architecture |
|
||||
| Project filter | Account-wide only | GitLab API is user-scoped |
|
||||
| Action type display | Core only | Reduce noise; store all for future |
|
||||
| Attention state | Separate signal | Todos are notifications, not engagement |
|
||||
| History | Pending only | Simplicity; done todos have no value locally |
|
||||
| Grouping | By action type | Matches GitLab UI; aids triage |
|
||||
| Purge strategy | Two-generation grace | Protects against pagination drift during sync |
|
||||
|
||||
---
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Write operations (mark as done)
|
||||
- Done todo history tracking
|
||||
- Filters beyond `--limit`
|
||||
- Todo-based attention state boosting
|
||||
- Notification settings API
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -183,6 +183,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
|
||||
"--max-evidence",
|
||||
],
|
||||
),
|
||||
("related", &["--limit", "--project"]),
|
||||
(
|
||||
"who",
|
||||
&[
|
||||
|
||||
@@ -11,6 +11,7 @@ pub mod ingest;
|
||||
pub mod init;
|
||||
pub mod list;
|
||||
pub mod me;
|
||||
pub mod related;
|
||||
pub mod search;
|
||||
pub mod show;
|
||||
pub mod stats;
|
||||
@@ -48,6 +49,7 @@ pub use list::{
|
||||
print_list_notes, print_list_notes_json, query_notes, run_list_issues, run_list_mrs,
|
||||
};
|
||||
pub use me::run_me;
|
||||
pub use related::{RelatedResponse, print_related_human, print_related_json, run_related};
|
||||
pub use search::{
|
||||
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
|
||||
};
|
||||
|
||||
637
src/cli/commands/related.rs
Normal file
637
src/cli/commands/related.rs
Normal file
@@ -0,0 +1,637 @@
|
||||
//! Semantic similarity discovery: find related entities via vector search.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use rusqlite::Connection;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::cli::render::{Icons, Theme};
|
||||
use crate::cli::robot::RobotMeta;
|
||||
use crate::core::config::Config;
|
||||
use crate::core::db::create_connection;
|
||||
use crate::core::error::{LoreError, Result};
|
||||
use crate::core::paths::get_db_path;
|
||||
use crate::core::project::resolve_project;
|
||||
use crate::core::time::ms_to_iso;
|
||||
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
||||
use crate::search::search_vector;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Response types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Top-level payload for `lore related`, serialized as-is in robot mode.
#[derive(Debug, Serialize)]
pub struct RelatedResponse {
    /// "entity" or "query", depending on how the command was invoked.
    pub mode: String,
    /// The entity the search was seeded from; `None` in query mode.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<RelatedSource>,
    /// The free-text query; `None` in entity mode.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub query: Option<String>,
    pub results: Vec<RelatedResult>,
    /// Non-fatal advisories (e.g. all scores below 0.3, short query).
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub warnings: Vec<String>,
}

/// The entity a related-search was seeded from (entity mode only).
#[derive(Debug, Serialize)]
pub struct RelatedSource {
    /// "issue" or "merge_request".
    pub source_type: String,
    pub iid: i64,
    pub title: String,
    pub project_path: String,
}

/// One related entity returned by the vector search.
#[derive(Debug, Serialize)]
pub struct RelatedResult {
    /// "issue" or "merge_request".
    pub source_type: String,
    pub iid: i64,
    pub title: String,
    /// Empty string when the backing document has no URL.
    pub url: String,
    /// Similarity in (0, 1], derived from L2 distance via 1/(1+d).
    pub similarity_score: f64,
    pub project_path: String,
    /// Labels this result shares with the source entity (entity mode only;
    /// always empty in query mode).
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub shared_labels: Vec<String>,
    pub author: Option<String>,
    /// ISO-8601 timestamp; empty string when the document has no updated_at.
    pub updated_at: String,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal row types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// One row from the `documents` table, as selected by this command's queries.
struct DocumentRow {
    id: i64,
    /// e.g. "issue", "merge_request", "discussion", "note".
    source_type: String,
    /// Database id of the backing entity row (issues.id / merge_requests.id).
    source_id: i64,
    #[allow(dead_code)]
    project_id: i64,
    #[allow(dead_code)]
    title: Option<String>,
    url: Option<String>,
    content_text: String,
    /// JSON-encoded array of label names (decoded by `parse_label_names`).
    label_names: Option<String>,
    author_username: Option<String>,
    /// Epoch milliseconds (converted via `ms_to_iso` when rendered).
    updated_at: Option<i64>,
}

/// Display metadata for a source entity (issue or MR).
struct EntityInfo {
    #[allow(dead_code)]
    iid: i64,
    title: String,
    project_path: String,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Run the related command.
///
/// Modes:
/// - Entity mode: `lore related issues 42` or `lore related mrs 99`
/// - Query mode: `lore related 'search terms'`
///
/// The first positional argument decides the mode: if it matches an entity
/// type alias (issues/issue/i, mrs/mr/m/merge_request) it is entity mode and
/// `iid` is required; anything else is treated as free text.
///
/// # Errors
/// - No embeddings exist yet (user must run `lore embed` first).
/// - Empty query string.
/// - Entity mode without an IID.
/// - Anything propagated from the underlying mode handlers.
pub async fn run_related(
    config: &Config,
    query_or_type: &str,
    iid: Option<i64>,
    limit: usize,
    project: Option<&str>,
) -> Result<RelatedResponse> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    // Check if embeddings exist -- vector search is useless without them.
    // A query error here degrades to 0, which produces the friendlier
    // "run 'lore embed'" message instead of a raw SQL error.
    let embedding_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM embedding_metadata", [], |row| {
            row.get(0)
        })
        .unwrap_or(0);

    if embedding_count == 0 {
        return Err(LoreError::Other(
            "No embeddings found. Run 'lore embed' first to generate vector embeddings.".into(),
        ));
    }

    // Validate input
    if query_or_type.trim().is_empty() {
        return Err(LoreError::Other(
            "Query cannot be empty. Provide an entity type (issues/mrs) and IID, or a search query.".into(),
        ));
    }

    // Determine mode: entity vs query (case-insensitive alias match)
    let entity_type = match query_or_type.to_lowercase().as_str() {
        "issues" | "issue" | "i" => Some("issue"),
        "mrs" | "mr" | "m" | "merge_request" => Some("merge_request"),
        _ => None,
    };

    if let Some(etype) = entity_type {
        // Entity mode
        let iid = iid.ok_or_else(|| {
            LoreError::Other("Entity mode requires an IID (e.g., 'lore related issues 42')".into())
        })?;
        run_related_entity(&conn, config, etype, iid, limit, project).await
    } else {
        // Query mode - treat query_or_type as free text
        run_related_query(&conn, config, query_or_type, limit, project).await
    }
}
|
||||
|
||||
/// Entity mode: find entities similar to an existing issue/MR.
///
/// Pipeline: locate the source document by IID -> re-embed its content_text
/// -> vector search -> drop self -> hydrate each hit into a `RelatedResult`.
///
/// NOTE(review): only `limit + 1` hits are fetched, but `hydrate_result` can
/// drop hits (notes/discussions, orphans), so fewer than `limit` results may
/// be returned even when more candidates exist -- confirm this is acceptable.
async fn run_related_entity(
    conn: &Connection,
    config: &Config,
    entity_type: &str,
    iid: i64,
    limit: usize,
    project_filter: Option<&str>,
) -> Result<RelatedResponse> {
    // Find the source document
    let source_doc = find_entity_document(conn, entity_type, iid, project_filter)?;
    let source_info = get_entity_info(conn, entity_type, source_doc.source_id)?;

    // Embed the source content
    let embedding = embed_text(config, &source_doc.content_text).await?;

    // Search for similar documents (limit + 1 to account for filtering self)
    let vector_results = search_vector(conn, &embedding, limit.saturating_add(1))?;

    // Filter out self and hydrate results
    let source_labels = parse_label_names(&source_doc.label_names);
    let mut results = Vec::new();
    let mut warnings = Vec::new();

    for vr in vector_results {
        // Skip self
        if vr.document_id == source_doc.id {
            continue;
        }

        if let Some(result) = hydrate_result(conn, vr.document_id, vr.distance, &source_labels)? {
            results.push(result);
        }

        if results.len() >= limit {
            break;
        }
    }

    // Check for low similarity (all results weakly related)
    if !results.is_empty() && results.iter().all(|r| r.similarity_score < 0.3) {
        warnings.push("No strongly related entities found (all scores < 0.3)".to_string());
    }

    Ok(RelatedResponse {
        mode: "entity".to_string(),
        source: Some(RelatedSource {
            source_type: entity_type.to_string(),
            iid,
            title: source_info.title,
            project_path: source_info.project_path,
        }),
        query: None,
        results,
        warnings,
    })
}
|
||||
|
||||
/// Query mode: embed free text and return the closest entities.
///
/// Fetches `limit * 2` vector hits as headroom for rows dropped by the
/// project filter and by `hydrate_result` (notes/discussions, orphans).
///
/// NOTE(review): with a narrow project filter, 2x headroom may still
/// under-fill `limit`. The per-hit project lookup is also an N+1 query;
/// acceptable at these limits, but worth folding into `hydrate_result` if
/// limits grow.
async fn run_related_query(
    conn: &Connection,
    config: &Config,
    query: &str,
    limit: usize,
    project_filter: Option<&str>,
) -> Result<RelatedResponse> {
    let mut warnings = Vec::new();

    // Warn if query is very short (1-2 words embed poorly)
    if query.split_whitespace().count() <= 2 {
        warnings.push("Short queries may produce noisy results".to_string());
    }

    // Embed the query
    let embedding = embed_text(config, query).await?;

    // Search for similar documents (fetch extra to allow for project filtering)
    let vector_results = search_vector(conn, &embedding, limit.saturating_mul(2))?;

    // Filter by project if specified and hydrate
    let project_id = project_filter
        .map(|p| resolve_project(conn, p))
        .transpose()?;

    let mut results = Vec::new();
    // Query mode has no source entity, so no labels to intersect against.
    let empty_labels: HashSet<String> = HashSet::new();

    for vr in vector_results {
        // Check project filter
        if let Some(pid) = project_id {
            let doc_project_id: Option<i64> = conn
                .query_row(
                    "SELECT project_id FROM documents WHERE id = ?1",
                    [vr.document_id],
                    |row| row.get(0),
                )
                .ok();

            if doc_project_id != Some(pid) {
                continue;
            }
        }

        if let Some(result) = hydrate_result(conn, vr.document_id, vr.distance, &empty_labels)? {
            results.push(result);
        }

        if results.len() >= limit {
            break;
        }
    }

    // Check for low similarity (all results weakly related)
    if !results.is_empty() && results.iter().all(|r| r.similarity_score < 0.3) {
        warnings.push("No strongly related entities found (all scores < 0.3)".to_string());
    }

    Ok(RelatedResponse {
        mode: "query".to_string(),
        source: None,
        query: Some(query.to_string()),
        results,
        warnings,
    })
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DB helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Locate the `documents` row backing an issue or MR, addressed by IID.
///
/// `entity_type` must be "issue" or "merge_request"; it selects the join
/// table from a fixed two-entry whitelist, so the `format!` into SQL is not
/// an injection risk (all user values go through bound parameters).
///
/// Without a project filter, an IID can legitimately match in several
/// projects; exactly one match is required.
///
/// # Errors
/// - `NotFound` when no document matches (entity not synced or not embedded).
/// - `Ambiguous` when the IID matches in multiple projects and no
///   `--project` was given.
fn find_entity_document(
    conn: &Connection,
    entity_type: &str,
    iid: i64,
    project_filter: Option<&str>,
) -> Result<DocumentRow> {
    let table = match entity_type {
        "issue" => "issues",
        "merge_request" => "merge_requests",
        _ => {
            return Err(LoreError::Other(format!(
                "Unknown entity type: {entity_type}"
            )));
        }
    };

    // Build SQL + parameters together; the boxed params keep both arms the
    // same type despite differing arity.
    let (sql, params): (String, Vec<Box<dyn rusqlite::ToSql>>) = match project_filter {
        Some(project) => {
            let project_id = resolve_project(conn, project)?;
            (
                format!(
                    "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url,
                            d.content_text, d.label_names, d.author_username, d.updated_at
                     FROM documents d
                     JOIN {table} e ON d.source_id = e.id
                     WHERE d.source_type = ?1 AND e.iid = ?2 AND e.project_id = ?3"
                ),
                vec![
                    Box::new(entity_type.to_string()),
                    Box::new(iid),
                    Box::new(project_id),
                ],
            )
        }
        None => (
            format!(
                "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url,
                        d.content_text, d.label_names, d.author_username, d.updated_at
                 FROM documents d
                 JOIN {table} e ON d.source_id = e.id
                 WHERE d.source_type = ?1 AND e.iid = ?2"
            ),
            vec![Box::new(entity_type.to_string()), Box::new(iid)],
        ),
    };

    let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();

    let mut stmt = conn.prepare(&sql)?;
    let rows: Vec<DocumentRow> = stmt
        .query_map(param_refs.as_slice(), |row| {
            Ok(DocumentRow {
                id: row.get(0)?,
                source_type: row.get(1)?,
                source_id: row.get(2)?,
                project_id: row.get(3)?,
                title: row.get(4)?,
                url: row.get(5)?,
                content_text: row.get(6)?,
                label_names: row.get(7)?,
                author_username: row.get(8)?,
                updated_at: row.get(9)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    // Disambiguate by match count: 0 = not synced/embedded, >1 = same IID in
    // multiple projects.
    match rows.len() {
        0 => Err(LoreError::NotFound(format!(
            "{entity_type} #{iid} not found (run 'lore sync' first?)"
        ))),
        1 => Ok(rows.into_iter().next().unwrap()),
        _ => Err(LoreError::Ambiguous(format!(
            "{entity_type} #{iid} exists in multiple projects. Use --project to specify."
        ))),
    }
}
|
||||
|
||||
/// Fetch display metadata (iid, title, project path) for an issue or MR by
/// its database id (not IID).
///
/// `entity_type` selects the table from a fixed two-entry whitelist, so the
/// `format!` interpolation into SQL is safe.
fn get_entity_info(conn: &Connection, entity_type: &str, entity_id: i64) -> Result<EntityInfo> {
    let table = match entity_type {
        "issue" => "issues",
        "merge_request" => "merge_requests",
        _ => {
            return Err(LoreError::Other(format!(
                "Unknown entity type: {entity_type}"
            )));
        }
    };

    let sql = format!(
        "SELECT e.iid, e.title, p.path_with_namespace
         FROM {table} e
         JOIN projects p ON e.project_id = p.id
         WHERE e.id = ?1"
    );

    conn.query_row(&sql, [entity_id], |row| {
        Ok(EntityInfo {
            iid: row.get(0)?,
            title: row.get(1)?,
            project_path: row.get(2)?,
        })
    })
    // Any query error (including "no rows") is surfaced as NotFound.
    .map_err(|e| LoreError::NotFound(format!("Entity not found: {e}")))
}
|
||||
|
||||
/// Turn a vector-search hit into a `RelatedResult`, or `None` if the hit
/// should be silently skipped.
///
/// Skipped cases (all return `Ok(None)`):
/// - the document row no longer exists,
/// - discussion/note documents (we only surface top-level entities),
/// - unknown `source_type` values,
/// - orphaned documents whose backing issue/MR row was deleted.
///
/// `source_labels` is intersected with the hit's labels to compute
/// `shared_labels`; callers in query mode pass an empty set.
fn hydrate_result(
    conn: &Connection,
    document_id: i64,
    distance: f64,
    source_labels: &HashSet<String>,
) -> Result<Option<RelatedResult>> {
    // Load the document; .ok() deliberately folds "row gone" into None.
    let doc: Option<DocumentRow> = conn
        .query_row(
            "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url,
                    d.content_text, d.label_names, d.author_username, d.updated_at
             FROM documents d
             WHERE d.id = ?1",
            [document_id],
            |row| {
                Ok(DocumentRow {
                    id: row.get(0)?,
                    source_type: row.get(1)?,
                    source_id: row.get(2)?,
                    project_id: row.get(3)?,
                    title: row.get(4)?,
                    url: row.get(5)?,
                    content_text: row.get(6)?,
                    label_names: row.get(7)?,
                    author_username: row.get(8)?,
                    updated_at: row.get(9)?,
                })
            },
        )
        .ok();

    let Some(doc) = doc else {
        return Ok(None);
    };

    // Skip discussion/note documents - we want entities only
    if doc.source_type == "discussion" || doc.source_type == "note" {
        return Ok(None);
    }

    // Map source_type to its table (fixed whitelist, so the format! below
    // is not an injection risk)
    let table = match doc.source_type.as_str() {
        "issue" => "issues",
        "merge_request" => "merge_requests",
        _ => return Ok(None),
    };

    // Get IID and title from the source entity - skip gracefully if not found
    // (this handles orphaned documents where the entity was deleted)
    let entity_info: Option<(i64, String, String)> = conn
        .query_row(
            &format!(
                "SELECT e.iid, e.title, p.path_with_namespace
                 FROM {table} e
                 JOIN projects p ON e.project_id = p.id
                 WHERE e.id = ?1"
            ),
            [doc.source_id],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
        )
        .ok();

    let Some((iid, title, project_path)) = entity_info else {
        // Entity not found in database - skip this result
        return Ok(None);
    };

    // Compute shared labels (intersection with the source entity's labels)
    let result_labels = parse_label_names(&doc.label_names);
    let shared_labels: Vec<String> = source_labels
        .intersection(&result_labels)
        .cloned()
        .collect();

    Ok(Some(RelatedResult {
        source_type: doc.source_type,
        iid,
        title,
        url: doc.url.unwrap_or_default(),
        similarity_score: distance_to_similarity(distance),
        project_path,
        shared_labels,
        author: doc.author_username,
        updated_at: doc.updated_at.map(ms_to_iso).unwrap_or_default(),
    }))
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Embedding helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Embed a single text via the configured Ollama endpoint and return its
/// vector.
///
/// Builds a fresh client per call (base URL and model from config, fixed
/// 60s timeout) and requests a batch of one; errors if the backend returns
/// an empty batch. `document_id: 0` in the error is presumably a sentinel
/// for "ad-hoc text, not a stored document" -- confirm against the error
/// type's other users.
async fn embed_text(config: &Config, text: &str) -> Result<Vec<f32>> {
    let ollama = OllamaClient::new(OllamaConfig {
        base_url: config.embedding.base_url.clone(),
        model: config.embedding.model.clone(),
        timeout_secs: 60,
    });

    let embeddings = ollama.embed_batch(&[text]).await?;
    embeddings
        .into_iter()
        .next()
        .ok_or_else(|| LoreError::EmbeddingFailed {
            document_id: 0,
            reason: "No embedding returned".to_string(),
        })
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Map an L2 distance onto a similarity score in (0, 1].
///
/// Identical vectors (distance 0) score 1.0; larger distances decay
/// monotonically toward 0 through the inverse relationship 1 / (1 + d).
fn distance_to_similarity(distance: f64) -> f64 {
    let shifted = 1.0 + distance;
    shifted.recip()
}
|
||||
|
||||
fn parse_label_names(label_names_json: &Option<String>) -> HashSet<String> {
|
||||
label_names_json
|
||||
.as_deref()
|
||||
.and_then(|s| serde_json::from_str::<Vec<String>>(s).ok())
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Printers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Render the related response for interactive terminal use.
///
/// Layout: bold header (source entity or quoted query) with an underline,
/// then one numbered block per result (similarity bar, title, project and
/// author, shared labels), and finally any warnings.
pub fn print_related_human(response: &RelatedResponse) {
    // Header
    let header = match &response.source {
        Some(src) => format!("Related to {} #{}: {}", src.source_type, src.iid, src.title),
        None => format!(
            "Related to query: \"{}\"",
            response.query.as_deref().unwrap_or("")
        ),
    };
    println!("{}", Theme::bold().render(&header));
    // NOTE(review): underline length uses byte length, not display width, so
    // multi-byte titles underline slightly long; cosmetic only.
    println!("{}", "-".repeat(header.len().min(70)));
    println!();

    if response.results.is_empty() {
        println!("No related entities found.");
        return;
    }

    for (i, result) in response.results.iter().enumerate() {
        let type_icon = match result.source_type.as_str() {
            "issue" => Icons::issue_opened(),
            "merge_request" => Icons::mr_opened(),
            _ => " ",
        };

        // 0-10 filled blocks proportional to the similarity score (0.0..=1.0).
        let score_bar_len = (result.similarity_score * 10.0) as usize;
        let score_bar: String = "\u{2588}".repeat(score_bar_len);

        println!(
            "{:>2}. {} {} #{} ({:.0}%) {}",
            i + 1,
            type_icon,
            result.source_type,
            result.iid,
            result.similarity_score * 100.0,
            score_bar
        );
        println!("    {}", result.title);
        println!(
            "    {} | @{}",
            result.project_path,
            result.author.as_deref().unwrap_or("?")
        );

        if !result.shared_labels.is_empty() {
            println!("    Labels shared: {}", result.shared_labels.join(", "));
        }
        println!();
    }

    // Warnings
    for warning in &response.warnings {
        println!("{} {}", Theme::warning().render(Icons::warning()), warning);
    }
}
|
||||
|
||||
pub fn print_related_json(response: &RelatedResponse, elapsed_ms: u64) {
|
||||
let meta = RobotMeta { elapsed_ms };
|
||||
let output = serde_json::json!({
|
||||
"ok": true,
|
||||
"data": response,
|
||||
"meta": meta,
|
||||
});
|
||||
match serde_json::to_string(&output) {
|
||||
Ok(json) => println!("{json}"),
|
||||
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // --- distance_to_similarity: verifies the 1/(1+d) mapping ---

    #[test]
    fn test_distance_to_similarity_identical() {
        assert!((distance_to_similarity(0.0) - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_distance_to_similarity_midpoint() {
        assert!((distance_to_similarity(1.0) - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_distance_to_similarity_large() {
        let sim = distance_to_similarity(2.0);
        assert!(sim > 0.0 && sim < 0.5);
        assert!((sim - 0.333_333_333_333_333_3).abs() < 0.001);
    }

    #[test]
    fn test_distance_to_similarity_range() {
        // Scores must stay inside (0, 1] for any non-negative distance.
        for d in [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0] {
            let sim = distance_to_similarity(d);
            assert!(
                sim > 0.0 && sim <= 1.0,
                "score {sim} out of range for distance {d}"
            );
        }
    }

    // --- parse_label_names: lenient JSON-array decoding ---

    #[test]
    fn test_parse_label_names_valid() {
        let json = Some(r#"["bug", "priority::high"]"#.to_string());
        let labels = parse_label_names(&json);
        assert!(labels.contains("bug"));
        assert!(labels.contains("priority::high"));
        assert_eq!(labels.len(), 2);
    }

    #[test]
    fn test_parse_label_names_empty() {
        let labels = parse_label_names(&None);
        assert!(labels.is_empty());
    }

    #[test]
    fn test_parse_label_names_invalid_json() {
        // Malformed JSON degrades to an empty set rather than erroring.
        let json = Some("not valid json".to_string());
        let labels = parse_label_names(&json);
        assert!(labels.is_empty());
    }

    #[test]
    fn test_parse_label_names_empty_array() {
        let json = Some("[]".to_string());
        let labels = parse_label_names(&json);
        assert!(labels.is_empty());
    }
}
|
||||
@@ -175,7 +175,7 @@ pub async fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Ti
|
||||
query: params.query.clone(),
|
||||
search_mode: seed_result.search_mode,
|
||||
events,
|
||||
total_events_before_limit: total_before_limit,
|
||||
total_filtered_events: total_before_limit,
|
||||
seed_entities: seed_result.seed_entities,
|
||||
expanded_entities: expand_result.expanded_entities,
|
||||
unresolved_references: expand_result.unresolved_references,
|
||||
@@ -342,7 +342,7 @@ fn format_entity_ref(entity_type: &str, iid: i64) -> String {
|
||||
/// Render timeline as robot-mode JSON in {ok, data, meta} envelope.
|
||||
pub fn print_timeline_json_with_meta(
|
||||
result: &TimelineResult,
|
||||
total_events_before_limit: usize,
|
||||
total_filtered_events: usize,
|
||||
depth: u32,
|
||||
include_mentions: bool,
|
||||
fields: Option<&[String]>,
|
||||
@@ -355,7 +355,7 @@ pub fn print_timeline_json_with_meta(
|
||||
expansion_depth: depth,
|
||||
include_mentions,
|
||||
total_entities: result.seed_entities.len() + result.expanded_entities.len(),
|
||||
total_events: total_events_before_limit,
|
||||
total_events: total_filtered_events,
|
||||
evidence_notes_included: count_evidence_notes(&result.events),
|
||||
discussion_threads_included: count_discussion_threads(&result.events),
|
||||
unresolved_references: result.unresolved_references.len(),
|
||||
|
||||
@@ -293,6 +293,28 @@ pub enum Commands {
|
||||
project: Option<String>,
|
||||
},
|
||||
|
||||
/// Find semantically related entities via vector search
|
||||
#[command(after_help = "\x1b[1mExamples:\x1b[0m
|
||||
lore related issues 42 # Find entities related to issue #42
|
||||
lore related mrs 99 -p group/repo # Related to MR #99 in specific project
|
||||
lore related 'authentication flow' # Find entities matching free text query
|
||||
lore --robot related issues 42 -n 5 # JSON output, limit 5 results")]
|
||||
Related {
|
||||
/// Entity type (issues, mrs) or free text query
|
||||
query_or_type: String,
|
||||
|
||||
/// Entity IID (required when first arg is entity type)
|
||||
iid: Option<i64>,
|
||||
|
||||
/// Maximum results
|
||||
#[arg(short = 'n', long, default_value = "10")]
|
||||
limit: usize,
|
||||
|
||||
/// Scope to project (fuzzy match)
|
||||
#[arg(short, long)]
|
||||
project: Option<String>,
|
||||
},
|
||||
|
||||
/// Manage cron-based automatic syncing
|
||||
#[command(after_help = "\x1b[1mExamples:\x1b[0m
|
||||
lore cron install # Install cron job (every 8 minutes)
|
||||
|
||||
@@ -93,6 +93,10 @@ const MIGRATIONS: &[(&str, &str)] = &[
|
||||
"027",
|
||||
include_str!("../../migrations/027_surgical_sync_runs.sql"),
|
||||
),
|
||||
(
|
||||
"028",
|
||||
include_str!("../../migrations/028_discussions_mr_fk.sql"),
|
||||
),
|
||||
];
|
||||
|
||||
pub fn create_connection(db_path: &Path) -> Result<Connection> {
|
||||
@@ -130,21 +134,20 @@ pub fn create_connection(db_path: &Path) -> Result<Connection> {
|
||||
}
|
||||
|
||||
pub fn run_migrations(conn: &Connection) -> Result<()> {
|
||||
let has_version_table: bool = conn
|
||||
.query_row(
|
||||
// Note: sqlite_master always exists, so errors here indicate real DB problems
|
||||
// (corruption, locked, etc.) - we must not silently treat them as "fresh DB"
|
||||
let has_version_table: bool = conn.query_row(
|
||||
"SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='schema_version'",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap_or(false);
|
||||
)?;
|
||||
|
||||
let current_version: i32 = if has_version_table {
|
||||
conn.query_row(
|
||||
"SELECT COALESCE(MAX(version), 0) FROM schema_version",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap_or(0)
|
||||
)?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
@@ -164,9 +164,10 @@ pub struct TimelineResult {
|
||||
/// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)").
|
||||
pub search_mode: String,
|
||||
pub events: Vec<TimelineEvent>,
|
||||
/// Total events before the `--limit` was applied (for meta.total_events vs meta.showing).
|
||||
/// Total events after filters (e.g., --since) but before --limit was applied.
|
||||
/// Use this to show "showing X of Y filtered events".
|
||||
#[serde(skip)]
|
||||
pub total_events_before_limit: usize,
|
||||
pub total_filtered_events: usize,
|
||||
pub seed_entities: Vec<EntityRef>,
|
||||
pub expanded_entities: Vec<ExpandedEntityRef>,
|
||||
pub unresolved_references: Vec<UnresolvedRef>,
|
||||
|
||||
@@ -260,6 +260,9 @@ fn resolve_documents_to_entities(
|
||||
}
|
||||
|
||||
/// Find evidence notes: FTS5-matched discussion notes that provide context.
|
||||
///
|
||||
/// Uses round-robin selection across discussions to ensure diverse evidence
|
||||
/// rather than all notes coming from a single high-traffic discussion.
|
||||
fn find_evidence_notes(
|
||||
conn: &Connection,
|
||||
fts_query: &str,
|
||||
@@ -267,6 +270,10 @@ fn find_evidence_notes(
|
||||
since_ms: Option<i64>,
|
||||
max_evidence: usize,
|
||||
) -> Result<Vec<TimelineEvent>> {
|
||||
// Fetch extra rows to enable round-robin across discussions.
|
||||
// We'll select from multiple discussions in rotation.
|
||||
let fetch_limit = (max_evidence * 5).max(50);
|
||||
|
||||
let sql = r"
|
||||
SELECT n.id AS note_id, n.body, n.created_at, n.author_username,
|
||||
disc.id AS discussion_id,
|
||||
@@ -286,7 +293,7 @@ fn find_evidence_notes(
|
||||
|
||||
let mut stmt = conn.prepare(sql)?;
|
||||
let rows = stmt.query_map(
|
||||
rusqlite::params![fts_query, project_id, since_ms, max_evidence as i64],
|
||||
rusqlite::params![fts_query, project_id, since_ms, fetch_limit as i64],
|
||||
|row| {
|
||||
Ok((
|
||||
row.get::<_, i64>(0)?, // note_id
|
||||
@@ -331,7 +338,9 @@ fn find_evidence_notes(
|
||||
}
|
||||
};
|
||||
|
||||
events.push(TimelineEvent {
|
||||
events.push((
|
||||
discussion_id,
|
||||
TimelineEvent {
|
||||
timestamp: created_at,
|
||||
entity_type: parent_type,
|
||||
entity_id: parent_entity_id,
|
||||
@@ -346,10 +355,67 @@ fn find_evidence_notes(
|
||||
actor: author,
|
||||
url: None,
|
||||
is_seed: true,
|
||||
});
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
// Round-robin selection across discussions for diverse evidence
|
||||
Ok(round_robin_select_by_discussion(events, max_evidence))
|
||||
}
|
||||
|
||||
/// Round-robin select events across discussions to ensure diverse evidence.
|
||||
///
|
||||
/// Groups events by discussion_id, then iterates through discussions in order,
|
||||
/// taking one event from each until the limit is reached.
|
||||
fn round_robin_select_by_discussion(
|
||||
events: Vec<(i64, TimelineEvent)>,
|
||||
max_evidence: usize,
|
||||
) -> Vec<TimelineEvent> {
|
||||
use std::collections::HashMap;
|
||||
|
||||
if events.is_empty() || max_evidence == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Group events by discussion_id, preserving order within each group
|
||||
let mut by_discussion: HashMap<i64, Vec<TimelineEvent>> = HashMap::new();
|
||||
let mut discussion_order: Vec<i64> = Vec::new();
|
||||
|
||||
for (discussion_id, event) in events {
|
||||
if !by_discussion.contains_key(&discussion_id) {
|
||||
discussion_order.push(discussion_id);
|
||||
}
|
||||
by_discussion.entry(discussion_id).or_default().push(event);
|
||||
}
|
||||
|
||||
// Round-robin selection
|
||||
let mut result = Vec::with_capacity(max_evidence);
|
||||
let mut indices: Vec<usize> = vec![0; discussion_order.len()];
|
||||
|
||||
'outer: loop {
|
||||
let mut made_progress = false;
|
||||
|
||||
for (disc_idx, &discussion_id) in discussion_order.iter().enumerate() {
|
||||
let notes = by_discussion.get(&discussion_id).unwrap();
|
||||
let note_idx = indices[disc_idx];
|
||||
|
||||
if note_idx < notes.len() {
|
||||
result.push(notes[note_idx].clone());
|
||||
indices[disc_idx] += 1;
|
||||
made_progress = true;
|
||||
|
||||
if result.len() >= max_evidence {
|
||||
break 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !made_progress {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -6,10 +6,12 @@ use std::collections::{BTreeSet, HashMap};
|
||||
use std::fmt::Write as _;
|
||||
|
||||
use super::truncation::{
|
||||
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
|
||||
MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, pre_truncate_description,
|
||||
truncate_discussion, truncate_hard_cap,
|
||||
};
|
||||
use crate::core::error::Result;
|
||||
use crate::core::time::ms_to_iso;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -158,7 +160,16 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
|
||||
|
||||
if let Some(ref desc) = description {
|
||||
content.push_str("\n--- Description ---\n\n");
|
||||
content.push_str(desc);
|
||||
// Pre-truncate to avoid unbounded memory allocation for huge descriptions
|
||||
let pre_trunc = pre_truncate_description(desc, MAX_DOCUMENT_BYTES_HARD);
|
||||
if pre_trunc.was_truncated {
|
||||
warn!(
|
||||
iid,
|
||||
original_bytes = pre_trunc.original_bytes,
|
||||
"Issue description truncated (oversized)"
|
||||
);
|
||||
}
|
||||
content.push_str(&pre_trunc.content);
|
||||
}
|
||||
|
||||
let labels_hash = compute_list_hash(&labels);
|
||||
@@ -268,7 +279,16 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
|
||||
|
||||
if let Some(ref desc) = description {
|
||||
content.push_str("\n--- Description ---\n\n");
|
||||
content.push_str(desc);
|
||||
// Pre-truncate to avoid unbounded memory allocation for huge descriptions
|
||||
let pre_trunc = pre_truncate_description(desc, MAX_DOCUMENT_BYTES_HARD);
|
||||
if pre_trunc.was_truncated {
|
||||
warn!(
|
||||
iid,
|
||||
original_bytes = pre_trunc.original_bytes,
|
||||
"MR description truncated (oversized)"
|
||||
);
|
||||
}
|
||||
content.push_str(&pre_trunc.content);
|
||||
}
|
||||
|
||||
let labels_hash = compute_list_hash(&labels);
|
||||
|
||||
@@ -48,6 +48,56 @@ pub fn truncate_utf8(s: &str, max_bytes: usize) -> &str {
|
||||
&s[..end]
|
||||
}
|
||||
|
||||
/// Result of pre-truncating a description to avoid unbounded memory allocation.
pub struct DescriptionPreTruncateResult {
    /// The description, possibly cut down with an explanatory marker appended.
    pub content: String,
    /// True when `content` was truncated from the original.
    pub was_truncated: bool,
    /// Byte length of the original description, for logging/diagnostics.
    pub original_bytes: usize,
}
|
||||
|
||||
/// Pre-truncate a description to avoid allocating huge amounts of memory.
|
||||
///
|
||||
/// This is called BEFORE appending to the document content, so we don't
|
||||
/// allocate memory for pathologically large descriptions (e.g., 500MB base64 blob).
|
||||
///
|
||||
/// Returns the (potentially truncated) description and whether truncation occurred.
|
||||
pub fn pre_truncate_description(desc: &str, max_bytes: usize) -> DescriptionPreTruncateResult {
|
||||
let original_bytes = desc.len();
|
||||
|
||||
if original_bytes <= max_bytes {
|
||||
return DescriptionPreTruncateResult {
|
||||
content: desc.to_string(),
|
||||
was_truncated: false,
|
||||
original_bytes,
|
||||
};
|
||||
}
|
||||
|
||||
// Truncate at UTF-8 boundary and add indicator
|
||||
let truncated = truncate_utf8(desc, max_bytes.saturating_sub(50)); // Reserve space for marker
|
||||
let mut content = truncated.to_string();
|
||||
content.push_str("\n\n[... description truncated from ");
|
||||
content.push_str(&format_bytes(original_bytes));
|
||||
content.push_str(" to ");
|
||||
content.push_str(&format_bytes(max_bytes));
|
||||
content.push_str(" ...]");
|
||||
|
||||
DescriptionPreTruncateResult {
|
||||
content,
|
||||
was_truncated: true,
|
||||
original_bytes,
|
||||
}
|
||||
}
|
||||
|
||||
fn format_bytes(bytes: usize) -> String {
|
||||
if bytes >= 1_000_000 {
|
||||
format!("{:.1}MB", bytes as f64 / 1_000_000.0)
|
||||
} else if bytes >= 1_000 {
|
||||
format!("{:.1}KB", bytes as f64 / 1_000.0)
|
||||
} else {
|
||||
format!("{}B", bytes)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn truncate_discussion(notes: &[NoteContent], max_bytes: usize) -> TruncationResult {
|
||||
if notes.is_empty() {
|
||||
return TruncationResult {
|
||||
|
||||
@@ -130,6 +130,12 @@ pub async fn ingest_project_issues_with_progress(
|
||||
progress: Option<ProgressCallback>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<IngestProjectResult> {
|
||||
// Reclaim stale locks once at entry, not per-drain-function
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
debug!(reclaimed, "Reclaimed stale locks at issue sync start");
|
||||
}
|
||||
|
||||
let mut result = IngestProjectResult::default();
|
||||
let emit = |event: ProgressEvent| {
|
||||
if let Some(ref cb) = progress {
|
||||
@@ -176,7 +182,7 @@ pub async fn ingest_project_issues_with_progress(
|
||||
None => {
|
||||
warn!("Cannot enrich statuses: project path not found for project_id={project_id}");
|
||||
result.status_enrichment_error = Some("project_path_missing".into());
|
||||
result.status_enrichment_mode = "fetched".into();
|
||||
result.status_enrichment_mode = "error".into();
|
||||
emit(ProgressEvent::StatusEnrichmentComplete {
|
||||
enriched: 0,
|
||||
cleared: 0,
|
||||
@@ -260,7 +266,7 @@ pub async fn ingest_project_issues_with_progress(
|
||||
Err(e) => {
|
||||
warn!("Status enrichment fetch failed: {e}");
|
||||
result.status_enrichment_error = Some(e.to_string());
|
||||
result.status_enrichment_mode = "fetched".into();
|
||||
result.status_enrichment_mode = "fetch_error".into();
|
||||
emit(ProgressEvent::StatusEnrichmentComplete {
|
||||
enriched: 0,
|
||||
cleared: 0,
|
||||
@@ -460,7 +466,8 @@ async fn sync_discussions_sequential(
|
||||
progress: &Option<ProgressCallback>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<Vec<super::discussions::IngestDiscussionsResult>> {
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
// Guard against batch_size == 0 which would panic in .chunks()
|
||||
let batch_size = (config.sync.dependent_concurrency as usize).max(1);
|
||||
let total = issues.len();
|
||||
|
||||
let mut results = Vec::with_capacity(issues.len());
|
||||
@@ -531,6 +538,12 @@ pub async fn ingest_project_merge_requests_with_progress(
|
||||
progress: Option<ProgressCallback>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<IngestMrProjectResult> {
|
||||
// Reclaim stale locks once at entry, not per-drain-function
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
debug!(reclaimed, "Reclaimed stale locks at MR sync start");
|
||||
}
|
||||
|
||||
let mut result = IngestMrProjectResult::default();
|
||||
let emit = |event: ProgressEvent| {
|
||||
if let Some(ref cb) = progress {
|
||||
@@ -766,7 +779,8 @@ async fn sync_mr_discussions_sequential(
|
||||
progress: &Option<ProgressCallback>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<Vec<super::mr_discussions::IngestMrDiscussionsResult>> {
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
// Guard against batch_size == 0 which would panic in .chunks()
|
||||
let batch_size = (config.sync.dependent_concurrency as usize).max(1);
|
||||
let total = mrs.len();
|
||||
|
||||
let mut results = Vec::with_capacity(mrs.len());
|
||||
@@ -941,10 +955,7 @@ async fn drain_resource_events(
|
||||
let mut result = DrainResult::default();
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
debug!(reclaimed, "Reclaimed stale resource event locks");
|
||||
}
|
||||
// Note: stale locks are reclaimed once at sync entry point, not here
|
||||
|
||||
let claimable_counts = count_claimable_jobs(conn, project_id)?;
|
||||
let total_pending = claimable_counts
|
||||
@@ -1263,10 +1274,7 @@ async fn drain_mr_closes_issues(
|
||||
let mut result = DrainResult::default();
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
debug!(reclaimed, "Reclaimed stale mr_closes_issues locks");
|
||||
}
|
||||
// Note: stale locks are reclaimed once at sync entry point, not here
|
||||
|
||||
let claimable_counts = count_claimable_jobs(conn, project_id)?;
|
||||
let total_pending = claimable_counts
|
||||
@@ -1523,10 +1531,7 @@ async fn drain_mr_diffs(
|
||||
let mut result = DrainResult::default();
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
debug!(reclaimed, "Reclaimed stale mr_diffs locks");
|
||||
}
|
||||
// Note: stale locks are reclaimed once at sync entry point, not here
|
||||
|
||||
let claimable_counts = count_claimable_jobs(conn, project_id)?;
|
||||
let total_pending = claimable_counts.get("mr_diffs").copied().unwrap_or(0);
|
||||
|
||||
59
src/main.rs
59
src/main.rs
@@ -18,15 +18,16 @@ use lore::cli::commands::{
|
||||
print_event_count, print_event_count_json, print_file_history, print_file_history_json,
|
||||
print_generate_docs, print_generate_docs_json, print_ingest_summary, print_ingest_summary_json,
|
||||
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
||||
print_list_notes, print_list_notes_json, print_search_results, print_search_results_json,
|
||||
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, print_stats,
|
||||
print_stats_json, print_sync, print_sync_json, print_sync_status, print_sync_status_json,
|
||||
print_timeline, print_timeline_json_with_meta, print_trace, print_trace_json, print_who_human,
|
||||
print_who_json, query_notes, run_auth_test, run_count, run_count_events, run_cron_install,
|
||||
run_cron_status, run_cron_uninstall, run_doctor, run_drift, run_embed, run_file_history,
|
||||
run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs,
|
||||
run_me, run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status,
|
||||
run_timeline, run_token_set, run_token_show, run_who,
|
||||
print_list_notes, print_list_notes_json, print_related_human, print_related_json,
|
||||
print_search_results, print_search_results_json, print_show_issue, print_show_issue_json,
|
||||
print_show_mr, print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json,
|
||||
print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta,
|
||||
print_trace, print_trace_json, print_who_human, print_who_json, query_notes, run_auth_test,
|
||||
run_count, run_count_events, run_cron_install, run_cron_status, run_cron_uninstall, run_doctor,
|
||||
run_drift, run_embed, run_file_history, run_generate_docs, run_ingest, run_ingest_dry_run,
|
||||
run_init, run_list_issues, run_list_mrs, run_me, run_related, run_search, run_show_issue,
|
||||
run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, run_token_set, run_token_show,
|
||||
run_who,
|
||||
};
|
||||
use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme};
|
||||
use lore::cli::robot::{RobotMeta, strip_schemas};
|
||||
@@ -225,6 +226,22 @@ async fn main() {
|
||||
)
|
||||
.await
|
||||
}
|
||||
Some(Commands::Related {
|
||||
query_or_type,
|
||||
iid,
|
||||
limit,
|
||||
project,
|
||||
}) => {
|
||||
handle_related(
|
||||
cli.config.as_deref(),
|
||||
&query_or_type,
|
||||
iid,
|
||||
limit,
|
||||
project.as_deref(),
|
||||
robot_mode,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Some(Commands::Stats(args)) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
|
||||
Some(Commands::Embed(args)) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
|
||||
Some(Commands::Sync(args)) => {
|
||||
@@ -1996,7 +2013,7 @@ async fn handle_timeline(
|
||||
if robot_mode {
|
||||
print_timeline_json_with_meta(
|
||||
&result,
|
||||
result.total_events_before_limit,
|
||||
result.total_filtered_events,
|
||||
params.depth,
|
||||
!params.no_mentions,
|
||||
args.fields.as_deref(),
|
||||
@@ -3256,6 +3273,28 @@ async fn handle_drift(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_related(
|
||||
config_override: Option<&str>,
|
||||
query_or_type: &str,
|
||||
iid: Option<i64>,
|
||||
limit: usize,
|
||||
project: Option<&str>,
|
||||
robot_mode: bool,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let start = std::time::Instant::now();
|
||||
let config = Config::load(config_override)?;
|
||||
let effective_project = config.effective_project(project);
|
||||
let response = run_related(&config, query_or_type, iid, limit, effective_project).await?;
|
||||
let elapsed_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
if robot_mode {
|
||||
print_related_json(&response, elapsed_ms);
|
||||
} else {
|
||||
print_related_human(&response);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn handle_list_compat(
|
||||
config_override: Option<&str>,
|
||||
|
||||
@@ -54,7 +54,9 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
|
||||
|
||||
// FTS5 boolean operators are case-sensitive uppercase keywords.
|
||||
// Pass them through unquoted so users can write "switch AND health".
|
||||
const FTS5_OPERATORS: &[&str] = &["AND", "OR", "NOT", "NEAR"];
|
||||
// Note: NEAR is a function NEAR(term1 term2, N), not an infix operator.
|
||||
// Users who need NEAR syntax should use FtsQueryMode::Raw.
|
||||
const FTS5_OPERATORS: &[&str] = &["AND", "OR", "NOT"];
|
||||
|
||||
let mut result = String::with_capacity(trimmed.len() + 20);
|
||||
for (i, token) in trimmed.split_whitespace().enumerate() {
|
||||
|
||||
Reference in New Issue
Block a user