feat(cli): implement 'lore file-history' command (bd-z94)
Adds file-history command showing which MRs touched a file, with:
- Rename chain resolution via BFS (resolve_rename_chain from bd-1yx)
- DiffNote discussion snippets with --discussions flag
- --merged filter, --no-follow-renames, -n limit
- Human output with styled MR list and rename chain display
- Robot JSON output with {ok, data, meta} envelope
- Autocorrect registry and robot-docs manifest entry
- Fixes a pre-existing bug: --no-status was missing from the sync autocorrect registry
@@ -1 +1 @@
-bd-1yx
+bd-z94

docs/plan-expose-discussion-ids.feedback-1.md (new file, 202 lines)
@@ -0,0 +1,202 @@
No `## Rejected Recommendations` section appears in the plan you pasted, so the revisions below are all net-new.

1. **Add an explicit “Bridge Contract” and fix scope inconsistency**

Analysis: The plan says “Three changes” but defines four. More importantly, identifier requirements are scattered. A single contract section prevents drift and makes every new read surface prove it can drive a write call.

```diff
@@
-**Scope**: Three changes, delivered in order:
+**Scope**: Four workstreams, delivered in order:
 1. Add `gitlab_discussion_id` to notes output
 2. Add `gitlab_discussion_id` to show command discussion groups
 3. Add a standalone `discussions` list command
 4. Fix robot-docs to list actual field names instead of opaque type references
+
+## Bridge Contract (Cross-Cutting)
+Every read payload that surfaces notes/discussions MUST include:
+- `project_path`
+- `noteable_type`
+- `parent_iid`
+- `gitlab_discussion_id`
+- `gitlab_note_id` (when note-level data is returned)
+This contract is required so agents can deterministically construct `glab api` write calls.
```

2. **Normalize identifier naming now (break ambiguous names)**

Analysis: The current `id`/`gitlab_id` naming is ambiguous in mixed payloads. Rename to the explicit `note_id` and `gitlab_note_id` now (you explicitly don’t care about backward compatibility). This reduces automation mistakes.

```diff
@@ 1b. Add field to `NoteListRow`
-pub struct NoteListRow {
-    pub id: i64,
-    pub gitlab_id: i64,
+pub struct NoteListRow {
+    pub note_id: i64,        // local DB id
+    pub gitlab_note_id: i64, // GitLab note id
@@
@@ 1c. Add field to `NoteListRowJson`
-pub struct NoteListRowJson {
-    pub id: i64,
-    pub gitlab_id: i64,
+pub struct NoteListRowJson {
+    pub note_id: i64,
+    pub gitlab_note_id: i64,
@@
-#### 2f. Add `gitlab_note_id` to note detail structs in show
-While we're here, add `gitlab_id` to `NoteDetail`, `MrNoteDetail`, and their JSON
+#### 2f. Add `gitlab_note_id` to note detail structs in show
+While we're here, add `gitlab_note_id` to `NoteDetail`, `MrNoteDetail`, and their JSON
 counterparts.
```

3. **Stop positional column indexing for these changes**

Analysis: In `list.rs`, row extraction is positional (`row.get(18)`, etc.). Adding fields is fragile and easy to break silently. Use named aliases and named lookup for robustness.

```diff
@@ 1a/1b SQL + query_map
-  p.path_with_namespace AS project_path
+  p.path_with_namespace AS project_path,
+  d.gitlab_discussion_id AS gitlab_discussion_id
@@
-  project_path: row.get(18)?,
-  gitlab_discussion_id: row.get(19)?,
+  project_path: row.get("project_path")?,
+  gitlab_discussion_id: row.get("gitlab_discussion_id")?,
```
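
To make the named-lookup pattern concrete, here is a minimal self-contained sketch using rusqlite; the struct, table, and join names are illustrative, not lore's actual schema:

```rust
use rusqlite::{Connection, Result};

struct NoteRow {
    project_path: String,
    gitlab_discussion_id: Option<String>,
}

fn load_notes(conn: &Connection) -> Result<Vec<NoteRow>> {
    let mut stmt = conn.prepare(
        "SELECT p.path_with_namespace AS project_path,
                d.gitlab_discussion_id AS gitlab_discussion_id
         FROM notes n
         JOIN discussions d ON d.id = n.discussion_id
         JOIN projects p ON p.id = n.project_id",
    )?;
    let rows = stmt.query_map([], |row| {
        Ok(NoteRow {
            // Named lookup: resilient to SELECT-list reordering,
            // unlike positional row.get(18) / row.get(19).
            project_path: row.get("project_path")?,
            gitlab_discussion_id: row.get("gitlab_discussion_id")?,
        })
    })?;
    rows.collect()
}
```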

4. **Redesign `discussions` query to avoid correlated subquery fanout**

Analysis: The proposed query uses many correlated subqueries per row. That’s acceptable for tiny MR-scoped sets, but it degrades for project-wide scans. Use a base CTE + one rollup pass over notes.

```diff
@@ 3c. SQL Query
-SELECT
-  d.id,
-  ...
-  (SELECT COUNT(*) FROM notes n2 WHERE n2.discussion_id = d.id AND n2.is_system = 0) AS note_count,
-  (SELECT n3.author_username FROM notes n3 WHERE n3.discussion_id = d.id ORDER BY n3.position LIMIT 1) AS first_author,
-  ...
-FROM discussions d
+WITH base AS (
+  SELECT d.id, d.gitlab_discussion_id, d.noteable_type, d.project_id, d.issue_id, d.merge_request_id,
+         d.individual_note, d.first_note_at, d.last_note_at, d.resolvable, d.resolved
+  FROM discussions d
+  {where_sql}
+),
+note_rollup AS (
+  SELECT n.discussion_id,
+         COUNT(*) FILTER (WHERE n.is_system = 0) AS user_note_count,
+         COUNT(*) AS total_note_count,
+         MIN(CASE WHEN n.is_system = 0 THEN n.position END) AS first_user_pos
+  FROM notes n
+  JOIN base b ON b.id = n.discussion_id
+  GROUP BY n.discussion_id
+)
+SELECT ...
+FROM base b
+LEFT JOIN note_rollup r ON r.discussion_id = b.id
```

5. **Add explicit index work for new access patterns**

Analysis: Existing indexes are good but not ideal for the new list patterns (`project + last_note`, note position ordering inside a discussion). Add migration entries to keep latency stable.

````diff
@@ ## 3. Add Standalone `discussions` List Command
+#### 3h. Add migration for discussion-list performance
+**File**: `migrations/027_discussions_list_indexes.sql`
+```sql
+CREATE INDEX IF NOT EXISTS idx_discussions_project_last_note
+  ON discussions(project_id, last_note_at DESC, id DESC);
+CREATE INDEX IF NOT EXISTS idx_discussions_project_first_note
+  ON discussions(project_id, first_note_at DESC, id DESC);
+CREATE INDEX IF NOT EXISTS idx_notes_discussion_position
+  ON notes(discussion_id, position);
+```
````

6. **Add keyset pagination (critical for agent workflows)**

Analysis: `--limit` alone is not enough for automation over large datasets. Add cursor-based pagination with deterministic sort keys and `next_cursor` in JSON.

```diff
@@ 3a. CLI Args
+    /// Keyset cursor from previous response
+    #[arg(long, help_heading = "Output")]
+    pub cursor: Option<String>,
@@
@@ Response Schema
-  "total_count": 15,
-  "showing": 15
+  "total_count": 15,
+  "showing": 15,
+  "next_cursor": "eyJsYXN0X25vdGVfYXQiOjE3MDAwMDAwMDAwMDAsImlkIjoxMjN9"
@@
@@ Validation Criteria
+7. `lore -J discussions ... --cursor <token>` returns the next stable page without duplicates/skips
```
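
For illustration, a minimal sketch of the cursor token this implies: base64-encoded JSON of the last row's sort key plus an id tiebreaker. It assumes the `base64` and `serde`/`serde_json` crates; the names are hypothetical:

```rust
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
use serde::{Deserialize, Serialize};

/// Hypothetical cursor payload: sort key and id of the last row
/// on the previous page, giving a deterministic resume point.
#[derive(Serialize, Deserialize)]
struct Cursor {
    last_note_at: i64,
    id: i64,
}

fn encode_cursor(c: &Cursor) -> String {
    URL_SAFE_NO_PAD.encode(serde_json::to_vec(c).expect("cursor is serializable"))
}

fn decode_cursor(token: &str) -> Option<Cursor> {
    let bytes = URL_SAFE_NO_PAD.decode(token).ok()?;
    serde_json::from_slice(&bytes).ok()
}
```

The example `next_cursor` above decodes to exactly this shape: `{"last_note_at":1700000000000,"id":123}`.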

7. **Fix semantic ambiguities in discussion summary fields**

Analysis: `note_count` is ambiguous, and `first_author` can accidentally be a system-note author. Make the fields explicit and consistent with the non-system default behavior.

```diff
@@ Response Schema
-  "note_count": 3,
-  "first_author": "elovegrove",
+  "user_note_count": 3,
+  "total_note_count": 4,
+  "first_user_author": "elovegrove",
@@
@@ 3d. Filters struct / path behavior
-- `path` → `EXISTS (SELECT 1 FROM notes n WHERE n.discussion_id = d.id AND n.position_new_path LIKE ?)`
+- `path` → match on BOTH `position_new_path` and `position_old_path` (exact/prefix)
```

8. **Enrich show outputs with actionable thread metadata**

Analysis: Adding only the discussion id helps, but agents still need thread state and note ids to pick targets correctly. Add `resolvable`, `resolved`, `last_note_at_iso`, and `gitlab_note_id` to the show discussion payloads.

```diff
@@ 2a/2b show discussion structs
 pub struct DiscussionDetailJson {
     pub gitlab_discussion_id: String,
+    pub resolvable: bool,
+    pub resolved: bool,
+    pub last_note_at_iso: String,
     pub notes: Vec<NoteDetailJson>,
@@
 pub struct NoteDetailJson {
+    pub gitlab_note_id: i64,
     pub author_username: String,
```

9. **Harden robot-docs against schema drift with tests**

Analysis: Static JSON in `main.rs` will drift again. Add a lightweight contract test that asserts the docs include the required fields for `notes`, `discussions`, and the show payloads.

```diff
@@ 4. Fix Robot-Docs Response Schemas
+#### 4f. Add robot-docs contract tests
+**File**: `src/main.rs` (or a dedicated test module)
+- Assert `robot-docs` contains `gitlab_discussion_id` and `gitlab_note_id` in:
+  - `notes.response_schema`
+  - `issues.response_schema.show`
+  - `mrs.response_schema.show`
+  - `discussions.response_schema`
```

10. **Adjust delivery order to reduce rework and include the missing CSV path**

Analysis: In your sample `handle_discussions`, `csv` is declared in the args but never handled. Also, robot-docs should land after all payload changes. The sequence should minimize churn.

```diff
@@ Delivery Order
-3. **Change 4** (robot-docs) — depends on 1 and 2 being done so schemas are accurate.
-4. **Change 3** (discussions command) — largest change, depends on 1 for design consistency.
+3. **Change 3** (discussions command + indexes + pagination) — largest change.
+4. **Change 4** (robot-docs + contract tests) — last, after payloads are final.
@@ 3e. Handler wiring
 match format {
     "json" => ...
     "jsonl" => ...
+    "csv" => print_list_discussions_csv(&result),
     _ => ...
 }
```

If you want, I can produce a single consolidated revised plan markdown with these edits applied so you can drop it in directly.

docs/plan-expose-discussion-ids.feedback-2.md (new file, 162 lines)
@@ -0,0 +1,162 @@
Best non-rejected upgrades I’d make to this plan are below. They focus on reducing schema drift, making robot output safer to consume, and improving performance behavior at scale.

1. Add a shared contract model and field constants first (before workstreams 1-4)

Rationale: Right now each command has its own structs and ad-hoc mapping. That is exactly how drift happens. A single contract definition reused by `notes`, `show`, `discussions`, and robot-docs gives compile-time coupling between output payloads and docs. It also makes future fields cheaper and safer to add.

```diff
@@ Scope: Four workstreams, delivered in order:
-1. Add `gitlab_discussion_id` to notes output
-2. Add `gitlab_discussion_id` to show command discussion groups
-3. Add a standalone `discussions` list command
-4. Fix robot-docs to list actual field names instead of opaque type references
+0. Introduce shared Bridge Contract model/constants used by notes/show/discussions/robot-docs
+1. Add `gitlab_discussion_id` to notes output
+2. Add `gitlab_discussion_id` to show command discussion groups
+3. Add a standalone `discussions` list command
+4. Fix robot-docs to list actual field names instead of opaque type references

+## 0. Shared Contract Model (Cross-Cutting)
+Define canonical required-field constants and shared mapping helpers, then consume them in:
+- `src/cli/commands/list.rs`
+- `src/cli/commands/show.rs`
+- `src/cli/robot.rs`
+- `src/main.rs` robot-docs builder
+This removes duplicated field-name strings and prevents docs/output mismatch.
```

2. Make bridge fields “non-droppable” in robot mode

Rationale: The current plan adds fields, but `--fields` can still remove them. That breaks the core read/write bridge contract in exactly the workflows this change is trying to fix. In robot mode, contract fields should always be force-included.

```diff
@@ ## Bridge Contract (Cross-Cutting)
 Every read payload that surfaces notes or discussions **MUST** include:
 - `project_path`
 - `noteable_type`
 - `parent_iid`
 - `gitlab_discussion_id`
 - `gitlab_note_id` (when note-level data is returned — i.e., in notes list and show detail)

+### Field Filtering Guardrail
+In robot mode, `filter_fields` must force-include Bridge Contract fields even when users pass a narrower `--fields` list.
+Human/table mode keeps existing behavior.
```
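
A minimal sketch of what the guardrail could look like; `filter_note`, `effective_fields`, and the constant are hypothetical names, not lore's existing `filter_fields` signature:

```rust
use std::collections::HashSet;

const BRIDGE_FIELDS_NOTES: &[&str] = &[
    "project_path", "noteable_type", "parent_iid",
    "gitlab_discussion_id", "gitlab_note_id",
];

// Union the caller's --fields list with the bridge set, so contract
// fields can never be dropped in robot mode.
fn effective_fields(requested: &[String]) -> HashSet<String> {
    let mut fields: HashSet<String> = requested.iter().cloned().collect();
    fields.extend(BRIDGE_FIELDS_NOTES.iter().map(|s| s.to_string()));
    fields
}

fn filter_note(note: &mut serde_json::Map<String, serde_json::Value>, requested: &[String]) {
    let keep = effective_fields(requested);
    note.retain(|key, _| keep.contains(key));
}
```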

3. Replace correlated subqueries in the `discussions` rollup with a single-pass window/aggregate pattern

Rationale: Your CTE is better than naive fanout, but it still uses multiple correlated sub-selects per discussion for the first author/body/path. At 200K+ discussions this can regress badly depending on cache/index state. A window-ranked `notes` CTE with grouped aggregates is usually faster and more predictable in SQLite.

```diff
@@ #### 3c. SQL Query
-Core query uses a CTE + rollup to avoid correlated subquery fanout on larger result sets:
+Core query uses a CTE + ranked-notes rollup (window function) to avoid per-row correlated subqueries:

-WITH filtered_discussions AS (...),
-note_rollup AS (
-  SELECT
-    n.discussion_id,
-    SUM(...) AS note_count,
-    (SELECT ... LIMIT 1) AS first_author,
-    (SELECT ... LIMIT 1) AS first_note_body,
-    (SELECT ... LIMIT 1) AS position_new_path,
-    (SELECT ... LIMIT 1) AS position_new_line
-  FROM notes n
-  ...
-)
+WITH filtered_discussions AS (...),
+ranked_notes AS (
+  SELECT
+    n.*,
+    ROW_NUMBER() OVER (PARTITION BY n.discussion_id ORDER BY n.position, n.id) AS rn
+  FROM notes n
+  WHERE n.discussion_id IN (SELECT id FROM filtered_discussions)
+),
+note_rollup AS (
+  SELECT
+    discussion_id,
+    SUM(CASE WHEN is_system = 0 THEN 1 ELSE 0 END) AS note_count,
+    MAX(CASE WHEN rn = 1 AND is_system = 0 THEN author_username END) AS first_author,
+    MAX(CASE WHEN rn = 1 AND is_system = 0 THEN body END) AS first_note_body,
+    MAX(CASE WHEN position_new_path IS NOT NULL THEN position_new_path END) AS position_new_path,
+    MAX(CASE WHEN position_new_line IS NOT NULL THEN position_new_line END) AS position_new_line
+  FROM ranked_notes
+  GROUP BY discussion_id
+)
```

4. Add direct GitLab ID filters for deterministic bridging

Rationale: Bridge workflows often start from one known ID. You already have `gitlab_note_id` in the notes filters, but discussion filtering still looks internal-ID-centric. Add explicit GitLab-ID filters so agents do not need extra translation calls.

```diff
@@ #### 3a. CLI Args
 pub struct DiscussionsArgs {
+    /// Filter by GitLab discussion ID
+    #[arg(long, help_heading = "Filters")]
+    pub gitlab_discussion_id: Option<String>,
@@

@@ #### 3d. Filters struct
 pub struct DiscussionListFilters {
+    pub gitlab_discussion_id: Option<String>,
@@
 }
```

```diff
@@ ## 1. Add `gitlab_discussion_id` to Notes Output
+#### 1g. Add `--gitlab-discussion-id` filter to notes
+Allow filtering notes directly by GitLab thread ID (not only the internal discussion ID).
+This enables one-hop note retrieval from external references.
```

5. Add optional note expansion to `discussions` for fewer round-trips

Rationale: Today the agent flow is often `discussions -> show`. Optional embedded notes (`--include-notes N`) give a fast path for “list unresolved threads with latest context” without forcing full show payloads.

```diff
@@ ### Design
 lore -J discussions --for-mr 99 --resolution unresolved
+lore -J discussions --for-mr 99 --resolution unresolved --include-notes 2

@@ #### 3a. CLI Args
+    /// Include up to N latest notes per discussion (0 = none)
+    #[arg(long, default_value = "0", help_heading = "Output")]
+    pub include_notes: usize,
```

6. Upgrade robot-docs from string blobs to a structured schema + explicit contract block

Rationale: `contains("gitlab_discussion_id")` tests on schema strings are brittle. A structured schema object gives machine-checked docs and reliable test assertions. Add a contract section for agent consumers.

```diff
@@ ## 4. Fix Robot-Docs Response Schemas
-#### 4a. Notes response_schema
-Replace stringly-typed schema snippets...
+#### 4a. Notes response_schema (structured)
+Represent response fields as JSON objects (field -> type/nullable), not freeform strings.

+#### 4g. Add `bridge_contract` section in robot-docs
+Publish the canonical required fields per entity:
+- notes
+- discussions
+- show.discussions
+- show.notes
```
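
As a sketch of the structured shape (the field names and layout here are assumptions, not the existing robot-docs format):

```rust
use serde::Serialize;

// One machine-readable entry per response field, instead of a freeform
// schema string that tests can only substring-match.
#[derive(Serialize)]
struct FieldDoc {
    name: &'static str,
    ty: &'static str, // "string" | "integer" | "boolean" | ...
    nullable: bool,
}

fn discussions_schema() -> Vec<FieldDoc> {
    vec![
        FieldDoc { name: "gitlab_discussion_id", ty: "string", nullable: false },
        FieldDoc { name: "noteable_type", ty: "string", nullable: false },
        FieldDoc { name: "parent_iid", ty: "integer", nullable: false },
        FieldDoc { name: "project_path", ty: "string", nullable: false },
    ]
}
```

A contract test can then assert membership on the deserialized list instead of substring-matching a schema string.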

7. Strengthen validation: add CLI-level contract tests and perf guardrails

Rationale: Most current tests are unit-level struct/query checks. Add end-to-end JSON contract tests via the command handlers, plus a benchmark-style regression test (ignored by default) so performance work stays intentional.

```diff
@@ ## Validation Criteria
 8. Bridge Contract fields (...) are present in every applicable read payload
+9. Contract fields remain present even with `--fields` in robot mode
+10. `discussions` query meets the performance guardrail on a representative fixture (documented threshold)

@@ ### Tests
+#### Test: robot-mode fields cannot drop bridge contract keys
+Run notes/discussions JSON output through the `filter_fields` path and assert the required keys remain.
+
+#### Test: CLI contract integration
+Invoke the command handlers for `notes`, `discussions`, `mrs <iid>`, parse the JSON, and assert the required keys and types.
+
+#### Test (ignored): large-fixture performance regression
+Generate a representative fixture and assert `query_discussions` stays under a target elapsed time.
```

If you want, I can now produce a full “v2 plan” document that applies these diffs end-to-end (including revised delivery order and complete updated sections).

docs/plan-expose-discussion-ids.feedback-4.md.bak (new file, 160 lines)
@@ -0,0 +1,160 @@
I reviewed the plan end-to-end and focused only on new improvements (none of the items in `## Rejected Recommendations` are re-proposed).

1. Add direct `--discussion-id` retrieval paths

Rationale: This removes a full discovery hop for the exact workflow that failed (replying to a known thread). It also reduces ambiguity and query cost when an agent already has the thread ID.

```diff
@@ Core Changes
 | 7 | Fix robot-docs to list actual field names | Docs | Small |
+| 8 | Add direct `--discussion-id` filter to notes/discussions/show | Core | Small |

@@ Change 3: Add Standalone `discussions` List Command
 lore -J discussions --for-mr 99 --cursor <token>     # keyset pagination
+lore -J discussions --discussion-id 6a9c1750b37d...  # direct lookup

@@ 3a. CLI Args
+    #[arg(long, conflicts_with_all = ["for_issue", "for_mr"], help_heading = "Filters")]
+    pub discussion_id: Option<String>,

@@ Change 1: Add `gitlab_discussion_id` to Notes Output
+Add a `--discussion-id <hex>` filter to `notes` for direct note retrieval within one thread.
```

2. Add a shared filter compiler to eliminate count/query drift

Rationale: The plan currently repeats filters across the data query, the `total_count` query, and the `incomplete_rows` count query. That is a classic reliability bug source. A single compiled filter object makes count semantics provably consistent.

```diff
@@ Count Semantics (Cross-Cutting Convention)
+## Filter Compiler (NEW, Cross-Cutting Convention)
+All list commands must build predicates via a shared `CompiledFilters` object that emits:
+- SQL predicate fragment
+- bind parameters
+- canonical filter string (for the cursor hash)
+The same compiled object is reused by:
+- the page data query
+- the `total_count` query
+- the `incomplete_rows` query
```
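
A minimal sketch of the compiled-filter idea; the struct and method names are illustrative:

```rust
use rusqlite::ToSql;

struct CompiledFilters {
    where_sql: String,              // e.g. "WHERE d.project_id = ?1 AND d.resolved = 0"
    params: Vec<Box<dyn ToSql>>,    // bind values, in ?N order
    canonical: String,              // stable string, hashed into the cursor token
}

impl CompiledFilters {
    /// The page, total_count, and incomplete_rows queries all reuse the
    /// same predicate, so counts can never drift from the page contents.
    fn count_sql(&self) -> String {
        format!("SELECT COUNT(*) FROM discussions d {}", self.where_sql)
    }
}
```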

3. Harden keyset pagination semantics for `DESC`, limits, and client ergonomics

Rationale: `(sort_value, id) > (?, ?)` is only correct for ascending order; a descending sort needs `<`. Also add an explicit `has_more` so clients don’t infer it from cursor nullability.

````diff
@@ Keyset Pagination (Cross-Cutting, Change B)
-```sql
-WHERE (sort_value, id) > (?, ?)
-```
+Use the comparator matching the order:
+- ASC: `(sort_value, id) > (?, ?)`
+- DESC: `(sort_value, id) < (?, ?)`

@@ 3a. CLI Args
+    #[arg(short = 'n', long = "limit", default_value = "50", value_parser = clap::value_parser!(usize).range(1..=500), help_heading = "Output")]
+    pub limit: usize,

@@ Response Schema
-  "next_cursor": "aW...xyz=="
+  "next_cursor": "aW...xyz==",
+  "has_more": true
````
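
To pin down the comparator rule, a small sketch (column names illustrative); SQLite has supported row-value comparisons like `(a, b) > (?, ?)` since 3.15:

```rust
enum Order {
    Asc,
    Desc,
}

fn keyset_predicate(order: &Order) -> &'static str {
    match order {
        // Ascending: resume strictly after the last-seen (sort_value, id).
        Order::Asc => "(last_note_at, id) > (?1, ?2)",
        // Descending: resume strictly before it.
        Order::Desc => "(last_note_at, id) < (?1, ?2)",
    }
}
```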

4. Add DB-level entity integrity invariants (not just response invariants)

Rationale: Response-side filtering is good, but DB correctness should also be guarded. This prevents silent corruption and bad joins from ingestion or future migrations.

```diff
@@ Contract Invariants (NEW)
+### Entity Integrity Invariants (DB + Ingest)
+1. `discussions` must belong to exactly one parent (`issue_id` XOR `merge_request_id`).
+2. `discussions.noteable_type` must match the populated parent column.
+3. Natural-key uniqueness is enforced where valid:
+   - `(project_id, gitlab_discussion_id)` unique for discussions.
+4. Ingestion must reject/quarantine rows violating invariants and report counts.

@@ Supporting Indexes (Cross-Cutting, Change D)
+CREATE UNIQUE INDEX IF NOT EXISTS idx_discussions_project_gitlab_discussion_id
+  ON discussions(project_id, gitlab_discussion_id);
```

5. Switch bulk note loading to streaming grouping (avoid large intermediate vecs)

Rationale: The current bulk strategy still materializes all notes before grouping. Streaming into the map cuts peak memory and improves large-MR stability.

```diff
@@ Change 2e. Constructor — use bulk notes map
-let all_note_rows: Vec<MrNoteDetail> = ... // From bulk query above
-let notes_by_discussion: HashMap<i64, Vec<MrNoteDetail>> =
-    all_note_rows.into_iter().fold(HashMap::new(), |mut map, note| {
-        map.entry(note.discussion_id).or_insert_with(Vec::new).push(note);
-        map
-    });
+let mut notes_by_discussion: HashMap<i64, Vec<MrNoteDetail>> = HashMap::new();
+for row in bulk_note_stmt.query_map(params, map_note_row)? {
+    let note = row?;
+    notes_by_discussion.entry(note.discussion_id).or_default().push(note);
+}
```

6. Make freshness tri-state (`fresh|stale|unknown`) and fail closed on unknown with `--require-fresh`

Rationale: `stale: bool` alone cannot represent “never synced / unknown project freshness.” For write safety, unknown freshness should be explicit and rejected under freshness constraints.

```diff
@@ Freshness Metadata & Staleness Guards
 pub struct ResponseMeta {
     pub elapsed_ms: i64,
     pub data_as_of_iso: String,
     pub sync_lag_seconds: i64,
     pub stale: bool,
+    pub freshness_state: String, // "fresh" | "stale" | "unknown"
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub freshness_reason: Option<String>,
     pub incomplete_rows: i64,
@@
-if sync_lag_seconds > max_age_secs {
+if freshness_state == "unknown" || sync_lag_seconds > max_age_secs {
```
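
A sketch of the tri-state plus the fail-closed guard; the enum and error strings are illustrative:

```rust
enum Freshness {
    Fresh,
    Stale { lag_seconds: i64 },
    Unknown, // never synced, or no sync metadata recorded for the project
}

fn check_require_fresh(state: &Freshness, max_age_secs: i64) -> Result<(), String> {
    match state {
        Freshness::Stale { lag_seconds } if *lag_seconds > max_age_secs => Err(format!(
            "data is {lag_seconds}s old (limit {max_age_secs}s); run `lore sync`"
        )),
        // Fail closed: unknown freshness is rejected under --require-fresh.
        Freshness::Unknown => Err("freshness unknown; run `lore sync` first".into()),
        _ => Ok(()),
    }
}
```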

7. Tune indexes to match the actual ORDER BY paths in the window queries

Rationale: `idx_notes_discussion_position` is likely insufficient for the two window orderings. A covering-style index aligned with the partition/order keys reduces random table lookups.

```diff
@@ Supporting Indexes (Cross-Cutting, Change D)
--- Notes: window function ORDER BY (discussion_id, position) for ROW_NUMBER()
-CREATE INDEX IF NOT EXISTS idx_notes_discussion_position
-  ON notes(discussion_id, position);
+-- Notes: support dual ROW_NUMBER() orderings and reduce table lookups
+CREATE INDEX IF NOT EXISTS idx_notes_discussion_window
+  ON notes(discussion_id, is_system, position, created_at, gitlab_id);
```

8. Add a phased rollout gate before strict exclusion becomes the default

Rationale: Enforcing `gitlab_* IS NOT NULL` immediately can hide data if existing rows are incomplete. A short observation gate prevents sudden regressions while preserving the end-state contract.

```diff
@@ Delivery Order
+Batch 0: Observability gate (NEW)
+- Ship `incomplete_rows` and freshness meta first
+- Measure the incomplete rate across real datasets
+- If the incomplete ratio <= threshold, enable strict exclusion defaults
+- If above threshold, block rollout and fix ingestion quality first
+
 Change 1 (notes output) ──┐
```

9. Add property-based invariants for pagination/count correctness

Rationale: Your current tests are scenario-based and good, but randomized property tests are much better at catching edge-case cursor/count bugs.

````diff
@@ Tests (Change 3 / Change B)
+**Test 12**: Property-based pagination invariants (`proptest`)
+```rust
+#[test]
+fn prop_discussion_cursor_no_overlap_no_gap_under_random_data() { /* ... */ }
+```
+
+**Test 13**: Property-based count invariants
+```rust
+#[test]
+fn prop_total_count_and_incomplete_rows_match_filter_partition() { /* ... */ }
+```
````
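
As a sketch of what Test 12 could check, here is a self-contained property test over an in-memory model of DESC keyset pagination; it assumes the `proptest` crate, and a real test would drive `query_discussions` instead of the model:

```rust
use proptest::prelude::*;

/// Model paginator: sort DESC by (sort_key, id), resume strictly after the cursor.
fn page(rows: &[(i64, i64)], cursor: Option<(i64, i64)>, limit: usize) -> Vec<(i64, i64)> {
    let mut sorted: Vec<(i64, i64)> = rows.to_vec();
    sorted.sort_by(|a, b| b.cmp(a)); // DESC by (sort_key, id)
    sorted
        .into_iter()
        .filter(|row| cursor.map_or(true, |c| *row < c))
        .take(limit)
        .collect()
}

proptest! {
    #[test]
    fn prop_cursor_no_overlap_no_gap(
        // HashSet gives unique (sort_key, id) pairs, matching unique DB rows.
        rows in prop::collection::hash_set((0i64..50, 0i64..1000), 0..80),
        limit in 1usize..10,
    ) {
        let rows: Vec<(i64, i64)> = rows.into_iter().collect();
        let mut seen = Vec::new();
        let mut cursor = None;
        loop {
            let p = page(&rows, cursor, limit);
            if p.is_empty() { break; }
            cursor = p.last().copied();
            seen.extend(p);
        }
        // Walking every page yields each row exactly once, in order.
        let mut expected = rows.clone();
        expected.sort_by(|a, b| b.cmp(a));
        prop_assert_eq!(seen, expected);
    }
}
```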

If you want, I can now produce a fully consolidated “Plan v4” that applies these diffs cleanly into your original document so it reads as a single coherent spec.

docs/plan-expose-discussion-ids.feedback-5.md.bak (new file, 158 lines)
@@ -0,0 +1,158 @@
I reviewed the whole plan and only proposed changes that are not in your `## Rejected Recommendations`.

1. **Fix plan-internal inconsistencies first**

Analysis: The plan currently has a few self-contradictions (`8` vs `9` cross-cutting improvements, `stale` still referenced after the move to tri-state freshness). Cleaning this up prevents implementation drift and bad AC validation.

```diff
--- a/plan.md
+++ b/plan.md
@@
-**Scope**: 8 core changes + 8 cross-cutting architectural improvements across 3 tiers:
+**Scope**: 8 core changes + 9 cross-cutting architectural improvements across 3 tiers:
@@ AC-7: Freshness Metadata Present & Staleness Guards Work
-lore -J notes -n 1 | jq '.meta | {data_as_of_iso, sync_lag_seconds, stale}'
-# All fields present, stale=false if recently synced
+lore -J notes -n 1 | jq '.meta | {data_as_of_iso, sync_lag_seconds, freshness_state}'
+# All fields present, freshness_state is one of fresh|stale|unknown
@@ Change 6 Response Schema example
-  "stale": false,
+  "freshness_state": "fresh",
```

2. **Require snapshot-consistent list responses (page + counts)**

Analysis: `total_count`, `incomplete_rows`, and the page rows can drift if sync writes land between the queries. Enforcing a single read snapshot for all list commands makes pagination and counts deterministic.

```diff
--- a/plan.md
+++ b/plan.md
@@ Count Semantics (Cross-Cutting Convention)
 All list commands use consistent count fields:
+All three queries (`page`, `total_count`, `incomplete_rows`) MUST execute inside one read transaction/snapshot.
+This guarantees count/page consistency under concurrent sync writes.
```

3. **Use RAII transactions instead of manual `BEGIN/COMMIT`**

Analysis: A manual `execute_batch("BEGIN...")` is fragile on early returns. `rusqlite::Transaction` guarantees rollback on error and removes the transaction-leak risk.

```diff
--- a/plan.md
+++ b/plan.md
@@ Change 2: Consistency guarantee
-conn.execute_batch("BEGIN DEFERRED")?;
-// ... discussion query ...
-// ... bulk note query ...
-conn.execute_batch("COMMIT")?;
+let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Deferred)?;
+// ... discussion query ...
+// ... bulk note query ...
+tx.commit()?;
```

4. **Allow small focused new modules for query infrastructure**

Analysis: Keeping everything in `list.rs`/`show.rs` will become a maintenance hotspot as filters/cursors/freshness expand. A small module split reduces coupling and regression risk.

```diff
--- a/plan.md
+++ b/plan.md
@@ Change 3: File Architecture
-**No new files.** Follow existing patterns:
+Allow focused infra modules for shared logic:
+- `src/cli/query/filters.rs` (CompiledFilters + builders)
+- `src/cli/query/cursor.rs` (encode/decode/validate v2 cursors)
+- `src/cli/query/freshness.rs` (freshness computation + guards)
+Command handlers remain in the existing files.
```

5. **Add ingest-time `discussion_rollups` to avoid repeated heavy window scans**

Analysis: Window functions are good, but running them on every read over large note volumes is still expensive. Precomputing rollups during ingest gives lower and more predictable p95 latency while keeping the read paths simpler.

```diff
--- a/plan.md
+++ b/plan.md
@@ Architectural Improvements (Cross-Cutting)
+| J | Ingest-time discussion rollups (`discussion_rollups`) | Performance | Medium |
@@ Change 3 SQL strategy
-Use `ROW_NUMBER()` window function instead of correlated subqueries...
+Primary path: join precomputed `discussion_rollups` for `note_count`, `first_author`,
+`first_note_body`, `position_new_path`, `position_new_line`.
+Fallback path: window-function recompute if the rollup row is missing (defensive correctness).
```

6. **Add a deterministic numeric project selector `--project-id`**

Analysis: `-p group/repo` is human-friendly, but numeric project IDs are safer for robots and avoid fuzzy project-path ambiguity. This reduces false ambiguity failures and lookup overhead.

```diff
--- a/plan.md
+++ b/plan.md
@@ DiscussionsArgs
     #[arg(short = 'p', long, help_heading = "Filters")]
     pub project: Option<String>,
+    #[arg(long, conflicts_with = "project", help_heading = "Filters")]
+    pub project_id: Option<i64>,
@@ Ambiguity handling
+If `--project-id` is provided, IID resolution is scoped directly to that project.
+`--project-id` takes precedence over path-based project matching.
```

7. **Make path filtering rename-aware (`old` + `new`)**

Analysis: The current `--path` strategy, using only `position_new_path`, misses deleted/renamed-file discussions. Supporting side selection makes the feature materially more useful for review workflows.

```diff
--- a/plan.md
+++ b/plan.md
@@ DiscussionsArgs
     #[arg(long, help_heading = "Filters")]
     pub path: Option<String>,
+    #[arg(long, value_parser = ["either", "new", "old"], default_value = "either", help_heading = "Filters")]
+    pub path_side: String,
@@ Change 3 filtering
-Path filter matches `position_new_path`.
+Path filter semantics:
+- `either` (default): match `position_new_path` OR `position_old_path`
+- `new`: match only `position_new_path`
+- `old`: match only `position_old_path`
```

8. **Add explicit freshness behavior for empty-result queries + bootstrap backfill**

Analysis: Freshness based only on “participating rows” is undefined when the result set is empty. Define deterministic behavior, and backfill `project_sync_state` on migration so `unknown` doesn’t spike unexpectedly after deploy.

```diff
--- a/plan.md
+++ b/plan.md
@@ Freshness state logic
+Empty-result rules:
+- If the query is project-scoped (`-p` or `--project-id`), freshness is computed from that project even when no rows match.
+- If the query is unscoped and returns zero rows, freshness is computed from all tracked projects.
@@ A1. Track per-project sync timestamp
+Migration step: seed `project_sync_state` from the latest known sync metadata where available
+to avoid mass `unknown` freshness immediately after rollout.
```

9. **Upgrade `--discussion-id` from filter-only to first-class thread retrieval**

Analysis: Filtering list output by discussion ID still returns list-shaped data and partial note context. A direct thread-retrieval mode is faster for agent workflows and avoids extra commands.

```diff
--- a/plan.md
+++ b/plan.md
@@ Core Changes
-| 8 | Add direct `--discussion-id` filter to notes/discussions/show | Core | Small |
+| 8 | Add direct `--discussion-id` filter + single-thread retrieval mode | Core | Medium |
@@ Change 8
+lore -J discussions --discussion-id <id> --full-thread
+# Returns one discussion with the full notes payload (same note schema as the show command).
```

10. **Replace ad-hoc AC performance timing with a repeatable perf harness**

Analysis: `time lore ...` is noisy and machine-dependent. A reproducible seeded benchmark test gives stable guardrails and catches regressions earlier.

```diff
--- a/plan.md
+++ b/plan.md
@@ AC-10: Performance Budget
-time lore -J discussions --for-mr <iid> -n 100
-# real 0m0.100s (p95 < 150ms)
+cargo test --test perf_discussions -- --ignored --nocapture
+# Uses a seeded fixture DB and N repeated runs; asserts p95 < 150ms for the target query shape.
```
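
A sketch of such a harness; `build_seeded_fixture_db` and `query_discussions_for_mr` are hypothetical stand-ins for real helpers:

```rust
use std::time::Instant;

#[test]
#[ignore] // run explicitly: cargo test -- --ignored
fn p95_discussions_query_under_budget() {
    let conn = build_seeded_fixture_db(42); // deterministic seed
    let mut samples_ms: Vec<u128> = Vec::new();
    for _ in 0..50 {
        let start = Instant::now();
        query_discussions_for_mr(&conn, 99, 100).unwrap();
        samples_ms.push(start.elapsed().as_millis());
    }
    samples_ms.sort_unstable();
    let p95 = samples_ms[samples_ms.len() * 95 / 100]; // approximate p95
    assert!(p95 < 150, "p95 was {p95}ms, budget is 150ms");
}
```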

If you want, I can also produce a fully merged “iteration 5” rewritten plan document with these edits applied end-to-end so it’s directly executable by an implementation agent.

docs/plan-expose-discussion-ids.feedback-6.md.bak (new file, 143 lines)
@@ -0,0 +1,143 @@
Strong plan overall. The biggest gaps I’d fix are around sync-health correctness, idempotency/integrity under repeated ingests, the deleted-entity lifecycle, and reducing schema drift risk without heavy reflection machinery.

I avoided everything in your `## Rejected Recommendations` section.

**1. Add Sync Health Semantics (not just age)**

Time freshness alone can mislead after partial/failed syncs. Agents need to know whether data is both recent and complete.

```diff
@@ ## Freshness Metadata & Staleness Guards (Cross-Cutting, Change A/F/G)
-    pub freshness_state: String, // "fresh" | "stale" | "unknown"
+    pub freshness_state: String, // "fresh" | "stale" | "unknown"
+    pub sync_status: String,     // "ok" | "partial" | "failed" | "never"
+    pub last_successful_sync_run_id: Option<i64>,
+    pub last_attempted_sync_run_id: Option<i64>,
@@
-#[arg(long, help_heading = "Freshness")]
-pub require_fresh: Option<String>,
+#[arg(long, help_heading = "Freshness")]
+pub require_fresh: Option<String>,
+#[arg(long, help_heading = "Freshness")]
+pub require_sync_ok: bool,
```

Rationale: this prevents false confidence when one project is fresh-by-time but the latest sync actually failed or was partial.

---

**2. Add `--require-complete` Guard for Missing Required IDs**

You already expose `meta.incomplete_rows`; add a hard gate for automation.

```diff
@@ ## Count Semantics (Cross-Cutting Convention)
 `incomplete_rows` is computed via a dedicated COUNT query...
+Add CLI guard:
+`--require-complete` fails with exit code 19 when `meta.incomplete_rows > 0`.
+Suggested action: `lore sync --full`.
```

Rationale: agents can fail fast instead of silently acting on partial datasets.

---

**3. Strengthen Ingestion Idempotency + Referential Integrity for Notes**

You added natural-key uniqueness for discussions; do the same for notes, and enforce parent integrity at the DB level.

```diff
@@ ## Supporting Indexes (Cross-Cutting, Change D)
 CREATE UNIQUE INDEX IF NOT EXISTS idx_discussions_project_gitlab_discussion_id
   ON discussions(project_id, gitlab_discussion_id);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_notes_project_gitlab_id
+  ON notes(project_id, gitlab_id);
+
+-- Referential integrity
+-- notes.discussion_id REFERENCES discussions(id)
+-- notes.project_id REFERENCES projects(id)
```

Rationale: repeated syncs and retries won’t duplicate notes, and orphaned rows can’t accumulate.

---

**4. Add Deleted/Tombstoned Entity Lifecycle**

The current plan excludes null IDs but doesn’t define behavior when GitLab entities are deleted after sync.

```diff
@@ ## Contract Invariants (NEW)
+### Deletion Lifecycle Invariant
+1. Notes/discussions deleted upstream are tombstoned locally (`deleted_at`), not hard-deleted.
+2. All list/show commands exclude tombstoned rows by default.
+3. An optional flag `--include-deleted` exposes tombstoned rows for audit/debug.
```

Rationale: preserves auditability, prevents ghost actions on deleted objects, and avoids destructive resync behavior.

---

**5. Expand Discussions Payload for Rename Accuracy + Better Triage**

`--path-side old` is great, but the output currently only returns `position_new_*`.

```diff
@@ ## Change 3: Add Standalone `discussions` List Command
     pub position_new_path: Option<String>,
     pub position_new_line: Option<i64>,
+    pub position_old_path: Option<String>,
+    pub position_old_line: Option<i64>,
+    pub last_author: Option<String>,
+    pub participant_usernames: Vec<String>,
```

Rationale: for renamed/deleted files, agents need both old and new coordinates to act confidently; `participant_usernames` and `last_author` improve thread routing and prioritization.

---

**6. Add SQLite Busy Handling + Retry Policy**

Read transactions + concurrent sync writes can still produce `SQLITE_BUSY` under load.

```diff
@@ ## Count Semantics (Cross-Cutting Convention)
 **Snapshot consistency**: All three queries ... inside a single read transaction ...
+**Busy handling**: set `PRAGMA busy_timeout` (e.g. 5000ms) and retry transient
+`SQLITE_BUSY` errors up to 3 times with jittered backoff for read commands.
```

Rationale: improves reliability in real multi-agent usage without changing semantics.
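
A sketch of the policy in rusqlite terms; the helper names are illustrative, and it assumes a reasonably recent rusqlite (for `Connection::busy_timeout` and `Error::sqlite_error_code`):

```rust
use std::{thread, time::Duration};
use rusqlite::Connection;

fn open_read_conn(path: &str) -> rusqlite::Result<Connection> {
    let conn = Connection::open(path)?;
    conn.busy_timeout(Duration::from_millis(5000))?; // PRAGMA busy_timeout = 5000
    Ok(conn)
}

// Retry transient busy/locked errors up to 3 attempts with growing backoff
// (jitter omitted for brevity).
fn with_busy_retry<T>(mut op: impl FnMut() -> rusqlite::Result<T>) -> rusqlite::Result<T> {
    let mut delay = Duration::from_millis(50);
    for attempt in 0..3 {
        match op() {
            Err(e) if is_busy(&e) && attempt < 2 => {
                thread::sleep(delay);
                delay *= 4;
            }
            other => return other,
        }
    }
    unreachable!("the final attempt always returns above");
}

fn is_busy(e: &rusqlite::Error) -> bool {
    matches!(
        e.sqlite_error_code(),
        Some(rusqlite::ErrorCode::DatabaseBusy | rusqlite::ErrorCode::DatabaseLocked)
    )
}
```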

---

**7. Make Field Definitions Single-Source (Lightweight Drift Prevention)**

You rejected full schema generation from code; a lower-cost middle ground is shared field manifests used by both the docs and `--fields` validation.

```diff
@@ ## Change 7: Fix Robot-Docs Response Schemas
+#### 7h. Single-source field manifests (no reflection)
+Define per-command field constants (e.g. `NOTES_FIELDS`, `DISCUSSIONS_FIELDS`)
+used by:
+1) `--fields` validation/filtering
+2) `--fields minimal` expansion
+3) `robot-docs` schema rendering
```

Rationale: cuts drift risk materially while staying much simpler than reflection/snapshot infra.
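
A minimal sketch of the manifest idea; the field list and function names are illustrative:

```rust
pub const DISCUSSIONS_FIELDS: &[&str] = &[
    "gitlab_discussion_id", "noteable_type", "parent_iid", "project_path",
    "resolvable", "resolved", "user_note_count", "first_user_author",
];

/// 1) --fields validation: reject unknown names up front.
pub fn validate_fields(requested: &[String]) -> Result<(), String> {
    for f in requested {
        if !DISCUSSIONS_FIELDS.contains(&f.as_str()) {
            return Err(format!("unknown field `{f}` for discussions"));
        }
    }
    Ok(())
}

/// 3) robot-docs rendering reads the same constant, so docs cannot drift.
pub fn fields_doc() -> String {
    DISCUSSIONS_FIELDS.join(", ")
}
```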

---

**8. De-duplicate and Upgrade Test Strategy Around Concurrency**

There are duplicated tests across Change 2 and Change 3; add explicit race tests where sync writes happen between the list subqueries to prove transaction consistency.

```diff
@@ ## Tests
-**Test 6**: `--project-id` scopes IID resolution directly
-**Test 7**: `--path-side old` matches renamed file discussions
-**Test 8**: `--path-side either` matches both old and new paths
+Move the shared discussion-filter tests to a single section under Change 3.
+Add concurrency tests:
+1) count/page/incomplete consistency under concurrent sync writes
+2) show discussion+notes snapshot consistency under concurrent writes
```

Rationale: less maintenance noise, better coverage of your highest-risk correctness path.

---

If you want, I can also produce a single consolidated patch block that rewrites your plan text end-to-end with these edits applied in-place.

@@ -1,3 +1,15 @@
+---
+plan: true
+title: ""
+status: iterating
+iteration: 2
+target_iterations: 8
+beads_revision: 0
+related_plans: []
+created: 2026-02-17
+updated: 2026-02-17
+---
+
 # Plan: Expose Discussion IDs Across the Read Surface
 
 **Problem**: Agents can't bridge from lore's read output to glab's write API because
@@ -5,7 +17,7 @@
 split contract requires lore to emit every identifier an agent needs to construct a glab write
 command.
 
-**Scope**: Three changes, delivered in order:
+**Scope**: Four workstreams, delivered in order:
 1. Add `gitlab_discussion_id` to notes output
 2. Add `gitlab_discussion_id` to show command discussion groups
 3. Add a standalone `discussions` list command
@@ -13,6 +25,47 @@ command.
 
 ---
 
+## Bridge Contract (Cross-Cutting)
+
+Every read payload that surfaces notes or discussions **MUST** include:
+- `project_path`
+- `noteable_type`
+- `parent_iid`
+- `gitlab_discussion_id`
+- `gitlab_note_id` (when note-level data is returned — i.e., in notes list and show detail)
+
+This contract exists so agents can deterministically construct `glab api` write calls without
+cross-referencing multiple commands. Each workstream below must satisfy these fields in its
+output.
+
+### Field Filtering Guardrail
+
+In robot mode, `filter_fields` **MUST** force-include Bridge Contract fields even when the
+caller passes a narrower `--fields` list. This prevents agents from accidentally stripping
+the identifiers they need for write operations.
+
+**Implementation**: Add a `BRIDGE_FIELDS` constant map per entity type. In `filter_fields()`,
+when operating in robot mode, union the caller's requested fields with the bridge set before
+filtering. Human/table mode keeps existing behavior (no forced fields).
+
+```rust
+// src/cli/robot.rs
+const BRIDGE_FIELDS_NOTES: &[&str] = &[
+    "project_path", "noteable_type", "parent_iid",
+    "gitlab_discussion_id", "gitlab_note_id",
+];
+const BRIDGE_FIELDS_DISCUSSIONS: &[&str] = &[
+    "project_path", "noteable_type", "parent_iid",
+    "gitlab_discussion_id",
+];
+```
+
+In `filter_fields`, when entity is `"notes"` or `"discussions"`, merge the bridge set into the
+requested fields before filtering the JSON value. This is a ~5-line change to the existing
+function.
+
 ---
 
 ## 1. Add `gitlab_discussion_id` to Notes Output
 
 ### Why
@@ -92,9 +145,7 @@ Add `d.gitlab_discussion_id` to the SELECT list. Insert it after
   d.gitlab_discussion_id
 ```
 
-Column index shifts: the new field is at index 19 (0-based).
-
-#### 1b. Add field to `NoteListRow`
+#### 1b. Add field to `NoteListRow` and switch to named column lookup
 
 **File**: `src/cli/commands/list.rs` line ~1060
@@ -106,16 +157,24 @@ pub struct NoteListRow {
 }
 ```
 
-And in the `query_map` closure (line ~1407):
+And in the `query_map` closure (line ~1407), switch from positional indexing to named column
+lookup for the new field and ideally all fields. At minimum, the new field uses named lookup
+to avoid fragile positional shifts:
 
 ```rust
 Ok(NoteListRow {
-    // ... existing fields ...
-    project_path: row.get(18)?,
-    gitlab_discussion_id: row.get(19)?, // ADD
+    // ... existing fields using positional gets ...
+    project_path: row.get("project_path")?,
+    gitlab_discussion_id: row.get("gitlab_discussion_id")?, // ADD — named lookup
 })
 ```
 
+**Implementation note**: If converting all existing fields from positional to named lookup is
+low-risk, do it in this change. The SQL already uses aliases (`AS project_path`, `AS parent_iid`,
+etc.) which rusqlite's `row.get("name")` can resolve. This eliminates the fragility of
+column-index counting that has caused bugs in the past. If the conversion touches too many
+lines, limit named lookup to just the new field and add a follow-up task.
+
 #### 1c. Add field to `NoteListRowJson`
 
 **File**: `src/cli/commands/list.rs` line ~1093
@@ -167,6 +226,32 @@ Add a column showing a truncated discussion ID (first 8 chars) in the table view
 
 The discussion ID is critical enough for agent workflows that it belongs in `minimal`.
 
+#### 1g. Add `--gitlab-discussion-id` filter to notes
+
+Allow filtering notes directly by GitLab discussion thread ID (the external string ID, not
+the internal integer). This enables one-hop note retrieval from external references — an agent
+that received a `gitlab_discussion_id` from another command or webhook can jump straight to
+the relevant notes without knowing the internal discussion ID.
+
+**File**: `src/cli/mod.rs` (NotesArgs)
+
+```rust
+/// Filter by GitLab discussion ID
+#[arg(long, help_heading = "Filters")]
+pub gitlab_discussion_id: Option<String>,
+```
+
+**File**: `src/cli/commands/list.rs` (NoteListFilters + where clause)
+
+Add `gitlab_discussion_id: Option<String>` to `NoteListFilters`. In the WHERE construction:
+
+```sql
+-- When gitlab_discussion_id is provided:
+AND d.gitlab_discussion_id = ?
+```
+
+This is a single WHERE clause addition — minimal complexity, high value for bridge workflows.
+
 ### Tests
 
 **File**: `src/cli/commands/list_tests.rs`
@@ -303,6 +388,54 @@ fn fields_filter_retains_gitlab_discussion_id() {
 }
 ```
 
+#### Test 4: Bridge fields survive aggressive --fields filtering in robot mode
+
+```rust
+#[test]
+fn bridge_fields_forced_in_robot_mode() {
+    // Agent requests only "body" — bridge fields must still appear
+    let mut value = serde_json::json!({
+        "data": {
+            "notes": [{
+                "id": 1,
+                "body": "test",
+                "project_path": "group/repo",
+                "noteable_type": "MergeRequest",
+                "parent_iid": 42,
+                "gitlab_discussion_id": "abc123",
+                "gitlab_note_id": 500
+            }]
+        }
+    });
+
+    // In robot mode, filter_fields merges the bridge set
+    filter_fields_robot(
+        &mut value,
+        "notes",
+        &["body".to_string()],
+    );
+
+    let note = &value["data"]["notes"][0];
+    assert_eq!(note["body"], "test");
+    // Bridge fields survive despite not being requested:
+    assert!(note.get("project_path").is_some());
+    assert!(note.get("gitlab_discussion_id").is_some());
+    assert!(note.get("parent_iid").is_some());
+}
+```
+
+#### Test 5: --gitlab-discussion-id filter returns matching notes
+
+```rust
+#[test]
+fn notes_filter_by_gitlab_discussion_id() {
+    let conn = create_test_db();
+    // Insert 2 discussions with different gitlab_discussion_ids, each with notes
+    // Filter by one gitlab_discussion_id
+    // Assert only notes from the matching discussion are returned
+}
+```
+
 ---
 
 ## 2. Add `gitlab_discussion_id` to Show Command Discussion Groups
@@ -351,74 +484,98 @@ SELECT id, individual_note FROM discussions WHERE merge_request_id = ? ORDER BY
 
 ### Changes Required
 
-#### 2a. Add field to domain structs
+#### 2a. Add fields to domain structs
 
 **File**: `src/cli/commands/show.rs`
 
 ```rust
 pub struct DiscussionDetail {
     pub gitlab_discussion_id: String, // ADD
+    pub resolvable: bool,             // ADD — agents need thread state
+    pub resolved: bool,               // ADD — agents need thread state
+    pub last_note_at: i64,            // ADD — for recency sorting
     pub notes: Vec<NoteDetail>,
     pub individual_note: bool,
 }
 
 pub struct MrDiscussionDetail {
     pub gitlab_discussion_id: String, // ADD
+    pub resolvable: bool,             // ADD
+    pub resolved: bool,               // ADD
+    pub last_note_at: i64,            // ADD
     pub notes: Vec<MrNoteDetail>,
     pub individual_note: bool,
 }
 ```
 
-#### 2b. Add field to JSON structs
+#### 2b. Add fields to JSON structs
 
 ```rust
 pub struct DiscussionDetailJson {
     pub gitlab_discussion_id: String, // ADD
+    pub resolvable: bool,             // ADD
+    pub resolved: bool,               // ADD
+    pub last_note_at_iso: String,     // ADD — ISO formatted
     pub notes: Vec<NoteDetailJson>,
     pub individual_note: bool,
 }
 
 pub struct MrDiscussionDetailJson {
     pub gitlab_discussion_id: String, // ADD
+    pub resolvable: bool,             // ADD
+    pub resolved: bool,               // ADD
+    pub last_note_at_iso: String,     // ADD — ISO formatted
     pub notes: Vec<MrNoteDetailJson>,
     pub individual_note: bool,
 }
 ```
 
-#### 2c. Update queries to SELECT gitlab_discussion_id
+#### 2c. Update queries to SELECT new fields
 
 **Issue discussions** (`show.rs:325`):
 ```sql
-SELECT id, gitlab_discussion_id, individual_note FROM discussions
+SELECT id, gitlab_discussion_id, individual_note, resolvable, resolved, last_note_at
+FROM discussions
 WHERE issue_id = ? ORDER BY first_note_at
 ```
 
 **MR discussions** (`show.rs:537`):
 ```sql
-SELECT id, gitlab_discussion_id, individual_note FROM discussions
+SELECT id, gitlab_discussion_id, individual_note, resolvable, resolved, last_note_at
+FROM discussions
 WHERE merge_request_id = ? ORDER BY first_note_at
 ```
 
 #### 2d. Update query_map closures
 
-The `disc_rows` tuple changes from `(i64, bool)` to `(i64, String, bool)`.
+The `disc_rows` tuple changes from `(i64, bool)` to a richer shape. Use named columns here
+too for clarity:
 
 Issue path (`show.rs:331-335`):
 ```rust
-let disc_rows: Vec<(i64, String, bool)> = disc_stmt
+let disc_rows: Vec<(i64, String, bool, bool, bool, i64)> = disc_stmt
     .query_map([issue_id], |row| {
-        let individual: i64 = row.get(2)?;
-        Ok((row.get(0)?, row.get(1)?, individual == 1))
+        Ok((
+            row.get("id")?,
+            row.get("gitlab_discussion_id")?,
+            row.get::<_, i64>("individual_note").map(|v| v == 1)?,
+            row.get::<_, i64>("resolvable").map(|v| v == 1)?,
+            row.get::<_, i64>("resolved").map(|v| v == 1)?,
+            row.get("last_note_at")?,
+        ))
     })?
     .collect::<std::result::Result<Vec<_>, _>>()?;
 ```
 
 And where discussions are constructed (`show.rs:361`):
 ```rust
-for (disc_id, gitlab_disc_id, individual_note) in disc_rows {
+for (disc_id, gitlab_disc_id, individual_note, resolvable, resolved, last_note_at) in disc_rows {
     // ... existing note query ...
     discussions.push(DiscussionDetail {
         gitlab_discussion_id: gitlab_disc_id,
+        resolvable,
+        resolved,
+        last_note_at,
         notes,
         individual_note,
     });
@@ -434,6 +591,9 @@ impl From<&DiscussionDetail> for DiscussionDetailJson {
     fn from(disc: &DiscussionDetail) -> Self {
         Self {
             gitlab_discussion_id: disc.gitlab_discussion_id.clone(),
+            resolvable: disc.resolvable,
+            resolved: disc.resolved,
+            last_note_at_iso: format_iso_timestamp(disc.last_note_at),
             notes: disc.notes.iter().map(|n| n.into()).collect(),
             individual_note: disc.individual_note,
         }
@@ -444,6 +604,9 @@ impl From<&MrDiscussionDetail> for MrDiscussionDetailJson {
     fn from(disc: &MrDiscussionDetail) -> Self {
         Self {
             gitlab_discussion_id: disc.gitlab_discussion_id.clone(),
+            resolvable: disc.resolvable,
+            resolved: disc.resolved,
+            last_note_at_iso: format_iso_timestamp(disc.last_note_at),
             notes: disc.notes.iter().map(|n| n.into()).collect(),
             individual_note: disc.individual_note,
         }
@@ -453,9 +616,16 @@ impl From<&MrDiscussionDetail> for MrDiscussionDetailJson {
 
 #### 2f. Add `gitlab_note_id` to note detail structs in show
 
-While we're here, add `gitlab_id` to `NoteDetail`, `MrNoteDetail`, and their JSON
-counterparts. Currently show-command notes only have `author_username`, `body`, `created_at`,
-`is_system` — no note ID at all, making it impossible to reference a specific note.
+While we're here, add `gitlab_id` (as `gitlab_note_id` in JSON) to `NoteDetail`,
+`MrNoteDetail`, and their JSON counterparts. Currently show-command notes only have
+`author_username`, `body`, `created_at`, `is_system` — no note ID at all, making it impossible
+to reference a specific note. This satisfies the Bridge Contract requirement for `gitlab_note_id`
+on note-level data.
+
+**Domain structs** — add `gitlab_id: i64` field.
+**JSON structs** — add `gitlab_note_id: i64` field.
+**Queries** — add `n.gitlab_id` to the note SELECT within show.
+**From impls** — map `gitlab_id` → `gitlab_note_id`.
 
 ### Tests
 
@@ -494,12 +664,27 @@ Same pattern for MR path.
 fn discussion_detail_json_has_gitlab_discussion_id() {
     let detail = DiscussionDetail {
         gitlab_discussion_id: "deadbeef".to_string(),
+        resolvable: true,
+        resolved: false,
+        last_note_at: 1_700_000_000_000,
         notes: vec![],
         individual_note: false,
     };
     let json = DiscussionDetailJson::from(&detail);
     let value = serde_json::to_value(&json).unwrap();
     assert_eq!(value["gitlab_discussion_id"], "deadbeef");
+    assert_eq!(value["resolvable"], true);
+    assert_eq!(value["resolved"], false);
+    assert!(value.get("last_note_at_iso").is_some());
 }
 ```
 
+#### Test 4: Show note includes gitlab_note_id
+
+```rust
+#[test]
+fn show_note_detail_json_has_gitlab_note_id() {
+    // Verify NoteDetailJson serialization includes gitlab_note_id
+}
+```
 
```bash
lore -J discussions --for-issue 42

# List discussions across a project
lore -J discussions -p group/repo --since 7d

# Look up a specific discussion by GitLab ID
lore -J discussions --gitlab-discussion-id 6a9c1750b37d

# List unresolved threads with latest 2 notes inline (fewer round-trips)
lore -J discussions --for-mr 99 --resolution unresolved --include-notes 2
```

### Response Schema

```json
{
  "ok": true,
  "data": {
    "discussions": [
      {
        ...
        "resolvable": true,
        "resolved": false,
        "position_new_path": "src/components/SwitchHealthCard.vue",
        "position_new_line": 42,
        "notes": []
      }
    ],
    "total_count": 15,
    ...
  }
}
```

The `notes` array is empty by default (zero overhead). When `--include-notes N` is provided,
each discussion includes up to N of its most recent notes inline. This covers the common
agent pattern of "show me unresolved threads with context" in a single round-trip.

### File Architecture

**No new files.** Follow the existing pattern:

In `DiscussionsArgs`:

```rust
    #[arg(short = 'p', long, help_heading = "Filters")]
    pub project: Option<String>,

    /// Filter by GitLab discussion ID
    #[arg(long, help_heading = "Filters")]
    pub gitlab_discussion_id: Option<String>,

    /// Filter by resolution status (unresolved, resolved)
    #[arg(long, value_parser = ["unresolved", "resolved"], help_heading = "Filters")]
    pub resolution: Option<String>,

    #[arg(long, value_parser = ["Issue", "MergeRequest"], help_heading = "Filters")]
    pub noteable_type: Option<String>,

    /// Include up to N latest notes per discussion (0 = none, default)
    #[arg(long, default_value = "0", help_heading = "Output")]
    pub include_notes: usize,

    /// Sort field (first_note, last_note)
    #[arg(long, value_parser = ["first_note", "last_note"], default_value = "last_note", help_heading = "Sorting")]
    pub sort: String,
```

In `DiscussionListRowJson`, the inline-notes field is skipped when empty:

```rust
    pub position_new_path: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub position_new_line: Option<i64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub notes: Vec<NoteListRowJson>,
}
```

`DiscussionListResult` and `DiscussionListResultJson` follow the same result-wrapper pattern
as the existing notes listing types.

The `From` impl truncates `first_note_body` to ~120 chars for the snippet.

The `notes` field on `DiscussionListRowJson` is populated only when `--include-notes N > 0`.
It reuses the existing `NoteListRowJson` struct for consistency — agents get the same note
shape whether they come from `notes`, `show`, or `discussions --include-notes`.
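
A minimal sketch of the snippet truncation (the ~120-char cap comes from the plan; the helper name and char-boundary handling are assumptions):

```rust
/// Truncate a note body to a short snippet, respecting char boundaries.
fn snippet(body: &str, max_chars: usize) -> String {
    if body.chars().count() <= max_chars {
        return body.to_string();
    }
    let truncated: String = body.chars().take(max_chars).collect();
    format!("{truncated}…")
}
```

The `From` impl would then set something like `first_note_body_snippet: row.first_note_body.as_deref().map(|b| snippet(b, 120))`.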

#### 3c. SQL Query

**File**: `src/cli/commands/list.rs`

```rust
pub fn query_discussions(
    conn: &Connection,
    filters: &DiscussionListFilters,
    config: &Config,
) -> Result<DiscussionListResult> {
```

Core query uses a CTE + ranked-notes rollup (window function) to avoid per-row correlated
subqueries. The `ROW_NUMBER()` approach produces a single scan over the notes table, which
is more predictable than repeated LIMIT 1 sub-selects at scale (200K+ discussions):

```sql
WITH filtered_discussions AS (
  SELECT
    d.id, d.gitlab_discussion_id, d.noteable_type, d.project_id,
    d.issue_id, d.merge_request_id, d.individual_note,
    d.first_note_at, d.last_note_at, d.resolvable, d.resolved
  FROM discussions d
  JOIN projects p ON d.project_id = p.id
  {where_sql}
),
ranked_notes AS (
  SELECT
    n.discussion_id,
    n.author_username,
    n.body,
    n.is_system,
    n.position_new_path,
    n.position_new_line,
    ROW_NUMBER() OVER (
      PARTITION BY n.discussion_id
      ORDER BY n.position, n.id
    ) AS rn
  FROM notes n
  WHERE n.discussion_id IN (SELECT id FROM filtered_discussions)
),
note_rollup AS (
  SELECT
    discussion_id,
    SUM(CASE WHEN is_system = 0 THEN 1 ELSE 0 END) AS note_count,
    MAX(CASE WHEN rn = 1 AND is_system = 0 THEN author_username END) AS first_author,
    MAX(CASE WHEN rn = 1 AND is_system = 0 THEN body END) AS first_note_body,
    MAX(CASE WHEN position_new_path IS NOT NULL THEN position_new_path END) AS position_new_path,
    MAX(CASE WHEN position_new_line IS NOT NULL THEN position_new_line END) AS position_new_line
  FROM ranked_notes
  GROUP BY discussion_id
)
SELECT
  fd.id,
  fd.gitlab_discussion_id,
  fd.noteable_type,
  COALESCE(i.iid, m.iid) AS parent_iid,
  COALESCE(i.title, m.title) AS parent_title,
  p.path_with_namespace AS project_path,
  fd.individual_note,
  COALESCE(nr.note_count, 0) AS note_count,
  nr.first_author,
  nr.first_note_body,
  fd.first_note_at,
  fd.last_note_at,
  fd.resolvable,
  fd.resolved,
  nr.position_new_path,
  nr.position_new_line
FROM filtered_discussions fd
JOIN projects p ON fd.project_id = p.id
LEFT JOIN issues i ON fd.issue_id = i.id
LEFT JOIN merge_requests m ON fd.merge_request_id = m.id
LEFT JOIN note_rollup nr ON nr.discussion_id = fd.id
ORDER BY {sort_column} {order}
LIMIT ?
```

**Performance rationale**: The CTE pre-filters discussions before joining notes. The
`ranked_notes` CTE uses `ROW_NUMBER()` (a single pass over the notes index) instead of
correlated `(SELECT ... LIMIT 1)` sub-selects per discussion. For MR-scoped queries
(50-200 discussions) the performance is equivalent. For project-wide scans with thousands
of discussions, the window function approach avoids repeated index probes and produces a
more predictable query plan. The `MAX(CASE WHEN rn = 1 ...)` pattern extracts first-note
attributes from the grouped output without additional lookups.

**Note on SQLite FILTER syntax**: `COUNT(*) FILTER (WHERE ...)` on aggregates requires
SQLite 3.30+. Use `SUM(CASE WHEN ... THEN 1 ELSE 0 END)` (as shown above) to stay portable
across bundled SQLite versions.

#### 3c-ii. Note expansion query (--include-notes)

When `include_notes > 0`, after the main discussion query, run a follow-up query per
discussion to fetch its N most recent notes:

```sql
SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type,
       n.is_system, n.created_at, n.updated_at,
       n.position_new_path, n.position_new_line,
       n.position_old_path, n.position_old_line,
       n.resolvable, n.resolved, n.resolved_by,
       d.noteable_type,
       COALESCE(i.iid, m.iid) AS parent_iid,
       COALESCE(i.title, m.title) AS parent_title,
       p.path_with_namespace AS project_path,
       d.gitlab_discussion_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
LEFT JOIN issues i ON d.issue_id = i.id
LEFT JOIN merge_requests m ON d.merge_request_id = m.id
WHERE d.id = ?
ORDER BY n.created_at DESC
LIMIT ?
```

**Optimization**: If discussion count is small (<= 50), batch all discussion IDs into a
single `WHERE d.id IN (?, ?, ...)` query with a secondary partition to split by discussion.
For larger result sets, fall back to per-discussion queries to avoid huge IN clauses. This
matches the existing note-loading pattern in `show.rs`.
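
A minimal sketch of the batched variant, assuming the notes schema shown above; the helper name `expand_notes_batched` and the trimmed column list are illustrative, not the final implementation:

```rust
use rusqlite::{params_from_iter, Connection, Result};

/// Batched --include-notes expansion for small result sets: one query for up to
/// 50 discussions, ranked per discussion via ROW_NUMBER().
fn expand_notes_batched(conn: &Connection, ids: &[i64], n: usize) -> Result<Vec<(i64, String)>> {
    // Build "?,?,..." placeholders for the IN clause.
    let placeholders = vec!["?"; ids.len()].join(",");
    let sql = format!(
        "SELECT discussion_id, body FROM (
            SELECT n.discussion_id, n.body,
                   ROW_NUMBER() OVER (
                       PARTITION BY n.discussion_id
                       ORDER BY n.created_at DESC
                   ) AS rn
            FROM notes n
            WHERE n.discussion_id IN ({placeholders})
        ) WHERE rn <= {n}"
    );
    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(params_from_iter(ids.iter()), |r| {
        Ok((r.get(0)?, r.get(1)?))
    })?;
    rows.collect()
}
```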

The returned `NoteListRow` rows reuse the same struct and `NoteListRowJson` conversion from
workstream 1, ensuring identical note shape across all commands.

#### 3d. Filters struct

```rust
pub struct DiscussionListFilters {
    pub project: Option<String>,
    pub for_issue_iid: Option<i64>,
    pub for_mr_iid: Option<i64>,
    pub gitlab_discussion_id: Option<String>,
    pub resolution: Option<String>,
    pub since: Option<String>,
    pub path: Option<String>,
    pub noteable_type: Option<String>,
    pub sort: String,
    pub order: String,
    pub include_notes: usize,
}
```

Where-clause construction follows the exact pattern from `query_notes()` (see the sketch
after this list):
- `for_issue_iid` → subquery to resolve issue ID from IID + project
- `for_mr_iid` → subquery to resolve MR ID from IID + project
- `gitlab_discussion_id` → `d.gitlab_discussion_id = ?`
- `resolution` → `d.resolvable = 1 AND d.resolved = 0/1`
- `since` → `d.first_note_at >= ?` (using `parse_since()`)
- `path` → `EXISTS (SELECT 1 FROM notes n WHERE n.discussion_id = d.id AND n.position_new_path LIKE ?)`
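
A minimal sketch of that assembly, assuming a `Vec<Box<dyn ToSql>>` parameter list and that `parse_since` yields a timestamp; names here are illustrative, not the final code:

```rust
use rusqlite::ToSql;

// Illustrative where-clause assembly following the query_notes() pattern.
let mut clauses: Vec<String> = Vec::new();
let mut params: Vec<Box<dyn ToSql>> = Vec::new();

if let Some(gid) = &filters.gitlab_discussion_id {
    clauses.push("d.gitlab_discussion_id = ?".into());
    params.push(Box::new(gid.clone()));
}
if let Some(res) = &filters.resolution {
    // "resolved" → 1, "unresolved" → 0 (value_parser guarantees one of the two)
    clauses.push("d.resolvable = 1 AND d.resolved = ?".into());
    params.push(Box::new(i64::from(res == "resolved")));
}
if let Some(since) = &filters.since {
    clauses.push("d.first_note_at >= ?".into());
    params.push(Box::new(parse_since(since)?));
}
let where_sql = if clauses.is_empty() {
    String::new()
} else {
    format!("WHERE {}", clauses.join(" AND "))
};
```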

In `handle_discussions`, args map onto the filters struct and output dispatches on format:

```rust
let filters = DiscussionListFilters {
    project: args.project,
    for_issue_iid: args.for_issue,
    for_mr_iid: args.for_mr,
    gitlab_discussion_id: args.gitlab_discussion_id,
    resolution: args.resolution,
    since: args.since,
    path: args.path,
    noteable_type: args.noteable_type,
    sort: args.sort,
    order: order.to_string(),
    include_notes: args.include_notes,
};

let result = query_discussions(&conn, &filters, &config)?;

match format {
    "json" => print_list_discussions_json(
        &result,
        start.elapsed().as_millis() as u64,
        args.fields.as_deref(),
        robot_mode,
    ),
    "jsonl" => print_list_discussions_jsonl(&result),
    "csv" => print_list_discussions_csv(&result),
    _ => print_list_discussions(&result),
}
```

In `print_list_discussions_json`:

```rust
pub fn print_list_discussions_json(
    result: &DiscussionListResult,
    elapsed_ms: u64,
    fields: Option<&[String]>,
    robot_mode: bool,
) {
    let json_result = DiscussionListResultJson::from(result);
    let meta = RobotMeta { elapsed_ms };
    // ... assemble the envelope into `output` ...
    let mut output = output;
    if let Some(f) = fields {
        let expanded = expand_fields_preset(f, "discussions");
        if robot_mode {
            filter_fields_robot(&mut output, "discussions", &expanded);
        } else {
            filter_fields(&mut output, "discussions", &expanded);
        }
    }
    match serde_json::to_string(&output) {
        Ok(json) => println!("{json}"),
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
}
```

Table view: compact format showing discussion_id (first 8 chars), first author, note count,
resolved status, path, snippet.

CSV view: all fields, following same pattern as `print_list_notes_csv`.
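
A tiny sketch of the 8-char ID column for the table view (the helper name is illustrative):

```rust
/// First 8 chars of the GitLab discussion id, safe on multi-byte input.
fn short_disc_id(id: &str) -> String {
    id.chars().take(8).collect()
}
```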

#### 3g. Fields preset

**File**: `src/cli/robot.rs`

Register a `minimal` preset for `discussions` in the fields-preset registry, mirroring the
existing `notes` preset, and cover it with a `discussions_fields_minimal_preset` test.
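
A sketch of what the preset entry and its test could look like (the registry shape, the exact minimal field list, and `expand_fields_preset` returning `Vec<String>` are assumptions):

```rust
// In the preset registry: "minimal" expands to the smallest useful field set.
// Bridge Contract fields are force-included at filter time regardless of preset.
fn discussions_minimal_fields() -> Vec<String> {
    ["gitlab_discussion_id", "noteable_type", "parent_iid", "project_path", "resolved"]
        .into_iter()
        .map(String::from)
        .collect()
}

#[test]
fn discussions_fields_minimal_preset() {
    let expanded = expand_fields_preset(&["minimal".to_string()], "discussions");
    assert!(expanded.iter().any(|f| f == "gitlab_discussion_id"));
}
```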

#### Test 6: CTE query handles empty note_rollup gracefully

```rust
#[test]
fn query_discussions_with_no_notes() {
    let conn = create_test_db();
    insert_project(&conn, 1);
    insert_mr(&conn, 1, 1, 99, "Test MR");
    // Insert discussion with no notes (edge case: possible after sync issues)
    insert_discussion(&conn, 1, "orphan123", 1, None, Some(1), "MergeRequest");

    let filters = DiscussionListFilters::default_for_mr(99);
    let result = query_discussions(&conn, &filters, &Config::default()).unwrap();

    assert_eq!(result.discussions.len(), 1);
    assert_eq!(result.discussions[0].note_count, 0);
    assert!(result.discussions[0].first_author.is_none());
}
```

#### Test 7: --gitlab-discussion-id filter returns exact match

```rust
#[test]
fn query_discussions_by_gitlab_id() {
    let conn = create_test_db();
    insert_project(&conn, 1);
    insert_mr(&conn, 1, 1, 99, "Test MR");
    insert_discussion(&conn, 1, "target123", 1, None, Some(1), "MergeRequest");
    insert_discussion(&conn, 2, "other456", 1, None, Some(1), "MergeRequest");

    let filters = DiscussionListFilters {
        gitlab_discussion_id: Some("target123".to_string()),
        ..DiscussionListFilters::default_for_mr(99)
    };
    let result = query_discussions(&conn, &filters, &Config::default()).unwrap();

    assert_eq!(result.discussions.len(), 1);
    assert_eq!(result.discussions[0].gitlab_discussion_id, "target123");
}
```

#### Test 8: --include-notes populates notes array

```rust
#[test]
fn query_discussions_with_included_notes() {
    let conn = create_test_db();
    insert_project(&conn, 1);
    insert_mr(&conn, 1, 1, 99, "Test MR");
    insert_discussion(&conn, 1, "disc123", 1, None, Some(1), "MergeRequest");
    insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "first");
    insert_note_in_discussion(&conn, 2, 501, 1, 1, "bob", "second");
    insert_note_in_discussion(&conn, 3, 502, 1, 1, "carol", "third");

    let filters = DiscussionListFilters {
        include_notes: 2,
        ..DiscussionListFilters::default_for_mr(99)
    };
    let result = query_discussions(&conn, &filters, &Config::default()).unwrap();

    assert_eq!(result.discussions.len(), 1);
    // Note: notes populated during JSON conversion, not in raw result
    // Test at handler/print level for full integration
}
```

#### Test 9: Bridge fields survive --fields filtering in robot mode

```rust
#[test]
fn discussions_bridge_fields_forced_in_robot_mode() {
    // Request only "note_count" — bridge fields must still appear
    let mut value = serde_json::json!({
        "data": {
            "discussions": [{
                "gitlab_discussion_id": "abc",
                "noteable_type": "MergeRequest",
                "parent_iid": 99,
                "project_path": "group/repo",
                "note_count": 3
            }]
        }
    });

    filter_fields_robot(
        &mut value,
        "discussions",
        &["note_count".to_string()],
    );

    let disc = &value["data"]["discussions"][0];
    assert_eq!(disc["note_count"], 3);
    assert!(disc.get("gitlab_discussion_id").is_some());
    assert!(disc.get("project_path").is_some());
}
```

---

## 4. Fix Robot-Docs Response Schemas

With:

```json
"flags": [
  "--for-issue <iid>",
  "--for-mr <iid>",
  "-p/--project <path>",
  "--gitlab-discussion-id <id>",
  "--resolution <unresolved|resolved>",
  "--since <period>",
  "--path <filepath>",
  "--noteable-type <Issue|MergeRequest>",
  "--include-notes <N>",
  "--sort <first_note|last_note>",
  "--asc",
  "--fields <list|minimal>",
  "--format <table|json|jsonl|csv>"
],
"robot_flags": ["--format json", "--fields minimal"],
"example": "lore --robot discussions --for-mr 99 --resolution unresolved",
"response_schema": {
  "ok": "bool",
  "data": {
    "discussions": "[{gitlab_discussion_id:string, noteable_type:string, parent_iid:int?, parent_title:string?, project_path:string, individual_note:bool, note_count:int, first_author:string?, first_note_body_snippet:string?, first_note_at_iso:string, last_note_at_iso:string, resolvable:bool, resolved:bool, position_new_path:string?, position_new_line:int?, notes:[NoteListRowJson]?}]",
    "total_count": "int",
    "showing": "int"
  },
  ...
}
```

#### 4d. Update show response_schema

Update the `issues` and `mrs` show schemas to reflect that `discussions` now include
`gitlab_discussion_id`, `resolvable`, `resolved`, and `last_note_at_iso`. Also reflect that
notes within show discussions now include `gitlab_note_id`.

#### 4e. Add to lore_exclusive list

```
"discussions: Thread-level discussion listing with gitlab_discussion_id for API integration"
```

#### 4f. Add robot-docs contract tests

**File**: `src/main.rs` (within `#[cfg(test)]` module)

Add lightweight tests that parse the robot-docs JSON output and assert required Bridge
Contract fields are present. This prevents schema drift — if someone adds a field to the
struct but forgets to update robot-docs, the test fails.

```rust
#[test]
fn robot_docs_notes_schema_includes_bridge_fields() {
    let docs = get_robot_docs_json(); // helper that builds the robot-docs Value
    let notes_schema = docs["commands"]["notes"]["response_schema"]["data"]["notes"]
        .as_str().unwrap();
    assert!(notes_schema.contains("gitlab_discussion_id"));
    assert!(notes_schema.contains("project_path"));
    assert!(notes_schema.contains("parent_iid"));
}

#[test]
fn robot_docs_discussions_schema_includes_bridge_fields() {
    let docs = get_robot_docs_json();
    let disc_schema = docs["commands"]["discussions"]["response_schema"]["data"]["discussions"]
        .as_str().unwrap();
    assert!(disc_schema.contains("gitlab_discussion_id"));
    assert!(disc_schema.contains("project_path"));
    assert!(disc_schema.contains("parent_iid"));
}

#[test]
fn robot_docs_show_schema_includes_discussion_id() {
    let docs = get_robot_docs_json();
    // Verify issues and mrs show schemas reference gitlab_discussion_id
    // in their discussion sub-schemas
}
```

#### 4g. Add CLI-level contract integration tests

**File**: `src/cli/commands/list_tests.rs` or `src/main.rs` `#[cfg(test)]`

Add handler-level tests that invoke the command handlers with an in-memory DB and parse the
JSON output, asserting Bridge Contract fields are present. These are stronger than unit tests
on structs because they exercise the full path from query through serialization.

```rust
#[test]
fn notes_handler_json_includes_bridge_fields() {
    // Setup in-memory DB with project, discussion, note
    // Capture stdout from handle_notes (or call query_notes + print_list_notes_json)
    // Parse JSON, assert bridge fields present on every note
    let conn = create_test_db();
    insert_project(&conn, 1);
    insert_mr(&conn, 1, 1, 99, "Test MR");
    insert_discussion(&conn, 1, "abc123", 1, None, Some(1), "MergeRequest");
    insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "hello");

    let result = query_notes(&conn, &NoteListFilters::default_for_mr(99), &Config::default()).unwrap();
    let json_result = NoteListResultJson::from(&result);
    let value = serde_json::to_value(&json_result).unwrap();

    for note in value["notes"].as_array().unwrap() {
        assert!(note.get("gitlab_discussion_id").is_some(), "missing gitlab_discussion_id");
        assert!(note.get("project_path").is_some(), "missing project_path");
        assert!(note.get("parent_iid").is_some(), "missing parent_iid");
    }
}

#[test]
fn discussions_handler_json_includes_bridge_fields() {
    let conn = create_test_db();
    insert_project(&conn, 1);
    insert_mr(&conn, 1, 1, 99, "Test MR");
    insert_discussion(&conn, 1, "abc123", 1, None, Some(1), "MergeRequest");
    insert_note_in_discussion(&conn, 1, 500, 1, 1, "alice", "hello");

    let result = query_discussions(&conn, &DiscussionListFilters::default_for_mr(99), &Config::default()).unwrap();
    let json_result = DiscussionListResultJson::from(&result);
    let value = serde_json::to_value(&json_result).unwrap();

    for disc in value["discussions"].as_array().unwrap() {
        assert!(disc.get("gitlab_discussion_id").is_some(), "missing gitlab_discussion_id");
        assert!(disc.get("project_path").is_some(), "missing project_path");
        assert!(disc.get("parent_iid").is_some(), "missing parent_iid");
    }
}
```

### Tests

Beyond the contract tests above, robot-docs changes are verified by running
`lore robot-docs` and inspecting output.

---

1. **Change 1** (notes output) — standalone, no dependencies. Can be released immediately.
2. **Change 2** (show output) — standalone, no dependencies. Can be released alongside 1.
3. **Change 3** (discussions command) — largest change, benefits from 1+2 being reviewed first
   to lock down field naming and serialization patterns.
4. **Change 4** (robot-docs + contract tests) — last, after all payloads are finalized.

Changes 1 and 2 can be done in parallel. Change 4 must come last since it documents the
final schema of all preceding changes.

**Cross-cutting**: The Bridge Contract field guardrail (force-including bridge fields in robot
mode) should be implemented as part of Change 1, since it modifies `filter_fields` in
`robot.rs` which all subsequent changes depend on. The `BRIDGE_FIELDS_*` constants are defined
once and reused by Changes 3 and 4. A sketch of the guardrail follows.
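
A minimal sketch of that guardrail. The function name `filter_fields_robot` and the `BRIDGE_FIELDS_*` naming come from the plan; the constant contents and the function body are illustrative:

```rust
// Bridge fields that must survive --fields filtering in robot mode.
const BRIDGE_FIELDS_DISCUSSIONS: &[&str] = &[
    "gitlab_discussion_id",
    "noteable_type",
    "parent_iid",
    "project_path",
];

fn filter_fields_robot(output: &mut serde_json::Value, section: &str, requested: &[String]) {
    // Union the requested fields with the always-on bridge fields.
    let mut keep: Vec<String> = requested.to_vec();
    if section == "discussions" {
        keep.extend(BRIDGE_FIELDS_DISCUSSIONS.iter().map(|s| s.to_string()));
    }
    if let Some(rows) = output
        .get_mut("data")
        .and_then(|d| d.get_mut(section))
        .and_then(|s| s.as_array_mut())
    {
        for row in rows {
            if let Some(obj) = row.as_object_mut() {
                obj.retain(|k, _| keep.iter().any(|f| f == k));
            }
        }
    }
}
```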

---

After all changes:

1. An agent listing notes gets `gitlab_discussion_id` in the response
2. An agent can run `lore -J discussions --for-mr 3929 --resolution unresolved` to see all
   open threads with their IDs
3. An agent can run `lore -J mrs 3929` and see `gitlab_discussion_id`, `resolvable`,
   `resolved`, and `last_note_at_iso` on each discussion group, plus `gitlab_note_id` on
   each note within
4. `lore robot-docs` lists actual field names for all commands
5. All existing tests still pass
6. No clippy warnings (pedantic + nursery)
7. Robot-docs contract tests pass, preventing future schema drift
8. Bridge Contract fields (`project_path`, `noteable_type`, `parent_iid`,
   `gitlab_discussion_id`, `gitlab_note_id`) are present in every applicable read payload
9. Bridge Contract fields survive `--fields` filtering in robot mode (guardrail enforced)
10. `--gitlab-discussion-id` filter works on both `notes` and `discussions` commands
11. `--include-notes N` populates inline notes on `discussions` output
12. CLI-level contract integration tests verify bridge fields through the full handler path

---

## Rejected Recommendations

- **Rename `id`→`note_id` and `gitlab_id`→`gitlab_note_id` in notes list output** — rejected because every existing consumer (agents, scripts, field presets) uses `id` and `gitlab_id`. The fields are unambiguous within the `notes` context. The show-command note structs are a different story (they have no IDs at all), so we add `gitlab_note_id` there where it's genuinely missing. Renaming established fields is churn without proportional benefit.
- **Keyset cursor-based pagination (`--cursor` flag)** — rejected because no existing lore command has pagination, agents use `--limit` effectively, and adding a cursor mechanism is significant scope creep. Tracked as potential future work if agents hit real pagination needs.
- **Split `note_count` into `user_note_count`/`total_note_count` and rename `first_author` to `first_user_author`** — rejected because `note_count` already excludes system notes by query design (the `WHERE is_system = 0` / `CASE WHEN` filter), and `first_author` already targets the first non-system note. The current naming is clear and consistent with how `notes --include-system` works elsewhere.
- **Match path filter on both `position_new_path` and `position_old_path`** — rejected because agents care about where code is *now* (new path), not where it was before a rename. Matching old paths adds complexity and returns confusing results for moved files.
- **Separate migration file for discussion-list indexes** — rejected because this project uses a `MIGRATIONS` array in `src/core/db.rs`, not separate migration files. If profiling shows the new query needs indexes, they'll be added to the migration array in the standard way. Premature index creation without measurement is against project practice.
- **Shared contract model / workstream 0 (shared constants module)** — rejected because 4 structs sharing field names in a codebase this size isn't drift-prone. We have compile-time contract tests (robot-docs assertions + handler-level JSON tests) that catch drift. A constants module for field name strings adds indirection without proportional gain. The Bridge Contract field guardrail (`BRIDGE_FIELDS_*` arrays in robot.rs) provides the centralized definition where it matters — at the filtering enforcement point.
- **Structured robot-docs schema (JSON objects instead of string blobs)** — rejected because the current compact string format is intentionally token-efficient for agent consumption. Switching to nested JSON objects per field would significantly bloat robot-docs output. The string-based contract tests are sufficient — they test what agents actually parse. Agents already work with the inline field listing format used by `issues` and `mrs`.
- **`bridge_contract` meta-section in robot-docs output** — rejected because agents don't need a separate meta-contract section; they need correct field listings per command, which we already provide. Adding a cross-cutting contract section to robot-docs adds documentation surface area without improving the agent workflow.
- **Performance regression benchmark test (ignored by default)** — rejected because timing-based assertions are inherently flaky across machines, CI environments, and load conditions. Performance is validated through query plan analysis (EXPLAIN) and manual profiling, not hard-coded elapsed-time thresholds.

---

**File**: `docs/plan-surgical-sync.feedback-3.md` (new file, 169 lines)

Below are the strongest **new** revisions I’d make (excluding everything in your rejected list), with rationale and plan-level diffs.

### 1. Add a durable run ledger (`sync_runs`) with phase state

This makes surgical sync crash-resumable, auditable, and safer under Ctrl+C. Right now `run_id` is mostly ephemeral; persisting phase state removes ambiguity about what completed.

```diff
@@ Design Constraints
+9. **Durable run state**: Surgical sync MUST persist a `sync_runs` row keyed by `run_id`
+   with phase transitions (`preflight`, `ingest`, `dependents`, `docs`, `embed`, `done`, `failed`).
+   This is required for crash recovery, observability, and deterministic retries.

@@ Step 9: Create `run_sync_surgical`
+Before Stage 0, insert `sync_runs(run_id, project_id, mode='surgical', requested_counts, started_at)`.
+After each stage, update `sync_runs.phase`, counters, and `last_error` if present.
+On success/failure, set terminal state (`done`/`failed`) and `finished_at`.
```

### 2. Add `--preflight-only` (network validation without writes)

`--dry-run` is intentionally zero-network, so it cannot validate IIDs. `--preflight-only` is high-value for agents: it verifies existence/permissions quickly with no DB mutation.

```diff
@@ CLI Interface
 lore sync --dry-run --issue 123 -p myproject
+lore sync --preflight-only --issue 123 -p myproject

@@ Step 2: Add `--issue`, `--mr`, `-p` to `SyncArgs`
+    /// Validate remote entities and auth without any DB writes
+    #[arg(long, default_value_t = false)]
+    pub preflight_only: bool,

@@ Step 10: Add branch in `run_sync`
+if options.preflight_only && options.is_surgical() {
+    return run_sync_surgical_preflight_only(config, &options, run_id, signal).await;
+}
```

### 3. Preflight should aggregate all missing/failed IIDs, not fail-fast

Fail-fast causes repeated reruns. Aggregating errors gives one-shot correction and better robot automation.

```diff
@@ Step 7: Create `src/ingestion/surgical.rs`
-/// Returns the fetched payloads. If ANY fetch fails, the entire operation should abort.
+/// Returns fetched payloads plus per-IID failures; caller aborts writes if failures exist.
 pub async fn preflight_fetch(...) -> Result<PreflightResult> {

@@
 #[derive(Debug, Default)]
 pub struct PreflightResult {
     pub issues: Vec<GitLabIssue>,
     pub merge_requests: Vec<GitLabMergeRequest>,
+    pub failures: Vec<EntityFailure>, // stage="fetch"
 }

@@ Step 9: Create `run_sync_surgical`
-let preflight = preflight_fetch(...).await?;
+let preflight = preflight_fetch(...).await?;
+if !preflight.failures.is_empty() {
+    result.entity_failures = preflight.failures;
+    return Err(LoreError::Other("Surgical preflight failed for one or more IIDs".into()).into());
+}
```

### 4. Stop filtering scoped queue drains with raw `json_extract` scans

`json_extract(payload_json, '$.scope_run_id')` in hot drain queries will degrade as the queue grows. Use indexed scope metadata instead.

```diff
@@ Step 9b: Implement scoped drain helpers
-// claim query adds:
-// AND json_extract(payload_json, '$.scope_run_id') = ?
+// Add migration:
+// 1) Add `scope_run_id` generated/stored column derived from payload_json (or explicit column)
+// 2) Create index on (project_id, job_type, scope_run_id, status, id)
+// Scoped drains filter by indexed `scope_run_id`, not full-table JSON extraction.
```
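
A sketch of what that migration could look like, embedded in a rusqlite migration step. The table/column names follow the feedback above; the migration mechanism is an assumption. Note that SQLite only allows adding VIRTUAL (not STORED) generated columns via `ALTER TABLE`, and indexes on virtual generated columns are permitted:

```rust
// Illustrative migration: materialize scope_run_id and index it for scoped drains.
const MIGRATION_SCOPE_RUN_ID: &str = "
ALTER TABLE dependent_queue
  ADD COLUMN scope_run_id TEXT
  GENERATED ALWAYS AS (json_extract(payload_json, '$.scope_run_id')) VIRTUAL;
CREATE INDEX IF NOT EXISTS idx_dependent_queue_scoped
  ON dependent_queue (project_id, job_type, scope_run_id, status, id);
";

fn apply_scope_migration(conn: &rusqlite::Connection) -> rusqlite::Result<()> {
    conn.execute_batch(MIGRATION_SCOPE_RUN_ID)
}
```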

### 5. Replace `dirty_source_ids` collection-by-query with explicit run scoping

The current approach can accidentally include prior dirty rows for the same source and can duplicate work. Tag dirty rows with `origin_run_id` and consume by run.

```diff
@@ Design Constraints
-2. **Dirty queue scoping**: ... MUST call ... `run_generate_docs_for_dirty_ids`
+2. **Dirty queue scoping**: Surgical sync MUST scope docs by `origin_run_id` on `dirty_sources`
+   (or equivalent exact run marker) and MUST NOT drain unrelated dirty rows.

@@ Step 7: `SurgicalIngestResult`
-    pub dirty_source_ids: Vec<i64>,
+    pub origin_run_id: String,

@@ Step 9a: Implement `run_generate_docs_for_dirty_ids`
-pub fn run_generate_docs_for_dirty_ids(config: &Config, dirty_source_ids: &[i64]) -> Result<GenerateDocsResult>
+pub fn run_generate_docs_for_run_id(config: &Config, run_id: &str) -> Result<GenerateDocsResult>
```

### 6. Enforce transaction safety at the type boundary

The `unchecked_transaction()` + `&Connection` signature combination is fragile. Accept `&Transaction` for ingest internals and use `TransactionBehavior::Immediate` for deterministic lock behavior.

```diff
@@ Step 7: Create `src/ingestion/surgical.rs`
-pub fn ingest_issue_by_iid_from_payload(conn: &Connection, ...)
+pub fn ingest_issue_by_iid_from_payload(tx: &rusqlite::Transaction<'_>, ...)

-pub fn ingest_mr_by_iid_from_payload(conn: &Connection, ...)
+pub fn ingest_mr_by_iid_from_payload(tx: &rusqlite::Transaction<'_>, ...)

-let tx = conn.unchecked_transaction()?;
+let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
```

### 7. Acquire sync lock only for mutation phases, not remote preflight

This materially reduces lock contention and keeps normal sync throughput higher, while still guaranteeing mutation serialization.

```diff
@@ Design Constraints
+10. **Lock window minimization**: Preflight fetch runs without sync lock; lock is acquired immediately
+    before first DB mutation and held through all mutation stages.

@@ Step 9: Create `run_sync_surgical`
-// ── Acquire sync lock ──
-...
-// ── Stage 0: Preflight fetch ──
+// ── Stage 0: Preflight fetch (no lock, no writes) ──
 ...
+// ── Acquire sync lock just before Stage 1 mutation ──
```

### 8. Add explicit transient retry policy beyond 429

The client already handles rate limits; surgical reliability improves a lot if 5xx/timeouts are retried with bounded backoff.

```diff
@@ Design Constraints
+11. **Transient retry policy**: Preflight and dependent remote fetches MUST retry boundedly on
+    timeout/5xx with jittered backoff; permanent errors (404/401/403) fail immediately.

@@ Step 5: Add `get_issue_by_iid` / `get_mr_by_iid`
+Document retry behavior for transient transport/server failures.
```
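
A minimal sketch of such a policy, assuming a tokio runtime, the `fastrand` crate for jitter, and a hypothetical `FetchError` with an `is_permanent()` classifier (none of these are existing lore APIs):

```rust
use std::time::Duration;

/// Bounded, jittered retry for transient GitLab fetch failures (sketch only).
async fn fetch_with_retry<T, F, Fut>(mut attempt: F, max_retries: u32) -> Result<T, FetchError>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, FetchError>>,
{
    let mut tries = 0;
    loop {
        match attempt().await {
            Ok(v) => return Ok(v),
            // Permanent errors (404/401/403) surface immediately.
            Err(e) if e.is_permanent() => return Err(e),
            Err(e) if tries >= max_retries => return Err(e),
            Err(_) => {
                tries += 1;
                // Exponential backoff with simple jitter.
                let base = 250u64 * 2u64.pow(tries);
                let jitter = fastrand::u64(0..base / 2);
                tokio::time::sleep(Duration::from_millis(base + jitter)).await;
            }
        }
    }
}
```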

### 9. Tighten automated tests around scoping invariants

You already list manual checks; these should be enforced in unit/integration tests to prevent regressions.

```diff
@@ Step 1: TDD — Write Failing Tests First
+### 1d. New invariants tests
+- `surgical_docs_scope_ignores_preexisting_dirty_rows`
+- `scoped_queue_drain_ignores_orphaned_jobs`
+- `preflight_aggregates_multiple_missing_iids`
+- `preflight_only_performs_zero_writes`
+- `dry_run_performs_zero_network_calls`
+- `lock_window_does_not_block_during_preflight`

@@ Acceptance Criteria
+32. Scoped queue/docs invariants are covered by automated tests (not manual-only verification).
```

### 10. Make robot-mode surgical output first-class

For agent workflows, include full stage telemetry and actionable recovery commands.

```diff
@@ Step 15: Update `SyncResult` for robot mode structured output
+    /// Per-stage elapsed ms for deterministic performance tracking
+    pub stage_timings_ms: std::collections::BTreeMap<String, u64>,
+    /// Suggested recovery commands (robot ergonomics)
+    pub recovery_actions: Vec<String>,

@@ Step 14: Update `robot-docs` manifest
+Document surgical-specific error codes and `actions` schema for automated recovery.
```

If you want, I can now produce a fully rewritten **iteration 3** plan that merges these into your current structure end-to-end.

---

**File**: `docs/plan-surgical-sync.feedback-4.md` (new file, 212 lines)

1. **Resolve the current contract contradictions (`preflight-only`, `dry-run`, `sync_runs`)**

Why this improves the plan:
- Right now constraints conflict: “zero DB writes before commit” vs inserting `sync_runs` during preflight.
- This ambiguity will cause implementation drift and flaky acceptance tests.
- Splitting control-plane writes from content-plane writes keeps safety guarantees strict while preserving observability.

```diff
@@ ## Design Constraints
-6. **Preflight-then-commit**: All remote fetches happen BEFORE any DB writes. If any IID fetch fails (404, network error), the entire operation aborts with zero DB mutations.
+6. **Preflight-then-commit (content-plane)**: All remote fetches happen BEFORE any writes to content tables (`issues`, `merge_requests`, `discussions`, `resource_events`, `documents`, `embeddings`).
+7. **Control-plane exception**: `sync_runs` / `sync_run_entities` writes are allowed during preflight for observability and crash diagnostics.
@@
-11. **Preflight-only mode**: `--preflight-only` validates remote entity existence and permissions with zero DB writes.
+11. **Preflight-only mode**: `--preflight-only` performs zero content writes; control-plane run-ledger writes are allowed.
@@ ### For me to evaluate (functional):
-24. **Preflight-only mode** ... no DB mutations beyond the sync_runs ledger entry
+24. **Preflight-only mode** ... no content DB mutations; only run-ledger rows may be written
```

---

2. **Add stale-write protection to avoid TOCTOU regressions during unlocked preflight**

Why this improves the plan:
- You intentionally preflight without lock; that’s good for throughput but introduces race risk.
- Without a guard, a slower surgical run can overwrite newer data ingested by a concurrent normal sync.
- This is a correctness bug under contention, not a nice-to-have.

```diff
@@ ## Design Constraints
+12. **Stale-write protection**: Surgical ingest MUST NOT overwrite fresher local rows. If local `updated_at` is newer than the preflight payload’s `updated_at`, skip that entity and record `skipped_stale`.
@@ ## Step 7: Create `src/ingestion/surgical.rs`
-    let labels_created = process_single_issue(conn, config, project_id, issue)?;
+    // Skip stale payloads to avoid TOCTOU overwrite after unlocked preflight.
+    if is_local_newer_issue(conn, project_id, issue.iid, issue.updated_at)? {
+        result.skipped_stale += 1;
+        return Ok(result);
+    }
+    let labels_created = process_single_issue(conn, config, project_id, issue)?;
@@
+// same guard for MR path
@@ ## Step 15: Update `SyncResult`
+    /// Entities skipped because local row was newer than preflight payload
+    pub skipped_stale: usize,
@@ ### Edge cases to verify:
+38. **TOCTOU safety**: if a normal sync updates entity after preflight but before ingest, surgical run skips stale payload (no overwrite)
```
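
A minimal sketch of the freshness guard. The helper name `is_local_newer_issue` comes from the diff above; the `issues` table columns and query shape are assumptions:

```rust
use rusqlite::{params, Connection, OptionalExtension, Result};

/// Returns true when the local issue row is fresher than the preflight payload.
fn is_local_newer_issue(
    conn: &Connection,
    project_id: i64,
    iid: i64,
    payload_updated_at: i64,
) -> Result<bool> {
    let local: Option<i64> = conn
        .query_row(
            "SELECT updated_at FROM issues WHERE project_id = ?1 AND iid = ?2",
            params![project_id, iid],
            |r| r.get(0),
        )
        .optional()?;
    Ok(matches!(local, Some(ts) if ts > payload_updated_at))
}
```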

---

3. **Make dirty-source scoping exact (do not capture pre-existing rows for same entity)**

Why this improves the plan:
- Current “query dirty rows by `source_id` after ingest” can accidentally include older dirty rows for the same entity.
- That silently violates strict run scoping and can delete unrelated backlog rows.
- You can fix this without adding `origin_run_id` to `dirty_sources` (which you already rejected).

```diff
@@ ## Step 7: Create `src/ingestion/surgical.rs`
-    // Collect dirty_source rows for this entity
-    let mut stmt = conn.prepare(
-        "SELECT id FROM dirty_sources WHERE source_type = 'issue' AND source_id = ?1"
-    )?;
+    // Capture only rows inserted by THIS call using high-water mark.
+    let before_dirty_id: i64 = conn.query_row(
+        "SELECT COALESCE(MAX(id), 0) FROM dirty_sources",
+        [], |r| r.get(0),
+    )?;
+    // ... call process_single_issue ...
+    let mut stmt = conn.prepare(
+        "SELECT id FROM dirty_sources
+         WHERE id > ?1 AND source_type = 'issue' AND source_id = ?2"
+    )?;
@@
+    // same pattern for MR
@@ ### 1d. Scoping invariant tests
+#[test]
+fn surgical_docs_scope_ignores_preexisting_dirty_rows_for_same_entity() {
+    // pre-insert dirty row for iid=7, then surgical ingest iid=7
+    // assert result.dirty_source_ids only contains newly inserted rows
+}
```

---

4. **Fix embed-stage leakage when `--no-docs` is used in surgical mode**

Why this improves the plan:
- Current design can run global embed even when docs stage is skipped, which may embed unrelated backlog docs.
- That breaks the surgical “scope only this run” promise.
- This is both correctness and operator-trust critical.

```diff
@@ ## Step 9: Create `run_sync_surgical`
-    if !options.no_embed {
+    // Surgical embed only runs when surgical docs actually regenerated docs in this run.
+    if !options.no_embed && !options.no_docs && result.documents_regenerated > 0 {
@@ ## Step 4: Wire new fields in `handle_sync_cmd`
+    if options.is_surgical() && options.no_docs && !options.no_embed {
+        return Err(Box::new(LoreError::Other(
+            "In surgical mode, --no-docs requires --no-embed (to preserve scoping guarantees)".to_string()
+        )));
+    }
@@ ### For me to evaluate
+39. **No embed leakage**: `sync --issue X --no-docs` never embeds unrelated unembedded docs
```

---

5. **Add queue-failure hygiene so scoped jobs do not leak forever**

Why this improves the plan:
- Scoped drains prevent accidental processing, but failed runs can strand pending jobs permanently.
- You need explicit terminalization (`aborted`) and optional replay mechanics.
- Otherwise queue bloat and confusing diagnostics accumulate.

```diff
@@ ## Step 8a: Add `sync_runs` table migration
+ALTER TABLE dependent_queue ADD COLUMN aborted_reason TEXT;
+-- status domain now includes: pending, claimed, done, failed, aborted
@@ ## Step 9: run_sync_surgical failure paths
+// On run failure/cancel:
+conn.execute(
+    "UPDATE dependent_queue
+     SET status='aborted', aborted_reason=?1
+     WHERE project_id=?2 AND scope_run_id=?3 AND status='pending'",
+    rusqlite::params![failure_summary, project_id, run_id],
+)?;
@@ ## Acceptance Criteria
+40. **No stranded scoped jobs**: failed surgical runs leave no `pending` rows for their `scope_run_id`
```

---

6. **Persist per-entity lifecycle (`sync_run_entities`) for real observability and deterministic retry**

Why this improves the plan:
- `sync_runs` alone gives aggregate counters but not which IID failed at which stage.
- Per-entity records make retries deterministic and robot output far more useful.
- This is the missing piece for your stated “deterministic retry decisions.”

```diff
@@ ## Step 8a: Add `sync_runs` table migration
+CREATE TABLE IF NOT EXISTS sync_run_entities (
+    id INTEGER PRIMARY KEY,
+    run_id TEXT NOT NULL REFERENCES sync_runs(run_id),
+    entity_type TEXT NOT NULL CHECK(entity_type IN ('issue','merge_request')),
+    iid INTEGER NOT NULL,
+    stage TEXT NOT NULL,
+    status TEXT NOT NULL CHECK(status IN ('ok','failed','skipped_stale')),
+    error_code TEXT,
+    error_message TEXT,
+    updated_at INTEGER NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_sync_run_entities_run ON sync_run_entities(run_id, entity_type, iid);
@@ ## Step 15: Update `SyncResult`
+    pub failed_iids: Vec<(String, u64)>,
+    pub skipped_stale_iids: Vec<(String, u64)>,
@@ ## CLI Interface
+lore --robot sync-runs --run-id <id>
+lore --robot sync-runs --run-id <id> --retry-failed
```

---

7. **Use explicit error type for surgical preflight failures (not `LoreError::Other`)**

Why this improves the plan:
- `Other(String)` loses machine semantics, weakens robot mode, and leads to bad exit-code behavior.
- A typed error preserves structured failures and enables actionable recovery commands.

```diff
@@ ## Step 9: run_sync_surgical
-    return Err(LoreError::Other(
-        format!("Surgical preflight failed for {} of {} IIDs: {}", ...)
-    ).into());
+    return Err(LoreError::SurgicalPreflightFailed {
+        run_id: run_id.to_string(),
+        total: total_items,
+        failures: preflight.failures.clone(),
+    }.into());
@@ ## Step 15: Update `SyncResult`
+    /// Machine-actionable error summary for robot mode
+    pub error_code: Option<String>,
@@ ## Acceptance Criteria
+41. **Typed failure**: preflight failures serialize structured errors (not generic `Other`) with machine-usable codes/actions
```
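
A sketch of how the typed variant could be declared, assuming lore's error enum uses `thiserror` (an assumption) and the `EntityFailure` type from the earlier feedback:

```rust
// Sketch of the typed variant; lore's actual error plumbing may differ.
#[derive(Debug, thiserror::Error)]
pub enum LoreError {
    // ... existing variants ...
    #[error("surgical preflight failed for {} of {total} IIDs", .failures.len())]
    SurgicalPreflightFailed {
        run_id: String,
        total: usize,
        failures: Vec<EntityFailure>,
    },
}
```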

---

8. **Strengthen tests for rollback, contention, and stale-skip guarantees**

Why this improves the plan:
- Current tests cover many happy-paths and scoping invariants, but key race/rollback behaviors are still under-tested.
- These are exactly where regressions will appear first in production.

```diff
@@ ## Step 1: TDD — Write Failing Tests First
+### 1f. Transactional rollback + TOCTOU tests
+1. `preflight_success_then_ingest_failure_rolls_back_all_content_writes`
+2. `stale_payload_is_skipped_when_local_updated_at_is_newer`
+3. `failed_run_aborts_pending_scoped_jobs`
+4. `surgical_no_docs_requires_no_embed`
@@ ### Automated scoping invariants
-38. **Scoped queue/docs invariants are enforced by automated tests**
+42. **Rollback and race invariants are enforced by automated tests** (no partial writes on ingest failure, no stale overwrite)
```

---

These eight revisions keep your core approach intact, avoid your explicitly rejected ideas, and close the biggest correctness/operability gaps before implementation.

---

**File**: `docs/plan-surgical-sync.feedback-5.md` (new file, 130 lines)

**Critical Gaps In Current Plan**
1. `dirty_sources` scoping is based on `id`, but `dirty_sources` has no `id` column and uses `(source_type, source_id)` UPSERT semantics.
2. Plan assumes a new `dependent_queue` with `status`, but current code uses `pending_dependent_fetches` (delete-on-complete), so queue-scoping design conflicts with existing invariants.
3. Constraint 6 says all remote fetches happen before any content writes, but the proposed surgical flow fetches discussions/events/diffs after ingest writes.
4. `sync_runs` is already an existing table and already used by `SyncRunRecorder`; the plan currently treats it like a new table.

**Best Revisions**

1. **Fix dirty-source scoping to match real schema (queued-at watermark, not `id` high-water).**
   Why this is better: This removes a correctness bug and makes same-entity re-ingest deterministic under UPSERT behavior.

```diff
@@ Design Constraints
-2. Dirty queue scoping: ... capture MAX(id) FROM dirty_sources ... run_generate_docs_for_dirty_ids ...
+2. Dirty queue scoping: `dirty_sources` is keyed by `(source_type, source_id)` and updated via UPSERT.
+   Surgical scoping MUST use:
+   1) a run-level `run_dirty_floor_ms` captured before surgical ingest, and
+   2) explicit touched source keys from ingest (`(source_type, source_id)`).
+   Surgical docs MUST call a scoped API (e.g. `run_generate_docs_for_sources`) and MUST NOT drain global dirty queue.
@@ Step 9a
-pub fn run_generate_docs_for_dirty_ids(config: &Config, dirty_source_ids: &[i64]) -> Result<GenerateDocsResult>
+pub fn run_generate_docs_for_sources(config: &Config, sources: &[(SourceType, i64)]) -> Result<GenerateDocsResult>
```

2. **Bypass shared dependent queue in surgical mode; run dependents inline per target.**
   Why this is better: Avoids queue migration churn, avoids run-scope conflicts with existing unique constraints, and removes orphan-job hygiene complexity entirely.

```diff
@@ Design Constraints
-4. Dependent queue scoping: ... scope_run_id indexed column on dependent_queue ...
+4. Surgical dependent execution: surgical mode MUST bypass `pending_dependent_fetches`.
+   Dependents (resource_events, mr_closes_issues, mr_diffs) run inline for targeted entities only.
+   Global queue remains for normal sync only.
@@ Design Constraints
-14. Queue failure hygiene: ... pending scoped jobs ... terminalized to aborted ...
+14. Surgical failure hygiene: surgical mode MUST leave no queue artifacts because it does not enqueue dependent jobs.
@@ Step 9b / 9c / Step 13
-Implement scoped drain helpers and enqueue_job scope_run_id plumbing
+Replace with direct per-entity helpers in ingestion layer:
+  - sync_issue_resource_events_direct(...)
+  - sync_mr_resource_events_direct(...)
+  - sync_mr_closes_issues_direct(...)
+  - sync_mr_diffs_direct(...)
```

3. **Clarify atomicity contract to “primary-entity atomicity” (remove contradiction).**
   Why this is better: Keeps strong zero-write guarantees for missing IIDs while matching practical staged pipeline behavior.

```diff
@@ Design Constraints
-6. Preflight-then-commit (content-plane): All remote fetches happen BEFORE any writes to content tables ...
+6. Primary-entity atomicity: all requested issue/MR payload fetches complete before first content write.
+   If any primary IID fetch fails, primary ingest does zero content writes.
+   Dependent stages (discussions/events/diffs/closes) are post-ingest and best-effort, with structured per-stage failure reporting.
```

4. **Extend existing `sync_runs` schema instead of redefining it.**
   Why this is better: Preserves compatibility with current `SyncRunRecorder`, `sync_status`, and existing historical data.

```diff
@@ Step 8a
-Add `sync_runs` table migration (CREATE TABLE sync_runs ...)
+Add migration 027 to extend existing `sync_runs` table:
+  - ADD COLUMN mode TEXT NULL            -- 'standard' | 'surgical'
+  - ADD COLUMN phase TEXT NULL           -- preflight|ingest|dependents|docs|embed|done|failed
+  - ADD COLUMN surgical_summary_json TEXT NULL
+Reuse `SyncRunRecorder` row lifecycle; do not introduce a parallel run-ledger model.
```

5. **Strengthen TOCTOU stale protection for equal timestamps.**
   Why this is better: Prevents regressions when `updated_at` is equal but a fresher local fetch already happened.

```diff
@@ Design Constraints
-13. ... If local `updated_at` is newer than preflight payload `updated_at`, skip ...
+13. ... Skip stale when:
+    a) local.updated_at > payload.updated_at, OR
+    b) local.updated_at == payload.updated_at AND local.last_seen_at > preflight_started_at_ms.
+    This prevents equal-timestamp regressions under concurrent sync.
@@ Step 1f tests
+Add test: `equal_updated_at_but_newer_last_seen_is_skipped`.
```

6. **Shrink lock window further: release `sync` lock before embed; use dedicated embed lock.**
   Why this is better: Prevents long embedding from blocking unrelated syncs and avoids concurrent embed writers.

```diff
@@ Design Constraints
-11. Lock ... held through all mutation stages.
+11. Lock ... held through ingest/dependents/docs only.
+    Release `AppLock("sync")` before embed.
+    Embed stage uses `AppLock("embed")` for single-flight embedding writes.
@@ Step 9
-Embed runs inside the same sync lock window
+Embed runs after sync lock release, under dedicated embed lock
```

7. **Add the missing `sync-runs` robot read path (the plan references it but doesn’t define it).**
   Why this is better: Makes durable run-state actually useful for recovery automation and observability.

```diff
@@ Step 14 (new)
+## Step 14a: Add `sync-runs` read command
+
+CLI:
+  lore --robot sync-runs --limit 20
+  lore --robot sync-runs --run-id <id>
+  lore --robot sync-runs --state failed
+
+Robot response fields:
+  run_id, mode, phase, status, started_at, finished_at, counters, failures, suggested_retry_command
```

8. **Add URL-native surgical targets (`--issue-url`, `--mr-url`) with project inference.**
   Why this is better: Much more agent-friendly and reduces project-resolution errors from copy/paste workflows.

```diff
@@ CLI Interface
 lore sync --issue 123 --issue 456 -p myproject
+lore sync --issue-url https://gitlab.example.com/group/proj/-/issues/123
+lore sync --mr-url https://gitlab.example.com/group/proj/-/merge_requests/789
@@ Step 2
+Add repeatable flags:
+  --issue-url <url>
+  --mr-url <url>
+Parse URL into (project_path, iid). If all targets are URL-derived and same project, `-p` is optional.
+If mixed projects are provided in one command, reject with clear error.
```
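
A minimal sketch of the URL parsing, under the assumption that targets follow GitLab's standard `/-/issues/<iid>` and `/-/merge_requests/<iid>` path shape; real code would also validate the host against the configured GitLab instance:

```rust
/// Parse a GitLab issue/MR URL into (project_path, iid). Sketch only.
fn parse_target_url(url: &str) -> Option<(String, i64)> {
    // e.g. https://gitlab.example.com/group/proj/-/issues/123
    let (_scheme, rest) = url.split_once("://")?;
    let (_host, path) = rest.split_once('/')?;
    let (project_path, tail) = path.split_once("/-/")?;
    let (kind, iid) = tail.split_once('/')?;
    if kind != "issues" && kind != "merge_requests" {
        return None;
    }
    Some((project_path.to_string(), iid.trim_end_matches('/').parse().ok()?))
}
```

For example, `parse_target_url("https://gitlab.example.com/group/proj/-/issues/123")` would yield `Some(("group/proj".to_string(), 123))`.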

If you want, I can produce a single consolidated patched version of your plan (iteration 5 draft) with these revisions already merged.

---

**File**: `docs/plan-surgical-sync.feedback-6.md` (new file, 152 lines)

Highest-impact revisions after reviewing your v5 plan:

1. **Fix a real scoping hole: embed can still process unrelated docs**
   Rationale: The current plan assumes that scoped docs imply scoped embed, but that only holds while no other run creates unembedded docs. You explicitly release the sync lock before embed, so another sync can enqueue/regenerate docs in between, and `run_embed` may embed unrelated backlog. This breaks surgical isolation and can hide backlog debt.

```diff
diff --git a/plan.md b/plan.md
@@ Design Constraints
-3. Embed scoping: Embedding runs only for documents regenerated by this surgical run. Because `run_embed` processes only unembedded docs, scoping is automatic IF docs are scoped correctly...
+3. Embed scoping: Embedding MUST be explicitly scoped to documents regenerated by this surgical run.
+   `run_generate_docs_for_sources` returns regenerated `document_ids`; surgical mode calls
+   `run_embed_for_document_ids(document_ids)` and never global `run_embed`.
+   This remains true even after lock release and under concurrent normal sync activity.
@@ Step 9a: Implement `run_generate_docs_for_sources`
-pub fn run_generate_docs_for_sources(...) -> Result<GenerateDocsResult> {
+pub fn run_generate_docs_for_sources(...) -> Result<GenerateDocsResult> {
+    // Return regenerated document IDs for scoped embedding.
+    // GenerateDocsResult { regenerated, errored, regenerated_document_ids: Vec<i64> }
@@ Step 9: Embed stage
-    match run_embed(config, false, false, None, signal).await {
+    match run_embed_for_document_ids(config, &result.regenerated_document_ids, signal).await {
```

2. **Make the run-ledger lifecycle actually durable (and consistent with your own constraint 10)**
   Rationale: The plan text says “reuse `SyncRunRecorder`”, but Step 9 writes raw SQL directly. That creates lifecycle drift, missing heartbeats, and inconsistent failure handling as the code evolves.

```diff
diff --git a/plan.md b/plan.md
@@ Design Constraints
-10. Durable run state: ... Reuses `SyncRunRecorder` row lifecycle ...
+10. Durable run state: surgical sync MUST use `SyncRunRecorder` end-to-end (no ad-hoc SQL updates).
+    Add recorder APIs for `set_mode`, `set_phase`, `set_counters`, `finish_succeeded`,
+    `finish_failed`, `finish_cancelled`, and periodic `heartbeat`.
@@ Step 9: Create `run_sync_surgical`
-    conn.execute("INSERT INTO sync_runs ...")
-    conn.execute("UPDATE sync_runs SET phase = ...")
+    let mut recorder = SyncRunRecorder::start_surgical(...)?;
+    recorder.set_phase("preflight")?;
+    recorder.heartbeat_if_due()?;
+    recorder.set_phase("ingest")?;
+    ...
+    recorder.finish_succeeded_with_warnings(...)?;
```

3. **Add an explicit `cancelled` terminal state**
   Rationale: The current early-cancellation branches return `Ok(result)` without guaranteed run-row finalization. That leaves misleading `running` rows and weakens crash diagnostics.

```diff
diff --git a/plan.md b/plan.md
@@ Design Constraints
+15. Cancellation semantics: If shutdown is observed after run start, phase is set to `cancelled`,
+    status is `cancelled`, `finished_at` is written, and the lock is released before return.
@@ Step 8a migration
+ALTER TABLE sync_runs ADD COLUMN warnings_count INTEGER NOT NULL DEFAULT 0;
+ALTER TABLE sync_runs ADD COLUMN cancelled_at INTEGER;
@@ Acceptance Criteria
+47. Cancellation durability: Ctrl+C during surgical sync records `status='cancelled'`,
+    `phase='cancelled'`, and `finished_at` in `sync_runs`.
```

4. **Reduce lock contention further by separating dependent fetch from dependent write**
   Rationale: You currently hold the lock through network-heavy dependent stages. That maximizes contention and increases lock-timeout risk. Better: fetch dependents unlocked, then write in short locked transactions with per-entity freshness guards. A guard sketch follows the diff.

```diff
diff --git a/plan.md b/plan.md
@@ Design Constraints
-11. Lock window minimization: ... held through ingest, dependents, and docs stages.
+11. Lock window minimization: the lock is held only for DB mutation windows.
+    Dependents run in two phases:
+    (a) fetch from GitLab without the lock,
+    (b) write results under the lock in short transactions.
+    Apply per-entity freshness checks before dependent writes.
@@ Step 9: Dependent stages
-    // All dependents run INLINE per-entity ... while lock is held
+    // Dependents fetch outside the lock, then write under the lock with CAS-style watermark guards.
```
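
A sketch of the CAS-style watermark guard: the UPDATE only lands if the fetched payload is at least as new as what is already stored. The `dependents` table and column names here are assumptions standing in for the plan's real schema.

```rust
use rusqlite::{params, Connection, Result};

// Write a dependent row only if no fresher write has landed in the meantime.
fn write_dependent_guarded(
    conn: &Connection,
    entity_id: i64,
    payload_updated_at: i64,
    body: &str,
) -> Result<bool> {
    let changed = conn.execute(
        "UPDATE dependents
         SET body = ?1, updated_at = ?2
         WHERE id = ?3 AND updated_at <= ?2",
        params![body, payload_updated_at, entity_id],
    )?;
    Ok(changed == 1) // false => a fresher concurrent write won the race; skip silently
}
```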

5. **Introduce stage timeout budgets to prevent hung surgical runs**
   Rationale: A single slow GitLab endpoint can stall the whole run and hold resources too long. Timeout budgets plus per-entity failure recording keep the run bounded and predictable. A timeout-wrapper sketch follows the diff.

```diff
diff --git a/plan.md b/plan.md
@@ Design Constraints
+16. Stage timeout budgets: each dependent fetch has a per-entity timeout and a global stage budget.
+    Timed-out entities are recorded in `entity_failures` with code `TIMEOUT` and the run continues best-effort.
@@ Step 9 notes
+ - Wrap dependent network calls with `tokio::time::timeout`.
+ - Add config knobs:
+   `sync.surgical_entity_timeout_seconds` (default 20),
+   `sync.surgical_dependents_budget_seconds` (default 120).
```
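
A minimal sketch of the per-entity wrapper using `tokio::time::timeout`; `fetch_dependents` stands in for any network call, and the 20-second value mirrors the proposed default knob.

```rust
use std::time::Duration;
use tokio::time::timeout;

// Placeholder for the real GitLab call.
async fn fetch_dependents(iid: u64) -> Result<String, String> {
    Ok(format!("payload for {iid}"))
}

// Bound the fetch; on expiry, surface a typed TIMEOUT failure and let the
// caller record it in `entity_failures` and continue best-effort.
async fn fetch_with_budget(iid: u64) -> Result<String, String> {
    match timeout(Duration::from_secs(20), fetch_dependents(iid)).await {
        Ok(result) => result,
        Err(_elapsed) => Err(format!("TIMEOUT fetching dependents for {iid}")),
    }
}

#[tokio::main]
async fn main() {
    println!("{:?}", fetch_with_budget(42).await);
}
```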

6. **Add payload integrity checks (project-mismatch hard fail)**
   Rationale: Surgical mode is precision tooling. If API/proxy misconfiguration returns payloads from the wrong project, you should fail preflight loudly, not trust downstream assumptions.

```diff
diff --git a/plan.md b/plan.md
@@ Step 7: preflight_fetch
+    // Integrity check: payload.project_id must equal requested gitlab_project_id.
+    // On mismatch, record EntityFailure { code: "PROJECT_MISMATCH", stage: "fetch" }.
@@ Step 9d: error codes
+PROJECT_MISMATCH -> usage/config data integrity failure (typed, machine-readable)
@@ Acceptance Criteria
+48. Project integrity: payloads with an unexpected `project_id` are rejected in preflight
+    and produce zero content writes.
```

7. **Upgrade robot output from aggregate-only to per-entity lifecycle**
   Rationale: `entity_failures` alone is not enough for robust automation. Agents need a complete entity outcome map (fetched, ingested, stale-skipped, dependent failures) to retry deterministically.

```diff
diff --git a/plan.md b/plan.md
@@ Step 15: Update `SyncResult`
+pub struct EntityOutcome {
+    pub entity_type: String,
+    pub iid: u64,
+    pub fetched: bool,
+    pub ingested: bool,
+    pub stale_skipped: bool,
+    pub dependent_failures: Vec<EntityFailure>,
+}
@@
+pub entity_outcomes: Vec<EntityOutcome>,
+pub completion_status: String, // succeeded | succeeded_with_warnings | failed | cancelled
@@ Robot mode
-    enables agents to detect partial failures via `entity_failures`
+    enables deterministic, per-IID retry and richer UI messaging.
```

8. **Index `sync_runs` for real observability at scale**
   Rationale: You’re adding mode/phase/counters and then querying recent surgical runs. Without indexes, this degrades as run history grows.

```diff
diff --git a/plan.md b/plan.md
@@ Step 8a migration
+CREATE INDEX IF NOT EXISTS idx_sync_runs_mode_started
+    ON sync_runs(mode, started_at DESC);
+CREATE INDEX IF NOT EXISTS idx_sync_runs_status_phase_started
+    ON sync_runs(status, phase, started_at DESC);
```

9. **Add tests specifically for the new failure-prone paths**
   Rationale: The current tests are strong on ingest and scoping but still miss the new high-risk runtime behavior (cancel state, timeout handling, scoped embed under concurrency).

```diff
diff --git a/plan.md b/plan.md
@@ Step 1f tests
+#[tokio::test]
+async fn cancellation_marks_sync_run_cancelled() { ... }
+
+#[tokio::test]
+async fn dependent_timeout_records_entity_failure_and_continues() { ... }
+
+#[tokio::test]
+async fn scoped_embed_does_not_embed_unrelated_docs_created_after_docs_stage() { ... }
@@ Acceptance Criteria
+49. Scoped embed isolation under concurrency is verified by automated test.
+50. Timeout path is verified (TIMEOUT code + continued processing).
```

These revisions keep your core direction intact, avoid every rejected recommendation, and materially improve correctness under concurrency, operational observability, and agent automation quality.

2240
docs/plan-surgical-sync.md
Normal file
File diff suppressed because it is too large
@@ -1,174 +0,0 @@
Highest-impact gaps I see in the current plan:

1. `for-issue` / `for-mr` filtering is ambiguous across projects and can return incorrect rows.
2. `lore notes` has no pagination contract, so large exports and deterministic resumption are weak.
3. Migration `022` is high-risk (table rebuild + FTS + junction tables) without explicit integrity gates.
4. Note-doc freshness is incomplete for upstream note deletions and parent metadata changes (labels/title).

Below are my best revisions, each with rationale and a git-diff-style plan edit.

---

1. **Add gated rollout + rollback controls**
   Rationale: You can still “ship together” while reducing blast radius. This makes recovery fast if note-doc generation causes DB/embedding pressure.

```diff
@@ ## Design
-Two phases, shipped together as one feature:
+Two phases, shipped together as one feature, but with runtime gates:
+
+- `feature.notes_cli` (Phase 1 surface)
+- `feature.note_documents` (Phase 2 indexing/extraction path)
+
+Rollout order:
+1) Enable `notes_cli`
+2) Run note-doc backfill in bounded batches
+3) Enable `note_documents` for continuous updates
+
+Rollback:
+- Disabling `feature.note_documents` stops new note-doc generation without affecting issue/MR/discussion docs.
```

2. **Add keyset pagination + deterministic ordering**
   Rationale: Needed for year-long reviewer analysis and reliable “continue where I left off” behavior under concurrent updates. A cursor sketch follows the diff.

```diff
@@ pub struct NoteListFilters<'a> {
     pub limit: usize,
+    pub cursor: Option<&'a str>,    // keyset token "<sort_ms>:<id>"
+    pub include_total_count: bool,  // avoid COUNT(*) in hot paths
@@
-    pub sort: &'a str, // "created" (default) | "updated"
+    pub sort: &'a str, // "created" | "updated"
@@ query_notes SQL
-ORDER BY {sort_column} {order}
+ORDER BY {sort_column} {order}, n.id {order}
 LIMIT ?
```
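
A sketch of the keyset token and the WHERE clause it drives, assuming the `"<sort_ms>:<id>"` format proposed above and descending `created` order; other sort/order combinations would flip the comparisons.

```rust
// Decode "<sort_ms>:<id>" into its two components, or None if malformed.
fn parse_cursor(token: &str) -> Option<(i64, i64)> {
    let (ms, id) = token.split_once(':')?;
    Some((ms.parse().ok()?, id.parse().ok()?))
}

// Build the extra predicate: strictly older than the last row the client saw,
// with id as a deterministic tiebreaker for equal timestamps.
fn keyset_clause(cursor: Option<&str>) -> (String, Vec<i64>) {
    match cursor.and_then(parse_cursor) {
        Some((ms, id)) => (
            "AND (n.created_at < ?1 OR (n.created_at = ?1 AND n.id < ?2))".into(),
            vec![ms, id],
        ),
        None => (String::new(), Vec::new()),
    }
}

fn main() {
    let (sql, params) = keyset_clause(Some("1700000000000:42"));
    println!("{sql} {params:?}");
}
```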

3. **Make `for-issue` / `for-mr` project-scoped**
   Rationale: IIDs are not globally unique. Requiring a project avoids false positives and hard-to-debug cross-project leakage.

```diff
@@ pub struct NotesArgs {
-    #[arg(long = "for-issue", help_heading = "Filters", conflicts_with = "for_mr")]
+    #[arg(long = "for-issue", help_heading = "Filters", conflicts_with = "for_mr", requires = "project")]
     pub for_issue: Option<i64>,
@@
-    #[arg(long = "for-mr", help_heading = "Filters", conflicts_with = "for_issue")]
+    #[arg(long = "for-mr", help_heading = "Filters", conflicts_with = "for_issue", requires = "project")]
     pub for_mr: Option<i64>,
```

4. **Upgrade path-filtering semantics**
   Rationale: Review comments often reference renames/moves. Restricting to `position_new_path` misses relevant notes. A glob-to-LIKE sketch follows the diff.

```diff
@@ pub struct NotesArgs {
-    /// Filter by file path (trailing / for prefix match)
+    /// Filter by file path
     #[arg(long, help_heading = "Filters")]
     pub path: Option<String>,
+    /// Path mode: exact|prefix|glob
+    #[arg(long = "path-mode", value_parser = ["exact","prefix","glob"], default_value = "exact", help_heading = "Filters")]
+    pub path_mode: String,
+    /// Match against the old path as well as the new path
+    #[arg(long = "match-old-path", help_heading = "Filters")]
+    pub match_old_path: bool,
@@ query_notes filter mappings
-- `path` ... n.position_new_path ...
+- `path` applies to `n.position_new_path` and optionally `n.position_old_path`.
+- `glob` mode translates `*`/`?` to SQL LIKE with escaping.
```
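
A sketch of the glob-to-LIKE translation: escape LIKE metacharacters in the user's pattern first, then map `*` to `%` and `?` to `_`. The query must pair the result with `ESCAPE '\'` for the escapes to take effect.

```rust
// Translate a shell-style glob into a SQL LIKE pattern with literal matching
// of any `%`, `_`, or `\` the user typed.
fn glob_to_like(glob: &str) -> String {
    let mut like = String::with_capacity(glob.len());
    for ch in glob.chars() {
        match ch {
            '%' | '_' | '\\' => {
                like.push('\\');
                like.push(ch);
            }
            '*' => like.push('%'),
            '?' => like.push('_'),
            other => like.push(other),
        }
    }
    like
}

fn main() {
    // Use as: WHERE n.position_new_path LIKE ?1 ESCAPE '\'
    assert_eq!(glob_to_like("src/*/auth_?.rs"), "src/%/auth\\__.rs");
}
```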

5. **Add explicit performance indexes (new migration)**
   Rationale: `notes` becomes a first-class query surface; without indexes, filters degrade quickly at 10k+ note scale.

```diff
@@ ## Phase 1: `lore notes` Command
+### Work Chunk 1E: Query Performance Indexes
+**Files:** `migrations/023_notes_query_indexes.sql`, `src/core/db.rs`
+
+Add indexes:
+- `notes(project_id, created_at DESC, id DESC)`
+- `notes(author_username, created_at DESC, id DESC) WHERE is_system = 0`
+- `notes(discussion_id)`
+- `notes(position_new_path)`
+- `notes(position_old_path)`
+- `discussions(issue_id)`
+- `discussions(merge_request_id)`
```

6. **Harden migration 022 with transactional integrity checks**
   Rationale: This is the riskiest part of the plan. Add hard fail-fast checks so corruption cannot silently pass. A verification sketch follows the diff.

```diff
@@ ### Work Chunk 2A: Schema Migration (022)
+Migration safety requirements:
+- Execute in a single `BEGIN IMMEDIATE ... COMMIT` transaction.
+- Capture and compare pre/post row counts for `documents`, `document_labels`, `document_paths`, `dirty_sources`.
+- Run `PRAGMA foreign_key_check` and abort on any violation.
+- Run `PRAGMA integrity_check` and abort on non-`ok`.
+- Rebuild FTS and assert that the `documents_fts` row count equals the `documents` row count.
```
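
A sketch of the post-migration gates with rusqlite, assuming a modern SQLite build where the `pragma_foreign_key_check` table-valued function is available; the asserts stand in for whatever abort mechanism the migration harness actually uses.

```rust
use rusqlite::{Connection, Result};

// Run the three integrity gates after the 022 rebuild.
fn verify_migration(conn: &Connection) -> Result<()> {
    // One row per FK violation; zero rows means the rebuild preserved references.
    let violations: i64 = conn.query_row(
        "SELECT COUNT(*) FROM pragma_foreign_key_check",
        [],
        |row| row.get(0),
    )?;
    assert_eq!(violations, 0, "foreign_key_check reported violations");

    // integrity_check returns a single "ok" row on a healthy database.
    let status: String = conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
    assert_eq!(status, "ok", "integrity_check failed");

    // FTS parity: every document must be indexed exactly once.
    let docs: i64 = conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))?;
    let fts: i64 = conn.query_row("SELECT COUNT(*) FROM documents_fts", [], |r| r.get(0))?;
    assert_eq!(docs, fts, "FTS row count diverged from documents");
    Ok(())
}
```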

7. **Add note-deletion + parent-change propagation**
   Rationale: The current plan handles create/update ingestion but not all staleness paths. Without this, note documents drift.

```diff
@@ ## Phase 2: Per-Note Documents
+### Work Chunk 2G: Freshness Propagation
+**Files:** `src/ingestion/discussions.rs`, `src/ingestion/mr_discussions.rs`, `src/documents/regenerator.rs`
+
+Rules:
+- If a previously stored note is missing from the upstream payload, delete the local note row and enqueue `(note, id)` for document deletion.
+- When a parent issue/MR title or labels change, enqueue descendant note docs as dirty (notes inherit parent metadata).
+- Keep idempotent behavior for repeated syncs.
```

8. **Separate FTS coverage from embedding coverage**
   Rationale: The biggest cost/perf risk is embeddings. Index all notes in FTS, but embed selectively with policy knobs.

```diff
@@ ## Estimated Document Volume Impact
-FTS5 handles this comfortably. Embedding generation time scales linearly (~4x increase).
+FTS5 handles this comfortably. Embedding generation is policy-controlled:
+- FTS: index all non-system note docs
+- Embeddings default: only notes with body length >= 40 chars (configurable)
+- Add config: `documents.note_embeddings.min_chars`, `documents.note_embeddings.enabled`
+- Prioritize unresolved DiffNotes before other notes during embedding backfill
```

9. **Bring structured reviewer profiling into scope (not narrative reporting)**
   Rationale: This directly serves the stated use case and makes the feature compelling immediately.

```diff
@@ ## Non-Goals
-- Adding a "reviewer profile" report command (that's a downstream use case built on this infrastructure)
+- Generating free-form narrative reviewer reports.
+  A structured profiling command is in scope.
+
+## Phase 3: Structured Reviewer Profiling
+Add `lore notes profile --author <user> --since <window>` returning:
+- top commented paths
+- top parent labels
+- unresolved-comment ratio
+- note-type distribution
+- median comment length
```

10. **Add operational SLOs + robot-mode status for the note pipeline**
    Rationale: Reliability improves when regressions are observable, not inferred from failures.

```diff
@@ ## Verification Checklist
+Operational checks:
+- `lore -J stats` includes per-`source_type` document counts (including `note`)
+- Add queue-lag metrics: oldest dirty-note age, retry backlog size
+- Add an extraction-error breakdown by `source_type`
+- Add a smoke assertion: disabling `feature.note_documents` leaves other source regeneration unaffected
```

---

If you want, I can produce a single consolidated revised PRD draft (fully merged text, not just diffs) as the next step.

@@ -1,200 +0,0 @@
Below are the strongest revisions I’d make, excluding everything in your `## Rejected Recommendations` list.

1. **Add a Phase 0 for stable note identity before any note-doc generation**
   Rationale: Your current plan still allows note-document churn because issue-discussion ingestion is delete/reinsert-based. That makes local `notes.id` unstable, causing unnecessary dirtying/regeneration and potential stale-doc edge cases. Stabilizing identity first (upsert-by-GitLab-ID + stale sweep) improves correctness and cuts repeated work.

```diff
@@ ## Design
-Two phases, shipped together as one feature:
+Three phases, shipped together as one feature:
+- **Phase 0 (Foundation):** Stable note identity in the local DB (upsert + sweep, no delete/reinsert churn)
 - **Phase 1 (Option A):** `lore notes` command — direct SQL query over the `notes` table with rich filtering
 - **Phase 2 (Option B):** Per-note documents — each non-system note becomes its own searchable document in the FTS/embedding pipeline
@@
+## Phase 0: Stable Note Identity
+
+### Work Chunk 0A: Upsert/Sweep for Issue Discussion Notes
+**Files:** `src/ingestion/discussions.rs`, `migrations/022_notes_identity_index.sql`, `src/core/db.rs`
+**Implementation:**
+- Add unique index: `UNIQUE(project_id, gitlab_id)` on `notes`
+- Replace the delete/reinsert issue-note flow with upsert + `last_seen_at` sweep (same durability model as the MR note sweep)
+- Ensure `insert_note/upsert_note` returns the stable local row id for both insert and update paths
```

2. **Replace `source_type` CHECK constraints with a registry table + FK in the migration**
   Rationale: Table CHECKs force a full table rebuild for every new source type forever. A `source_types` table with FKs keeps DB-level integrity and future extensibility without rebuilding `documents`/`dirty_sources` every time. This is a major architecture-hardening win.

```diff
@@ ### Work Chunk 2A: Schema Migration (023)
-Current migration ... CHECK constraints limiting `source_type` ...
+Current migration ... CHECK constraints limiting `source_type` ...
+Revision: migrate to a `source_types` registry table + FK constraints.
@@
-1. `dirty_sources` — add `'note'` to source_type CHECK
-2. `documents` — add `'note'` to source_type CHECK
+1. Create `source_types(name TEXT PRIMARY KEY)` and seed: `issue, merge_request, discussion, note`
+2. Rebuild `dirty_sources` and `documents` to replace CHECK with `REFERENCES source_types(name)`
+3. Future source-type additions become `INSERT INTO source_types(name) VALUES (?)` (no table rebuild)
@@
+#### Additional integrity tests
+#[test]
+fn test_source_types_registry_contains_note() { ... }
+#[test]
+fn test_documents_source_type_fk_enforced() { ... }
+#[test]
+fn test_dirty_sources_source_type_fk_enforced() { ... }
```

3. **Mark note documents dirty only when note semantics actually changed**
   Rationale: The current loops mark every non-system note dirty on every sync. With 8k+ notes this creates avoidable queue pressure and regeneration time. Change-aware dirtying (inserted/changed only) gives major performance and stability improvements.

```diff
@@ ### Work Chunk 2D: Regenerator & Dirty Tracking Integration
-for note in notes {
-    let local_note_id = insert_note(&tx, local_discussion_id, &note, None)?;
-    if !note.is_system {
-        dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, local_note_id)?;
-    }
-}
+for note in notes {
+    let outcome = upsert_note(&tx, local_discussion_id, &note, None)?;
+    if !note.is_system && outcome.changed_semantics {
+        dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
+    }
+}
@@
+// changed_semantics should include: body, note_type, path/line positions, resolvable/resolved/resolved_by, updated_at
```

4. **Expand filters to support real analysis windows and resolution state**
   Rationale: Reviewer profiling usually needs bounded windows and both resolved and unresolved views. The current `unresolved: bool` is too narrow and one-sided. Add `--until` and tri-state resolution filtering for better analytical power.

```diff
@@ pub struct NoteListFilters<'a> {
-    pub since: Option<&'a str>,
+    pub since: Option<&'a str>,
+    pub until: Option<&'a str>,
@@
-    pub unresolved: bool,
+    pub resolution: &'a str, // "any" (default) | "unresolved" | "resolved"
@@
-    pub author: Option<&'a str>,
+    pub author: Option<&'a str>, // case-insensitive match
@@
-    /// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
+    /// Filter by start time (7d, 2w, 1m, or YYYY-MM-DD)
     pub since: Option<String>,
+    /// Filter by end time (7d, 2w, 1m, or YYYY-MM-DD)
+    #[arg(long, help_heading = "Filters")]
+    pub until: Option<String>,
@@
-    /// Only show unresolved review comments
-    pub unresolved: bool,
+    /// Resolution filter: any, unresolved, resolved
+    #[arg(long, value_parser = ["any", "unresolved", "resolved"], default_value = "any", help_heading = "Filters")]
+    pub resolution: String,
```

5. **Broaden the index strategy to match actual query shapes, not just author queries**
   Rationale: `idx_notes_user_created` helps one path, but common usage also includes project+time scans and unresolved filters. Add two more partial composites for high-selectivity paths.

```diff
@@ ### Work Chunk 1E: Composite Query Index
 CREATE INDEX IF NOT EXISTS idx_notes_user_created
 ON notes(project_id, author_username, created_at DESC, id DESC)
 WHERE is_system = 0;
+
+CREATE INDEX IF NOT EXISTS idx_notes_project_created
+ON notes(project_id, created_at DESC, id DESC)
+WHERE is_system = 0;
+
+CREATE INDEX IF NOT EXISTS idx_notes_unresolved_project_created
+ON notes(project_id, created_at DESC, id DESC)
+WHERE is_system = 0 AND resolvable = 1 AND resolved = 0;
@@
+#[test]
+fn test_notes_query_plan_uses_project_created_index_for_default_listing() { ... }
+#[test]
+fn test_notes_query_plan_uses_unresolved_index_when_resolution_unresolved() { ... }
```

6. **Improve the per-note document payload with a structured metadata header + minimal thread context**
   Rationale: Isolated single-note docs can lose meaning. A small structured header plus lightweight context (parent + one preceding-note excerpt) improves semantic retrieval quality substantially without re-bundling full threads.

```diff
@@ ### Work Chunk 2C: Note Document Extractor
-// 6. Format content:
-//    [[Note]] {note_type or "Comment"} on {parent_type_prefix}: {parent_title}
-//    Project: {path_with_namespace}
-//    URL: {url}
-//    Author: @{author}
-//    Date: {format_date(created_at)}
-//    Labels: {labels_json}
-//    File: {position_new_path}:{position_new_line} (if DiffNote)
-//
-//    --- Body ---
-//
-//    {body}
+// 6. Format content with a machine-readable header:
+//    [[Note]]
+//    source_type: note
+//    note_gitlab_id: {gitlab_id}
+//    project: {path_with_namespace}
+//    parent_type: {Issue|MergeRequest}
+//    parent_iid: {iid}
+//    note_type: {DiffNote|DiscussionNote|Comment}
+//    author: @{author}
+//    created_at: {iso8601}
+//    resolved: {true|false}
+//    path: {position_new_path}:{position_new_line}
+//    url: {url}
+//
+//    --- Context ---
+//    parent_title: {title}
+//    previous_note_excerpt: {optional, max 200 chars}
+//
+//    --- Body ---
+//    {body}
```

7. **Add first-class export modes for downstream profiling pipelines**
   Rationale: This makes the feature much more useful immediately (LLM prompts, notebook analysis, external scripts) without adding a profiling command. It stays within your non-goals and increases adoption.

```diff
@@ pub struct NotesArgs {
+    /// Output format
+    #[arg(long, value_parser = ["table", "json", "jsonl", "csv"], default_value = "table", help_heading = "Output")]
+    pub format: String,
@@
-    if robot_mode {
+    if robot_mode || args.format == "json" || args.format == "jsonl" || args.format == "csv" {
         print_list_notes_json(...)
     } else {
         print_list_notes(&result);
     }
@@ ### Work Chunk 1C: Human & Robot Output Formatting
+Add `print_list_notes_csv()` and `print_list_notes_jsonl()`:
+- CSV columns mirror `NoteListRowJson` field names
+- JSONL emits one note object per line for streaming pipelines
```

8. **Strengthen verification with idempotence + migration data-preservation checks**
   Rationale: This feature touches ingestion, migrations, indexing, and regeneration. Add explicit idempotence/perf checks so regressions surface early.

```diff
@@ ## Verification Checklist
 cargo test
 cargo clippy --all-targets -- -D warnings
 cargo fmt --check
+cargo test test_note_ingestion_idempotent_across_two_syncs
+cargo test test_note_document_count_stable_after_second_generate_docs_full
@@
+lore sync
+lore generate-docs --full
+lore -J stats > /tmp/stats1.json
+lore generate-docs --full
+lore -J stats > /tmp/stats2.json
+# assert note doc count unchanged and dirty queue drains to zero
```

If you want, I can turn this into a fully rewritten PRD v2 draft with these changes merged in place and work chunks renumbered end-to-end.

@@ -1,162 +0,0 @@
These are the highest-impact revisions I’d make. They avoid everything in your `## Rejected Recommendations` list.

1. Add immediate note-document deletion propagation (don’t wait for `generate-docs --full`)
   Why: Right now, deleted notes can leave stale `source_type='note'` documents until a full rebuild. That creates incorrect search/reporting results and weakens trust in the dataset.

```diff
@@ Phase 0: Stable Note Identity
+### Work Chunk 0B: Immediate Deletion Propagation
+
+When the sweep deletes stale notes, propagate the deletion to documents in the same transaction.
+Do not rely on eventual cleanup via `generate-docs --full`.
+
+#### Tests to Write First
+#[test]
+fn test_issue_note_sweep_deletes_note_documents_immediately() { ... }
+#[test]
+fn test_mr_note_sweep_deletes_note_documents_immediately() { ... }
+
+#### Implementation
+Use `DELETE ... RETURNING id, is_system` in the note sweep functions.
+For returned non-system note ids:
+1) `DELETE FROM documents WHERE source_type='note' AND source_id=?`
+2) `DELETE FROM dirty_sources WHERE source_type='note' AND source_id=?`
```

2. Add a one-time upgrade backfill for existing notes (migration 024)
   Why: Existing DBs will otherwise only get note documents for changed or new notes. Historical notes remain invisible unless users manually run a full rebuild.

```diff
@@ Phase 2: Per-Note Documents
+### Work Chunk 2H: Backfill Existing Notes After Upgrade (Migration 024)
+
+Create migration `024_note_dirty_backfill.sql`:
+INSERT INTO dirty_sources (source_type, source_id, queued_at)
+SELECT 'note', n.id, unixepoch('now') * 1000
+FROM notes n
+LEFT JOIN documents d
+  ON d.source_type='note' AND d.source_id=n.id
+WHERE n.is_system=0 AND d.id IS NULL
+ON CONFLICT(source_type, source_id) DO NOTHING;
+
+Add a migration test asserting idempotence and the expected queue size.
```

3. Fix `--since/--until` semantics and validation
   Why: Reusing `parse_since` for `until` creates ambiguous windows and off-by-boundary behavior; your own example `--since 90d --until 180d` is chronologically reversed. An end-of-day parsing sketch follows the diff.

```diff
@@ Work Chunk 1A: Data Types & Query Layer
- since: parse_since(since_str) then n.created_at >= ?
- until: parse_since(until_str) then n.created_at <= ?
+ since: parse_since_start_bound(since_str) then n.created_at >= ?
+ until: parse_until_end_bound(until_str) then n.created_at <= ?
+ Validate since <= until; otherwise return a clear user error.
+
+#### Tests to Write First
+#[test] fn test_query_notes_invalid_time_window_rejected() { ... }
+#[test] fn test_query_notes_until_date_is_end_of_day_inclusive() { ... }
```
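
A sketch of end-of-day-inclusive parsing for `--until YYYY-MM-DD` with chrono, assuming timestamps are stored as UTC epoch milliseconds; `parse_until_end_bound` is the hypothetical helper named in the diff.

```rust
use chrono::{NaiveDate, TimeZone, Utc};

// Map a bare date to the last millisecond of that UTC day, so the whole
// day is included by `n.created_at <= ?`.
fn parse_until_end_bound(s: &str) -> Option<i64> {
    let date = NaiveDate::parse_from_str(s, "%Y-%m-%d").ok()?;
    let end_of_day = date.and_hms_milli_opt(23, 59, 59, 999)?;
    Some(Utc.from_utc_datetime(&end_of_day).timestamp_millis())
}

fn main() {
    // 2024-01-31 maps to 2024-01-31T23:59:59.999Z.
    let ms = parse_until_end_bound("2024-01-31").unwrap();
    assert_eq!(ms % 1000, 999);
}
```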

4. Separate semantic-change detection from housekeeping updates
   Why: The currently proposed `WHERE` includes `updated_at`, which will cause unnecessary dirty churn. You want `last_seen_at` to always refresh, but regeneration only when searchable semantics changed.

```diff
@@ Work Chunk 0A: Upsert/Sweep for Issue Discussion Notes
- OR notes.updated_at IS NOT excluded.updated_at
+ -- updated_at-only changes should not mark semantic dirty
+
+Perform two-step logic:
+1) Upsert always updates persistence/housekeeping fields (`updated_at`, `last_seen_at`).
+2) `changed_semantics` is computed only from fields used by note documents/search filters
+   (body, note_type, resolved flags, paths, author, parent linkage).
+
+#### Tests to Write First
+#[test]
+fn test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change() { ... }
```

5. Make indexes align with the actual query collation and join strategy
   Why: `author` uses `COLLATE NOCASE`; without a collation-aware index, SQLite can skip index use. Also, IID filters via scalar subqueries are harder for the planner than direct join predicates.

```diff
@@ Work Chunk 1E: Composite Query Index
-CREATE INDEX ... ON notes(project_id, author_username, created_at DESC, id DESC) WHERE is_system = 0;
+CREATE INDEX ... ON notes(project_id, author_username COLLATE NOCASE, created_at DESC, id DESC) WHERE is_system = 0;
+
+CREATE INDEX IF NOT EXISTS idx_discussions_issue_id ON discussions(issue_id);
+CREATE INDEX IF NOT EXISTS idx_discussions_mr_id ON discussions(merge_request_id);
```

```diff
@@ Work Chunk 1A: query_notes()
- d.issue_id = (SELECT id FROM issues WHERE iid = ? AND project_id = ?)
+ i.iid = ? AND i.project_id = ?
- d.merge_request_id = (SELECT id FROM merge_requests WHERE iid = ? AND project_id = ?)
+ m.iid = ? AND m.project_id = ?
```

6. Replace manual CSV escaping with the `csv` crate
   Why: Manual RFC 4180 escaping is fragile (quotes/newlines/multi-byte edge cases). This is exactly where a mature library reduces long-term bug risk.

```diff
@@ Work Chunk 1C: Human & Robot Output Formatting
- Uses a minimal CSV writer (no external dependency — the format is simple enough for manual escaping).
+ Uses `csv::Writer` for RFC 4180-compliant escaping and stable output across edge cases.
+
+#### Tests to Write First
+#[test] fn test_csv_output_multiline_and_quotes_roundtrip() { ... }
```

7. Add a `--contains` lexical body filter to `lore notes`
   Why: A useful middle ground between metadata filtering and semantic search; great for reviewer-pattern mining without requiring FTS query syntax. An `escape_like` sketch follows the diffs.

```diff
@@ Work Chunk 1B: CLI Arguments & Command Wiring
+/// Filter by case-insensitive substring in note body
+#[arg(long, help_heading = "Filters")]
+pub contains: Option<String>,
```

```diff
@@ Work Chunk 1A: NoteListFilters
+    pub contains: Option<&'a str>,
@@ query_notes dynamic filters
+    if contains.is_some() {
+        where_clauses.push("n.body LIKE ? COLLATE NOCASE");
+        params.push(format!("%{}%", escape_like(contains.unwrap())));
+    }
```
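
A sketch of the `escape_like` helper assumed in the diff above: escape the LIKE metacharacters so user input matches literally. Note the clause must then be written as `... LIKE ? ESCAPE '\'` for the escapes to take effect.

```rust
// Escape %, _, and \ so a user's substring is matched literally by LIKE.
fn escape_like(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for ch in input.chars() {
        if matches!(ch, '%' | '_' | '\\') {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}

fn main() {
    assert_eq!(escape_like("100%_done"), "100\\%\\_done");
}
```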

8. Reduce note-document embedding noise by slimming the metadata header
   Why: The current verbose key-value header repeats low-signal tokens and consumes embedding budget. Keep context, but bias tokens toward the actual review text.

```diff
@@ Work Chunk 2C: Note Document Extractor
- Build content with structured metadata header:
-   [[Note]]
-   source_type: note
-   note_gitlab_id: ...
-   project: ...
-   ...
-   --- Body ---
-   {body}
+ Build content with a compact, high-signal layout:
+   [[Note]]
+   @{author} on {Issue#|MR!}{iid} in {project_path}
+   path: {path:line} (only when available)
+   state: {resolved|unresolved} (only when resolvable)
+
+   {body}
+
+Keep detailed metadata in structured document columns/labels/paths/url,
+not repeated in verbose text.
```

9. Add explicit performance regression checks for the new hot paths
   Why: This feature increases document volume ~4x; you should pin acceptable query behavior now so future changes don’t silently degrade it.

```diff
@@ Verification Checklist
+Performance/plan checks:
+1) `EXPLAIN QUERY PLAN` for:
+   - author+since query
+   - project+date query
+   - for-mr / for-issue query
+2) Seed a 50k-note synthetic fixture and assert:
+   - `lore notes --author ... --limit 100` stays under the agreed local threshold
+   - `lore search --type note ...` remains deterministic and completes successfully
```

If you want, I can also provide a fully merged “iteration 3” PRD text with these edits applied end-to-end so you can drop it in directly.

@@ -1,187 +0,0 @@
1. **Canonical note identity for documents: use `notes.gitlab_id` as `source_id`**
   Why this is better: The current plan still couples document identity to local row IDs. Even with upsert+sweep, local IDs are a storage artifact and can be reused in edge cases. Using GitLab note IDs as canonical document IDs makes regeneration, backfill, and deletion propagation more stable and portable.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Phase 0: Stable Note Identity
-Phase 2 depends on `notes.id` as the `source_id` for note documents.
+Phase 2 uses `notes.gitlab_id` as the `source_id` for note documents.
+`notes.id` remains an internal relational key only.

@@ Work Chunk 0A
 pub struct NoteUpsertOutcome {
     pub local_note_id: i64,
+    pub document_source_id: i64, // notes.gitlab_id
     pub changed_semantics: bool,
 }

@@ Work Chunk 2D
-if !note.is_system && outcome.changed_semantics {
-    dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
+if !note.is_system && outcome.changed_semantics {
+    dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.document_source_id)?;
 }

@@ Work Chunk 2E
-SELECT 'note', n.id, ?1
+SELECT 'note', n.gitlab_id, ?1

@@ Work Chunk 2H
-ON d.source_type = 'note' AND d.source_id = n.id
+ON d.source_type = 'note' AND d.source_id = n.gitlab_id
```

2. **Prevent false deletions on partial/incomplete syncs**
   Why this is better: Sweep-based deletion is correct only when a discussion’s notes were fully fetched. If a page fails mid-fetch, the current logic can incorrectly delete valid notes. Add an explicit “fetch complete” guard before the sweep.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Phase 0
+### Work Chunk 0C: Sweep Safety Guard (Partial Fetch Protection)
+
+Only run the stale-note sweep when note pagination completed successfully for that discussion.
+If the fetch is partial/interrupted, skip the sweep and keep prior notes intact.
+
+#### Tests to Write First
+#[test]
+fn test_partial_fetch_does_not_sweep_notes() { /* ... */ }
+
+#[test]
+fn test_complete_fetch_runs_sweep_notes() { /* ... */ }
+
+#### Implementation
+if discussion_fetch_complete {
+    sweep_stale_issue_notes(...)?;
+} else {
+    tracing::warn!("Skipping stale sweep for discussion {} due to partial fetch", discussion_gitlab_id);
+}
```

3. **Make deletion propagation set-based (not a per-note loop)**
   Why this is better: The current per-note DELETE loop is O(N) statements and gets slow on large threads. A temp-table/CTE set-based delete is faster, simpler to reason about, and remains atomic.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Work Chunk 0B Implementation
- for note_id in stale_note_ids {
-     conn.execute("DELETE FROM documents WHERE source_type = 'note' AND source_id = ?", [note_id])?;
-     conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id = ?", [note_id])?;
- }
+ CREATE TEMP TABLE _stale_note_source_ids(source_id INTEGER PRIMARY KEY) WITHOUT ROWID;
+ INSERT INTO _stale_note_source_ids
+ SELECT gitlab_id
+ FROM notes
+ WHERE discussion_id = ? AND last_seen_at < ? AND is_system = 0;
+
+ DELETE FROM notes
+ WHERE discussion_id = ? AND last_seen_at < ?;
+
+ DELETE FROM documents
+ WHERE source_type = 'note'
+   AND source_id IN (SELECT source_id FROM _stale_note_source_ids);
+
+ DELETE FROM dirty_sources
+ WHERE source_type = 'note'
+   AND source_id IN (SELECT source_id FROM _stale_note_source_ids);
+
+ DROP TABLE _stale_note_source_ids;
```

4. **Fix project-scoping and time-window semantics in `lore notes`**
   Why this is better: The plan currently contradicts itself: clap’s `requires = "project"` blocks use of `defaultProject`, while the query layer says a default fallback is allowed. Also, `since/until` parsing should use one shared “now” to avoid subtle drift and inverted windows.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Work Chunk 1B NotesArgs
-#[arg(long = "for-issue", ..., requires = "project")]
+#[arg(long = "for-issue", ...)]
 pub for_issue: Option<i64>,

-#[arg(long = "for-mr", ..., requires = "project")]
+#[arg(long = "for-mr", ...)]
 pub for_mr: Option<i64>,

@@ Work Chunk 1A Query Notes
-- `since`: `parse_since(since_str)` then `n.created_at >= ?`
-- `until`: `parse_since(until_str)` then `n.created_at <= ?`
+- Parse `since` and `until` with a single anchored `now_ms` captured once per command.
+- If the user supplies `YYYY-MM-DD` for `--until`, interpret it as end-of-day (23:59:59.999 UTC).
+- Validate `since <= until` after both parse with the same anchor.
```

5. **Add an analytics mode (not a profile command): `lore notes --aggregate`**
   Why this is better: This directly supports the stated use case (review patterns) without introducing the rejected “profile report” command. It keeps scope narrow and reuses existing filters.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Phase 1
+### Work Chunk 1F: Aggregation Mode for Notes Listing
+
+Add optional aggregation on top of `lore notes`:
+- `--aggregate author|note_type|path|resolution`
+- `--top N` (default 20)
+
+Behavior:
+- Reuses all existing filters (`--since`, `--project`, `--for-mr`, etc.)
+- Returns grouped counts (+ percentage of the filtered corpus)
+- Works in table/json/jsonl/csv
+
+Non-goal alignment:
+- This is not a narrative "reviewer profile" command.
+- It is a query primitive for downstream analysis.
```

6. **Prevent the note backfill from starving other document regeneration**
   Why this is better: After migration/backfill, note dirty entries can dominate the queue and delay issue/MR/discussion updates. Add source-type fairness to regenerator scheduling. An interleaving sketch follows the diff.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Work Chunk 2D
+#### Scheduling Revision
+Process dirty sources with weighted fairness instead of strict FIFO:
+- issue: 3
+- merge_request: 3
+- discussion: 2
+- note: 1
+
+Implementation sketch:
+- fetch the next batch by source_type buckets
+- interleave according to weights
+- preserve retry semantics per source

+#### Tests to Write First
+#[test]
+fn test_note_backfill_does_not_starve_issue_and_mr_regeneration() { /* ... */ }
```
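
A minimal sketch of weighted round-robin interleaving across source-type buckets, using the 3/3/2/1 weights proposed above; the bucket representation is an assumption, not the plan's actual queue API.

```rust
use std::collections::VecDeque;

// Drain up to `weight` items from each bucket per round, so a flood of dirty
// notes cannot starve issue/MR regeneration.
fn interleave(buckets: &mut [(u32, VecDeque<i64>)]) -> Vec<i64> {
    let mut batch = Vec::new();
    while buckets.iter().any(|(_, q)| !q.is_empty()) {
        for (weight, queue) in buckets.iter_mut() {
            for _ in 0..*weight {
                match queue.pop_front() {
                    Some(id) => batch.push(id),
                    None => break, // this bucket is exhausted for the round
                }
            }
        }
    }
    batch
}

fn main() {
    let mut buckets = vec![
        (3, VecDeque::from(vec![1, 2, 3, 4])),    // issue ids
        (1, VecDeque::from(vec![100, 101, 102])), // note ids
    ];
    // Issues lead each round, but notes are never starved:
    // [1, 2, 3, 100, 4, 101, 102]
    println!("{:?}", interleave(&mut buckets));
}
```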

7. **Harden migration 023: remove invalid SQL assertions and move integrity checks to tests**
   Why this is better: `RAISE(ABORT, ...)` in a standalone `SELECT` is not valid SQLite usage outside triggers/check expressions. Keep the migration SQL minimal and portable, and enforce invariants in migration tests.

```diff
--- a/PRD.md
+++ b/PRD.md
@@ Work Chunk 2A Migration SQL
--- Step 10: Integrity verification
-SELECT CASE
-    WHEN ... THEN RAISE(ABORT, '...')
-END;
+-- Step 10 removed from SQL migration.
+-- Integrity verification is enforced in migration tests:
+-- 1) pre/post row-count equality
+-- 2) `PRAGMA foreign_key_check` is empty
+-- 3) documents_fts row count matches documents row count after rebuild

@@ Work Chunk 2A Tests
+#[test]
+fn test_migration_023_integrity_checks_pass() {
+    // pre/post counts, foreign_key_check empty, fts parity
+}
```

These 7 revisions improve correctness under failure, reduce churn risk, improve large-sync performance, and make the feature materially more useful for reviewer-analysis workflows without reintroducing any rejected recommendations.

@@ -1,190 +0,0 @@
Here are the highest-impact revisions I’d make. None of these repeat anything in your `## Rejected Recommendations`.

1. **Add immutable reviewer identity (`author_id`) as a first-class key**
   Why this improves the plan: The PRD’s core use case is year-scale reviewer profiling. Usernames are mutable in GitLab, so username-only filtering will fragment one reviewer into multiple identities over time. Adding `author_id` closes that correctness hole and makes historical analysis reliable.

```diff
@@ Problem Statement
-1. **Query individual notes by author** — the `--author` filter on `lore search` only matches the first note's author per discussion thread
+1. **Query individual notes by reviewer identity** — support both the mutable username and the immutable GitLab `author_id` for stable longitudinal analysis

@@ Phase 0: Stable Note Identity
+### Work Chunk 0D: Immutable Author Identity Capture
+**Files:** `migrations/025_notes_author_id.sql`, `src/ingestion/discussions.rs`, `src/ingestion/mr_discussions.rs`, `src/cli/commands/list.rs`
+
+#### Implementation
+- Add nullable `notes.author_id INTEGER` and backfill from future syncs.
+- Populate `author_id` from the GitLab note payload (`note.author.id`) on both issue and MR note ingestion paths.
+- Add an `--author-id <int>` filter to `lore notes`.
+- Keep `--author` for ergonomics; when both are provided, require both to match.
+
+#### Indexing
+- Add `idx_notes_author_id_created ON notes(project_id, author_id, created_at DESC, id DESC) WHERE is_system = 0;`
+
+#### Tests
+- `test_query_notes_filter_author_id_survives_username_change`
+- `test_query_notes_author_and_author_id_intersection`
```

2. **Strengthen partial-fetch safety from a boolean to an explicit fetch-state contract**
   Why this improves the plan: `fetch_complete: bool` is easy to misuse and fragile under retries/crashes. A run-scoped state model makes sweep correctness auditable and prevents accidental deletions when ingestion aborts midway.

```diff
@@ Phase 0: Stable Note Identity
-### Work Chunk 0C: Sweep Safety Guard (Partial Fetch Protection)
+### Work Chunk 0C: Sweep Safety Guard with Run-Scoped Fetch State

@@ Implementation
-Add a `fetch_complete` parameter to the discussion ingestion functions. Only run the stale-note sweep when the fetch completed successfully:
+Add a run-scoped fetch state:
+- `FetchState::Complete`
+- `FetchState::Partial`
+- `FetchState::Failed`
+
+Only run the sweep on `FetchState::Complete`.
+Persist `run_seen_at` once per sync run and pass it unchanged through all discussion/note upserts.
+Require `run_seen_at` monotonicity per discussion before the sweep (skip and warn otherwise).

@@ Tests to Write First
+#[test]
+fn test_failed_fetch_never_sweeps_even_after_partial_upserts() { ... }
+#[test]
+fn test_non_monotonic_run_seen_at_skips_sweep() { ... }
+#[test]
+fn test_retry_after_failed_fetch_then_complete_sweeps_correctly() { ... }
```

3. **Add DB-level cleanup triggers for note-document referential integrity**
   Why this improves the plan: Work Chunk 0B handles the sweep path, but not every possible delete path. DB triggers give defense in depth so stale note docs cannot survive even if a future code path deletes notes differently.

```diff
@@ Work Chunk 0B: Immediate Deletion Propagation
-Update both sweep functions to propagate deletion to documents and dirty_sources using set-based SQL
+Keep the set-based SQL in the sweep functions, and add DB-level cleanup triggers as a safety net.

@@ Work Chunk 2A: Schema Migration (023)
+-- Cleanup trigger: deleting a non-system note must delete its note document + dirty queue row
+CREATE TRIGGER notes_ad_cleanup AFTER DELETE ON notes
+WHEN old.is_system = 0
+BEGIN
+    DELETE FROM documents
+    WHERE source_type = 'note' AND source_id = old.id;
+    DELETE FROM dirty_sources
+    WHERE source_type = 'note' AND source_id = old.id;
+END;
+
+-- Cleanup trigger: if a note flips to system, remove its document artifacts
+CREATE TRIGGER notes_au_system_cleanup AFTER UPDATE OF is_system ON notes
+WHEN old.is_system = 0 AND new.is_system = 1
+BEGIN
+    DELETE FROM documents
+    WHERE source_type = 'note' AND source_id = new.id;
+    DELETE FROM dirty_sources
+    WHERE source_type = 'note' AND source_id = new.id;
+END;
```

4. **Eliminate N+1 extraction cost with parent-metadata caching during regeneration**
   Why this improves the plan: Backfilling ~8k notes with per-note parent/label lookups creates avoidable query amplification. Batch caching turns repeated joins into one-time lookups per parent entity and materially reduces rebuild time. A cache sketch follows the diff.

```diff
@@ Phase 2: Per-Note Documents
+### Work Chunk 2I: Batch Parent Metadata Cache for Note Regeneration
+**Files:** `src/documents/regenerator.rs`, `src/documents/extractor.rs`
+
+#### Implementation
+- Add a `NoteExtractionContext` cache keyed by `(noteable_type, parent_id)` containing:
+  - parent iid/title/url
+  - parent labels
+  - project path
+- In batch regeneration, prefetch parent metadata for the note IDs in the current chunk.
+- Use the cached metadata in `extract_note_document()` to avoid repeated parent/label queries.
+
+#### Tests
+- `test_note_regeneration_uses_parent_cache_consistently`
+- `test_note_regeneration_cache_hit_preserves_hash_determinism`
```
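
A sketch of the per-chunk cache: one parent lookup per distinct `(noteable_type, parent_id)` instead of one per note. `ParentMeta` and `load_parent_meta` are hypothetical stand-ins for the extractor's internals.

```rust
use std::collections::HashMap;

#[derive(Clone)]
struct ParentMeta {
    title: String,
    labels: Vec<String>,
}

// Placeholder for the real parent/label DB query.
fn load_parent_meta(key: &(String, i64)) -> ParentMeta {
    ParentMeta { title: format!("{} {}", key.0, key.1), labels: vec![] }
}

// Return cached metadata, loading it at most once per parent per chunk.
fn parent_meta<'a>(
    cache: &'a mut HashMap<(String, i64), ParentMeta>,
    noteable_type: &str,
    parent_id: i64,
) -> &'a ParentMeta {
    cache
        .entry((noteable_type.to_string(), parent_id))
        .or_insert_with_key(load_parent_meta)
}

fn main() {
    let mut cache = HashMap::new();
    let meta = parent_meta(&mut cache, "Issue", 42);
    println!("{} ({} labels)", meta.title, meta.labels.len());
}
```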

5. **Add an embedding dedup cache keyed by a semantic text hash**
   Why this improves the plan: Note docs will contain repeated short comments (“LGTM”, “nit: …”). The current doc-level hashing includes metadata, so identical semantic comments still re-embed many times. A semantic embedding-hash cache cuts cost and speeds up full rebuild/backfill without changing search behavior. A hashing sketch follows the diff.

```diff
@@ Phase 2: Per-Note Documents
+### Work Chunk 2J: Semantic Embedding Dedup for Notes
+**Files:** `migrations/026_embedding_cache.sql`, embedding pipeline module(s), `src/documents/extractor.rs`
+
+#### Implementation
+- Compute `embedding_text` for notes as: normalized note body + compact stable context (`parent_type`, `path`, `resolution`), excluding volatile fields.
+- Compute `embedding_hash = sha256(embedding_text)`.
+- Before embedding generation, look up an existing vector by `(model, embedding_hash)`.
+- Reuse the cached vector when present; only call the embedding model on misses.
+
+#### Tests
+- `test_identical_note_bodies_reuse_embedding_vector`
+- `test_embedding_hash_changes_when_semantic_context_changes`
```
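
A sketch of the semantic hash with the `sha2` crate. The field choices mirror the proposal; the normalization itself (whitespace collapse + lowercasing) is an assumption about what "normalized body" should mean.

```rust
use sha2::{Digest, Sha256};

// Hash the normalized body plus the compact stable context, excluding
// volatile fields, so semantically identical comments share one vector.
fn embedding_hash(body: &str, parent_type: &str, path: Option<&str>, resolved: bool) -> String {
    let normalized = body
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase();
    let text = format!(
        "{normalized}\nparent_type={parent_type}\npath={}\nresolved={resolved}",
        path.unwrap_or("")
    );
    format!("{:x}", Sha256::digest(text.as_bytes()))
}

fn main() {
    // Identical "LGTM" comments in the same context reuse one cached vector.
    assert_eq!(
        embedding_hash("LGTM", "MergeRequest", None, true),
        embedding_hash("  lgtm ", "MergeRequest", None, true)
    );
}
```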

6. **Add deterministic review-signal tags as derived labels**
   Why this improves the plan: This makes the output immediately more useful for reviewer-pattern analysis without adding a profile command (which is explicitly out of scope). It increases the practical value of both `lore notes` and `lore search --type note` with low complexity.

```diff
@@ Non-Goals
-- Adding a "reviewer profile" report command (that's a downstream use case built on this infrastructure)
+- Adding a "reviewer profile" report command (downstream), while allowing low-level derived signal tags as indexing primitives

@@ Phase 2: Per-Note Documents
+### Work Chunk 2K: Derived Review Signal Labels
+**Files:** `src/documents/extractor.rs`
+
+#### Implementation
+- Derive deterministic labels from note text + metadata:
+  - `signal:nit`
+  - `signal:blocking`
+  - `signal:security`
+  - `signal:performance`
+  - `signal:testing`
+- Attach via the existing `document_labels` flow for note documents.
+- No new CLI mode required; existing label filters can consume these labels.
+
+#### Tests
+- `test_note_document_derives_signal_labels_nit`
+- `test_note_document_derives_signal_labels_security`
+- `test_signal_label_derivation_is_deterministic`
```

7. **Add high-precision note-targeting filters (`--note-id`, `--gitlab-note-id`, `--discussion-id`)**
   Why this improves the plan: Debugging, incident response, and reproducibility all benefit from exact addressing. This is especially useful when validating sync correctness and cross-checking a specific note/document lifecycle.

```diff
@@ Work Chunk 1B: CLI Arguments & Command Wiring
 pub struct NotesArgs {
+    /// Filter by local note row id
+    #[arg(long = "note-id", help_heading = "Filters")]
+    pub note_id: Option<i64>,
+
+    /// Filter by GitLab note id
+    #[arg(long = "gitlab-note-id", help_heading = "Filters")]
+    pub gitlab_note_id: Option<i64>,
+
+    /// Filter by local discussion id
+    #[arg(long = "discussion-id", help_heading = "Filters")]
+    pub discussion_id: Option<i64>,
 }

@@ Work Chunk 1A: Filter struct
 pub struct NoteListFilters<'a> {
+    pub note_id: Option<i64>,
+    pub gitlab_note_id: Option<i64>,
+    pub discussion_id: Option<i64>,
 }

@@ Tests to Write First
+#[test]
+fn test_query_notes_filter_note_id_exact() { ... }
+#[test]
+fn test_query_notes_filter_gitlab_note_id_exact() { ... }
+#[test]
+fn test_query_notes_filter_discussion_id_exact() { ... }
```

If you want, I can produce a single consolidated “iteration 5” PRD diff that merges these into your exact section ordering and updates the dependency graph/migration numbering end-to-end.

@@ -1,434 +0,0 @@
Below are the highest-leverage revisions I’d make to this plan. I’m focusing on correctness pitfalls, SQLite gotchas, query performance on 280K notes, and reducing “dynamic SQL + param juggling” complexity, without turning this into a new ingestion project.

Change 1 — Fix a hard SQLite bug in `--active` (GROUP_CONCAT DISTINCT + separator)

Why

SQLite does not allow `GROUP_CONCAT(DISTINCT x, sep)`. With DISTINCT, SQLite only permits a single argument (`GROUP_CONCAT(DISTINCT x)`). Your current query will error at runtime in many SQLite versions.

Revision

Use a subquery that selects distinct participants, then GROUP_CONCAT with your separator.

```diff
diff --git a/Plan.md b/Plan.md
@@ fn query_active(...)
-    (SELECT GROUP_CONCAT(DISTINCT n.author_username, X'1F')
-     FROM notes n
-     WHERE n.discussion_id = d.id
-       AND n.is_system = 0
-       AND n.author_username IS NOT NULL) AS participants
+    (SELECT GROUP_CONCAT(username, X'1F') FROM (
+        SELECT DISTINCT n.author_username AS username
+        FROM notes n
+        WHERE n.discussion_id = d.id
+          AND n.is_system = 0
+          AND n.author_username IS NOT NULL
+        ORDER BY username
+    )) AS participants
```
|
||||
Change 2 — Replace “contains('.') => exact file match” with segment-aware path classification
|
||||
Why
|
||||
|
||||
path.contains('.') misclassifies directories like:
|
||||
|
||||
.github/workflows/
|
||||
|
||||
src/v1.2/auth/
|
||||
|
||||
It also fails the “root file” case (README.md) because your mode discriminator only treats paths as paths if they contain /.
|
||||
|
||||
Revision
|
||||
|
||||
Add explicit --path to force Expert mode (covers root files cleanly).
|
||||
|
||||
Classify file-vs-dir by checking last path segment for a dot, and whether the input ends with /.
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/Plan.md b/Plan.md
|
||||
@@ pub struct WhoArgs {
|
||||
- /// Username or file path (path if contains /)
|
||||
- pub target: Option<String>,
|
||||
+ /// Username or file path shorthand (ambiguous for root files like README.md)
|
||||
+ pub target: Option<String>,
|
||||
+
|
||||
+ /// Force expert mode for a file/directory path (supports root files like README.md)
|
||||
+ #[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])]
|
||||
+ pub path: Option<String>,
|
||||
@@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result<WhoMode<'a>> {
|
||||
- if let Some(target) = &args.target {
|
||||
+ if let Some(p) = &args.path {
|
||||
+ return Ok(WhoMode::Expert { path: p });
|
||||
+ }
|
||||
+ if let Some(target) = &args.target {
|
||||
let clean = target.strip_prefix('@').unwrap_or(target);
|
||||
if args.reviews {
|
||||
return Ok(WhoMode::Reviews { username: clean });
|
||||
}
|
||||
- // Disambiguation: if target contains '/', it's a file path.
|
||||
- // GitLab usernames never contain '/'.
|
||||
- if target.contains('/') {
|
||||
+ // Disambiguation:
|
||||
+ // - treat as path if it contains '/'
|
||||
+ // - otherwise treat as username (root files require --path)
|
||||
+ if target.contains('/') {
|
||||
return Ok(WhoMode::Expert { path: target });
|
||||
}
|
||||
return Ok(WhoMode::Workload { username: clean });
|
||||
}
|
||||
|
||||
|
||||
And update the path pattern logic used by Expert/Overlap:
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/Plan.md b/Plan.md
|
||||
@@ fn query_expert(...)
|
||||
- // Normalize path for LIKE matching: add trailing % if no extension
|
||||
- let path_pattern = if path.contains('.') {
|
||||
- path.to_string() // Exact file match
|
||||
- } else {
|
||||
- let trimmed = path.trim_end_matches('/');
|
||||
- format!("{trimmed}/%")
|
||||
- };
|
||||
+ // Normalize:
|
||||
+ // - if ends_with('/') => directory prefix
|
||||
+ // - else if last segment contains '.' => file exact match
|
||||
+ // - else => directory prefix
|
||||
+ let trimmed = path.trim_end_matches('/');
|
||||
+ let last = trimmed.rsplit('/').next().unwrap_or(trimmed);
|
||||
+ let is_file = !path.ends_with('/') && last.contains('.');
|
||||
+ let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") };
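
As a sanity check, the classifier can be exercised standalone; a small sketch (the helper name is illustrative, not from the plan; escaping is handled separately in Change 4):

```rust
// Segment-aware classification extracted as a pure helper so the rules are
// testable in isolation.
fn path_like_pattern(path: &str) -> String {
    let trimmed = path.trim_end_matches('/');
    let last = trimmed.rsplit('/').next().unwrap_or(trimmed);
    let is_file = !path.ends_with('/') && last.contains('.');
    if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }
}

fn main() {
    // Exact file match: dot in the last segment.
    assert_eq!(path_like_pattern("src/auth/login.rs"), "src/auth/login.rs");
    // Trailing slash always means directory, even with dots in segments.
    assert_eq!(path_like_pattern(".github/workflows/"), ".github/workflows/%");
    // Dot in a *middle* segment no longer misclassifies the directory.
    assert_eq!(path_like_pattern("src/v1.2/auth"), "src/v1.2/auth/%");
}
```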

**Change 3 — Stop building dynamic SQL strings for optional filters; always bind params**

**Why**

Right now you’re mixing:

- dynamic `project_clause` string fragments
- ad-hoc param vectors
- placeholder renumbering by branch

That’s brittle and easy to regress (especially when you add more conditions later). SQLite/rusqlite can bind `Option<T>` to NULL, which enables a simple pattern:

```sql
AND (?3 IS NULL OR n.project_id = ?3)
```

**Revision** (representative; apply to all queries)

```diff
diff --git a/Plan.md b/Plan.md
@@ fn query_expert(...)
-    let project_clause = if project_id.is_some() {
-        "AND n.project_id = ?3"
-    } else {
-        ""
-    };
-
-    let sql = format!(
+    let sql = format!(
         "SELECT username, role, activity_count, last_active_at FROM (
@@
            FROM notes n
           WHERE n.position_new_path LIKE ?1
             AND n.is_system = 0
             AND n.author_username IS NOT NULL
             AND n.created_at >= ?2
-            {project_clause}
+            AND (?3 IS NULL OR n.project_id = ?3)
@@
           WHERE n.position_new_path LIKE ?1
             AND m.author_username IS NOT NULL
             AND m.updated_at >= ?2
-            {project_clause}
+            AND (?3 IS NULL OR n.project_id = ?3)
           GROUP BY m.author_username
-        )"
+        ) t"
     );
-
-    let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
-    params.push(Box::new(path_pattern.clone()));
-    params.push(Box::new(since_ms));
-    if let Some(pid) = project_id {
-        params.push(Box::new(pid));
-    }
-    let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
+    let param_refs = rusqlite::params![path_pattern, since_ms, project_id];
```

Notes:

- Adds the required derived-table alias `t` (some SQLite configurations are stricter).
- Eliminates the dynamic param vector and placeholder gymnastics.
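
A minimal end-to-end sketch of the nullable-binding pattern (table trimmed to one column for the demo):

```rust
use rusqlite::{Connection, params};

// Option<i64> binds to SQL NULL, so one static statement serves both the
// scoped and unscoped cases via (?1 IS NULL OR ...).
fn count_notes(conn: &Connection, project_id: Option<i64>) -> rusqlite::Result<i64> {
    conn.query_row(
        "SELECT COUNT(*) FROM notes WHERE (?1 IS NULL OR project_id = ?1)",
        params![project_id],
        |row| row.get(0),
    )
}

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute_batch(
        "CREATE TABLE notes (project_id INTEGER);
         INSERT INTO notes VALUES (1), (1), (2);",
    )?;
    assert_eq!(count_notes(&conn, None)?, 3);    // NULL short-circuits the filter
    assert_eq!(count_notes(&conn, Some(1))?, 2); // scoped to one project
    Ok(())
}
```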

**Change 4 — Filter “path touch” queries to DiffNotes and escape LIKE properly**

**Why**

- Only DiffNotes reliably have `position_new_path`; including other note types can skew counts and harm performance.
- LIKE treats `%` and `_` as wildcards—rare in file paths, but not impossible (generated files, templates). Escaping is a low-cost robustness win.

**Revision**

Add `note_type = 'DiffNote'` and `LIKE ... ESCAPE '\'` plus a tiny escape helper.

```diff
diff --git a/Plan.md b/Plan.md
@@ fn query_expert(...)
-    FROM notes n
-   WHERE n.position_new_path LIKE ?1
+    FROM notes n
+   WHERE n.note_type = 'DiffNote'
+     AND n.position_new_path LIKE ?1 ESCAPE '\'
     AND n.is_system = 0
@@
diff --git a/Plan.md b/Plan.md
@@ Helper Functions
+fn escape_like(input: &str) -> String {
+    input.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_")
+}
```

And when building patterns:

```diff
-    let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") };
+    let base = escape_like(trimmed);
+    let path_pattern = if is_file { base } else { format!("{base}/%") };
```

Apply the same changes to `query_overlap` and any other `position_new_path LIKE ...`.
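
To verify the escaping end to end, a small sketch (literal values chosen for the demo):

```rust
use rusqlite::{Connection, params};

fn escape_like(input: &str) -> String {
    input.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_")
}

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    let pattern = format!("{}%", escape_like("my_"));

    // Escaped underscore matches only the literal character...
    let hit: i64 = conn.query_row(
        "SELECT 'my_file.rs' LIKE ?1 ESCAPE '\\'",
        params![pattern],
        |row| row.get(0),
    )?;
    // ...and no longer acts as a single-character wildcard.
    let miss: i64 = conn.query_row(
        "SELECT 'myXfile.rs' LIKE ?1 ESCAPE '\\'",
        params![pattern],
        |row| row.get(0),
    )?;
    assert_eq!((hit, miss), (1, 0));
    Ok(())
}
```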

**Change 5 — Use note timestamps for “touch since” semantics (Expert/Overlap author branch)**

**Why**

In Expert/Overlap “author” branches you filter by `m.updated_at >= since`. That answers “MR updated recently” rather than “MR touched at this path recently”, which can surface stale ownership.

**Revision**

Filter by the note creation time (and use it for “last touch” where relevant). You can still compute author activity, but anchor it to note activity.

```diff
diff --git a/Plan.md b/Plan.md
@@ fn query_overlap(...)
-   WHERE n.position_new_path LIKE ?1
+   WHERE n.note_type = 'DiffNote'
+     AND n.position_new_path LIKE ?1 ESCAPE '\'
     AND m.state IN ('opened', 'merged')
     AND m.author_username IS NOT NULL
-    AND m.updated_at >= ?2
+    AND n.created_at >= ?2
     AND (?3 IS NULL OR m.project_id = ?3)
```

Same idea in Expert mode’s “MR authors” branch.

**Change 6 — Workload mode: apply `--since` consistently to unresolved discussions**

**Why**

Workload’s unresolved discussions ignore `since_ms`. That makes `--since` partially misleading and can dump very old threads.

**Revision**

Filter on `d.last_note_at` when `since_ms` is set.

```diff
diff --git a/Plan.md b/Plan.md
@@ fn query_workload(...)
-    let disc_sql = format!(
+    let disc_since = if since_ms.is_some() {
+        "AND d.last_note_at >= ?2"
+    } else { "" };
+    let disc_sql = format!(
         "SELECT d.noteable_type,
@@
          WHERE d.resolvable = 1 AND d.resolved = 0
            AND EXISTS (
@@
            )
            {disc_project_filter}
+           {disc_since}
          ORDER BY d.last_note_at DESC
          LIMIT {limit}"
     );
@@
-    // Rebuild params for discussion query (only username + optional project_id)
-    let mut disc_params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
-    disc_params.push(Box::new(username.to_string()));
-    if let Some(pid) = project_id {
-        disc_params.push(Box::new(pid));
-    }
+    // Params: username, since_ms, project_id (NULLs ok)
+    let disc_param_refs = rusqlite::params![username, since_ms, project_id];
```

(If you adopt Change 3 fully, this becomes very clean.)

**Change 7 — Make Overlap results represent “both roles” instead of collapsing to one**

**Why**

Collapsing to a single role loses valuable info (“they authored and reviewed”). Also, your current “prefer author” rule is arbitrary for the “who else is touching this” question.

**Revision**

Track role counts separately and render as A, R, or A+R.

```diff
diff --git a/Plan.md b/Plan.md
@@ pub struct OverlapUser {
     pub username: String,
-    pub role: String,
-    pub touch_count: u32,
+    pub author_touch_count: u32,
+    pub review_touch_count: u32,
+    pub touch_count: u32,
     pub last_touch_at: i64,
     pub mr_iids: Vec<i64>,
 }
@@ fn query_overlap(...)
-    let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser {
+    let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser {
         username: username.clone(),
-        role: role.clone(),
+        author_touch_count: 0,
+        review_touch_count: 0,
         touch_count: 0,
         last_touch_at: 0,
         mr_iids: Vec::new(),
     });
     entry.touch_count += count;
+    if role == "author" { entry.author_touch_count += count; }
+    if role == "reviewer" { entry.review_touch_count += count; }
@@ human output
-    println!(
-        "  {:<16} {:<8} {:>7} {:<12} {}",
+    println!(
+        "  {:<16} {:<6} {:>7} {:<12} {}",
     ...
     );
@@
-        user.role,
+        format_roles(user.author_touch_count, user.review_touch_count),
```
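
The diff references a `format_roles` helper it never defines; one plausible shape (a sketch, not from the plan):

```rust
// Renders the combined role column: A (author), R (reviewer), or A+R.
fn format_roles(author_touches: u32, review_touches: u32) -> &'static str {
    match (author_touches > 0, review_touches > 0) {
        (true, true) => "A+R",
        (true, false) => "A",
        (false, true) => "R",
        (false, false) => "-",
    }
}
```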

**Change 8 — Add an “Index Audit + optional migration” step (big perf win, low blast radius)**

**Why**

With 280K notes, the path/timestamp queries will degrade quickly without indexes. This isn’t “scope creep”; it’s making the feature usable.

**Revision** (plan-level)

Add a non-breaking migration that only creates indexes if missing. Optionally add a runtime check: if EXPLAIN QUERY PLAN indicates a full table scan on `notes`, print a dim warning in human mode.

```diff
diff --git a/Plan.md b/Plan.md
@@ Implementation Order
-| Step | What | Files |
+| Step | What | Files |
 | 1 | CLI skeleton: `WhoArgs` + `Commands::Who` + dispatch + stub | `cli/mod.rs`, `commands/mod.rs`, `main.rs` |
+| 1.5 | Index audit + add `CREATE INDEX IF NOT EXISTS` migration for who hot paths | `migrations/0xx_who_indexes.sql` |
@@
```

Suggested indexes (tune names to your conventions):

- `notes(note_type, position_new_path, created_at)`
- `notes(discussion_id, is_system, author_username)`
- `discussions(resolvable, resolved, last_note_at, project_id)`
- `merge_requests(project_id, state, updated_at, author_username)`
- `issue_assignees(username, issue_id)`

Even if SQLite can’t perfectly index LIKE, these still help with join and timestamp filters.
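
A sketch of what the guarded migration step could look like if applied through rusqlite (index names are illustrative; column orders are the suggestions above, to be tuned):

```rust
// Idempotent: CREATE INDEX IF NOT EXISTS makes this safe to re-run against
// databases that already have the indexes.
fn apply_who_indexes(conn: &rusqlite::Connection) -> rusqlite::Result<()> {
    conn.execute_batch(
        "CREATE INDEX IF NOT EXISTS idx_notes_type_path_created
             ON notes(note_type, position_new_path, created_at);
         CREATE INDEX IF NOT EXISTS idx_notes_discussion_author
             ON notes(discussion_id, is_system, author_username);
         CREATE INDEX IF NOT EXISTS idx_discussions_unresolved
             ON discussions(resolvable, resolved, last_note_at, project_id);
         CREATE INDEX IF NOT EXISTS idx_mrs_project_state_updated
             ON merge_requests(project_id, state, updated_at, author_username);
         CREATE INDEX IF NOT EXISTS idx_issue_assignees_user
             ON issue_assignees(username, issue_id);",
    )
}
```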

**Change 9 — Make robot JSON reproducible by echoing the effective query inputs**

**Why**

Agent workflows benefit from a stable “query record”: what mode ran, what path/user, resolved project, effective since, limit.

**Revision**

Include an `input` object in JSON output.

```diff
diff --git a/Plan.md b/Plan.md
@@ struct WhoJsonData {
     mode: String,
+    input: serde_json::Value,
     #[serde(flatten)]
     result: serde_json::Value,
 }
@@ pub fn print_who_json(...)
-    let output = WhoJsonEnvelope {
+    let input = serde_json::json!({
+        "project": /* resolved or raw args.project */,
+        "since": /* resolved since ISO */,
+        "limit": /* args.limit */,
+    });
+    let output = WhoJsonEnvelope {
         ok: true,
         data: WhoJsonData {
             mode: mode.to_string(),
+            input,
             result: data,
         },
         meta: RobotMeta { elapsed_ms },
     };
```

**Change 10 — Tighten clap constraints so invalid combinations never reach `resolve_mode`**

**Why**

Right now conflicts are enforced manually (or not at all). Clamp the invalid combos at the CLI layer:

- `--active` should conflict with `target`, `--overlap`, `--reviews`, `--path`
- `--reviews` should require a username (and should conflict with Expert path modes)

```diff
diff --git a/Plan.md b/Plan.md
@@ pub struct WhoArgs {
-    pub active: bool,
+    #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])]
+    pub active: bool,
@@
-    pub overlap: Option<String>,
+    #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])]
+    pub overlap: Option<String>,
@@
-    pub reviews: bool,
+    #[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])]
+    pub reviews: bool,
```
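
A trimmed sketch showing the constraints being enforced at parse time (clap v4 derive; struct reduced to the flags under discussion):

```rust
use clap::Parser;

#[derive(Parser, Debug)]
struct WhoArgs {
    /// Username or file path shorthand
    target: Option<String>,
    #[arg(long, conflicts_with_all = ["target", "overlap", "reviews"])]
    active: bool,
    #[arg(long)]
    overlap: Option<String>,
    #[arg(long, requires = "target")]
    reviews: bool,
}

fn main() {
    // --active with a positional target is rejected before resolve_mode runs.
    let err = WhoArgs::try_parse_from(["who", "--active", "alice"]).unwrap_err();
    assert_eq!(err.kind(), clap::error::ErrorKind::ArgumentConflict);

    // --reviews without a target is likewise rejected.
    let err = WhoArgs::try_parse_from(["who", "--reviews"]).unwrap_err();
    assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);
}
```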

**Summary of what I’d definitely change**

If you do nothing else, do these first:

1. Fix `GROUP_CONCAT(DISTINCT ..., sep)` in Active mode (runtime error).
2. Path classification: add `--path`, and stop using `contains('.')` globally.
3. Remove dynamic SQL + param vectors: always bind `project_id` as nullable and use `(? IS NULL OR ...)`.
4. Filter to DiffNotes + LIKE escaping for correctness and fewer rows scanned.
5. Optional index migration: otherwise this will feel slow (or non-deterministically slow) depending on local DB state.

If you want, I can also provide a consolidated “v2 plan” as a single unified patch (one diff) rather than per-change snippets.

@@ -1,303 +0,0 @@

Below are the highest-leverage revisions I’d make to iteration 1 to tighten correctness, performance, and “agent usefulness” without blowing up scope. For each change: (1) rationale, (2) a focused unified diff against the plan you pasted.

**Change 1 — Make robot “input echo” actually resolved (project_id, project_path, since_ms/iso, mode)**

**Why**

Your Design Principle #5 says the robot envelope should echo resolved inputs (“effective since, resolved project”), but the current `input` object echoes only raw CLI strings. Agents can’t reliably reproduce or compare runs (e.g., fuzzy project resolution may map differently over time).

This is also a reliability improvement: “what ran” should be computed once and propagated, not recomputed in output.

**Plan diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@
-5. **Robot-first reproducibility.** Robot JSON output includes an `input` object echoing the resolved query parameters (effective since, resolved project, limit) so agents can trace exactly what ran.
+5. **Robot-first reproducibility.** Robot JSON output includes a `resolved_input` object (mode, since_ms + since_iso, resolved project_id + project_path, limit, db_path) so agents can trace exactly what ran.
@@
-/// Main entry point. Resolves mode from args and dispatches.
-pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoResult> {
+/// Main entry point. Resolves mode + resolved inputs once, then dispatches.
+pub fn run_who(config: &Config, args: &WhoArgs) -> Result<WhoRun> {
     let db_path = get_db_path(config.storage.db_path.as_deref());
     let conn = create_connection(&db_path)?;

-    let project_id = args
+    let project_id = args
         .project
         .as_deref()
         .map(|p| resolve_project(&conn, p))
         .transpose()?;
+    let project_path = project_id
+        .map(|id| lookup_project_path(&conn, id))
+        .transpose()?;

     let mode = resolve_mode(args)?;

     match mode {
         WhoMode::Expert { path } => {
             let since_ms = resolve_since(args.since.as_deref(), "6m")?;
             let result = query_expert(&conn, path, project_id, since_ms, args.limit)?;
-            Ok(WhoResult::Expert(result))
+            Ok(WhoRun::new("expert", &db_path, project_id, project_path, since_ms, args.limit, WhoResult::Expert(result)))
         }
@@
     }
 }
+
+/// Wrapper that carries resolved inputs for reproducible output.
+pub struct WhoRun {
+    pub mode: String,
+    pub resolved_input: WhoResolvedInput,
+    pub result: WhoResult,
+}
+
+pub struct WhoResolvedInput {
+    pub db_path: String,
+    pub project_id: Option<i64>,
+    pub project_path: Option<String>,
+    pub since_ms: i64,
+    pub since_iso: String,
+    pub limit: usize,
+}
@@
-pub fn print_who_json(result: &WhoResult, args: &WhoArgs, elapsed_ms: u64) {
-    let (mode, data) = match result {
+pub fn print_who_json(run: &WhoRun, args: &WhoArgs, elapsed_ms: u64) {
+    let (mode, data) = match &run.result {
         WhoResult::Expert(r) => ("expert", expert_to_json(r)),
@@
-    let input = serde_json::json!({
+    let input = serde_json::json!({
         "target": args.target,
         "path": args.path,
         "project": args.project,
         "since": args.since,
         "limit": args.limit,
     });
+
+    let resolved_input = serde_json::json!({
+        "mode": run.mode,
+        "db_path": run.resolved_input.db_path,
+        "project_id": run.resolved_input.project_id,
+        "project_path": run.resolved_input.project_path,
+        "since_ms": run.resolved_input.since_ms,
+        "since_iso": run.resolved_input.since_iso,
+        "limit": run.resolved_input.limit,
+    });
@@
-        data: WhoJsonData {
-            mode: mode.to_string(),
-            input,
-            result: data,
-        },
+        data: WhoJsonData { mode: mode.to_string(), input, resolved_input, result: data },
         meta: RobotMeta { elapsed_ms },
     };
@@
 struct WhoJsonData {
     mode: String,
     input: serde_json::Value,
+    resolved_input: serde_json::Value,
     #[serde(flatten)]
     result: serde_json::Value,
 }
```

**Change 2 — Remove dynamic SQL `format!(.. LIMIT {limit})` and parameterize LIMIT everywhere**

**Why**

You explicitly prefer static SQL (`(?N IS NULL OR ...)`) to avoid subtle bugs, but Workload/Active use `format!` for LIMIT. Even though `limit` is typed, it’s an inconsistency that complicates statement caching and encourages future string-assembly creep.

SQLite supports `LIMIT ?` with bound parameters; rusqlite can bind an i64.

**Plan diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@
-    let issues_sql = format!(
-        "SELECT ...
-         ORDER BY i.updated_at DESC
-         LIMIT {limit}"
-    );
-    let mut stmt = conn.prepare(&issues_sql)?;
+    let issues_sql =
+        "SELECT ...
+         ORDER BY i.updated_at DESC
+         LIMIT ?4";
+    let mut stmt = conn.prepare(issues_sql)?;
     let assigned_issues: Vec<WorkloadIssue> = stmt
-        .query_map(rusqlite::params![username, project_id, since_ms], |row| {
+        .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| {
@@
-    let authored_sql = format!(
-        "SELECT ...
-         ORDER BY m.updated_at DESC
-         LIMIT {limit}"
-    );
-    let mut stmt = conn.prepare(&authored_sql)?;
+    let authored_sql =
+        "SELECT ...
+         ORDER BY m.updated_at DESC
+         LIMIT ?4";
+    let mut stmt = conn.prepare(authored_sql)?;
@@
-        .query_map(rusqlite::params![username, project_id, since_ms], |row| {
+        .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| {
@@
-    let reviewing_sql = format!(
-        "SELECT ...
-         ORDER BY m.updated_at DESC
-         LIMIT {limit}"
-    );
-    let mut stmt = conn.prepare(&reviewing_sql)?;
+    let reviewing_sql =
+        "SELECT ...
+         ORDER BY m.updated_at DESC
+         LIMIT ?4";
+    let mut stmt = conn.prepare(reviewing_sql)?;
@@
-        .query_map(rusqlite::params![username, project_id, since_ms], |row| {
+        .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| {
@@
-    let disc_sql = format!(
-        "SELECT ...
-         ORDER BY d.last_note_at DESC
-         LIMIT {limit}"
-    );
-    let mut stmt = conn.prepare(&disc_sql)?;
+    let disc_sql =
+        "SELECT ...
+         ORDER BY d.last_note_at DESC
+         LIMIT ?4";
+    let mut stmt = conn.prepare(disc_sql)?;
@@
-        .query_map(rusqlite::params![username, project_id, since_ms], |row| {
+        .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| {
@@
-    let sql = format!(
-        "SELECT ...
-         ORDER BY d.last_note_at DESC
-         LIMIT {limit}"
-    );
-    let mut stmt = conn.prepare(&sql)?;
+    let sql =
+        "SELECT ...
+         ORDER BY d.last_note_at DESC
+         LIMIT ?3";
+    let mut stmt = conn.prepare(sql)?;
@@
-        .query_map(rusqlite::params![since_ms, project_id], |row| {
+        .query_map(rusqlite::params![since_ms, project_id, limit as i64], |row| {
```
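
A standalone sketch confirming the bound-LIMIT behavior:

```rust
use rusqlite::{Connection, params};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute_batch("CREATE TABLE t(v INTEGER); INSERT INTO t VALUES (3),(1),(4),(2);")?;

    let limit: usize = 2;
    // LIMIT takes a bound parameter like any other, so the SQL stays static.
    let mut stmt = conn.prepare("SELECT v FROM t ORDER BY v LIMIT ?1")?;
    let got: Vec<i64> = stmt
        .query_map(params![limit as i64], |row| row.get(0))?
        .collect::<Result<_, _>>()?;
    assert_eq!(got, vec![1, 2]);
    Ok(())
}
```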

**Change 3 — Fix path matching for dotless files (LICENSE/Makefile) via “exact OR prefix” (no new flags)**

**Why**

Your improved “dot only in last segment” heuristic still fails on dotless files (LICENSE, Makefile, Dockerfile) which are common, especially at repo root. Right now they’ll be treated as directories (`LICENSE/%`) and silently return nothing.

Best minimal UX: if the user provides a path that’s ambiguous (no trailing slash), match either exact file OR directory prefix.

**Plan diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@
-/// Build a LIKE pattern from a user-supplied path, with proper LIKE escaping.
-///
-/// Rules:
-/// - If the path ends with `/`, it's a directory prefix → `escaped_path%`
-/// - If the last path segment contains `.`, it's a file → exact match
-/// - Otherwise, it's a directory prefix → `escaped_path/%`
+/// Build an exact + prefix match from a user-supplied path, with proper LIKE escaping.
+///
+/// Rules:
+/// - If the path ends with `/`, treat as directory-only (prefix match)
+/// - Otherwise, treat as ambiguous: exact match OR directory prefix
+///   (fixes dotless files like LICENSE/Makefile without requiring new flags)
@@
-fn build_path_pattern(path: &str) -> String {
+struct PathMatch {
+    exact: String,
+    prefix: String,
+    dir_only: bool,
+}
+
+fn build_path_match(path: &str) -> PathMatch {
     let trimmed = path.trim_end_matches('/');
-    let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
-    let is_file = !path.ends_with('/') && last_segment.contains('.');
     let escaped = escape_like(trimmed);
-
-    if is_file {
-        escaped
-    } else {
-        format!("{escaped}/%")
-    }
+    PathMatch {
+        exact: escaped.clone(),
+        prefix: format!("{escaped}/%"),
+        dir_only: path.ends_with('/'),
+    }
 }
@@
-    let path_pattern = build_path_pattern(path);
+    let pm = build_path_match(path);
@@
-    AND n.position_new_path LIKE ?1 ESCAPE '\\'
+    AND (
+        (?4 = 1 AND n.position_new_path LIKE ?2 ESCAPE '\\')
+     OR (?4 = 0 AND (n.position_new_path = ?1 OR n.position_new_path LIKE ?2 ESCAPE '\\'))
+    )
@@
-    let rows: Vec<(String, String, u32, i64)> = stmt
-        .query_map(rusqlite::params![path_pattern, since_ms, project_id], |row| {
+    let rows: Vec<(String, String, u32, i64)> = stmt
+        .query_map(rusqlite::params![pm.exact, pm.prefix, since_ms, i32::from(pm.dir_only), project_id], |row| {
             Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
         })?
```

(Apply the same pattern to Overlap mode.)

**Change 4 — Consistently exclude system notes in all DiffNote-based branches (Expert/Overlap author branches currently don’t)**

**Why**

You filter `n.is_system = 0` for reviewer branches, but not in the author branches of Expert/Overlap. That can skew “author touch” via system-generated diff notes or bot activity.

Consistency here improves correctness and also enables more aggressive partial indexing.

**Plan diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@
-    WHERE n.note_type = 'DiffNote'
+    WHERE n.note_type = 'DiffNote'
       AND n.position_new_path LIKE ?1 ESCAPE '\\'
+      AND n.is_system = 0
       AND m.author_username IS NOT NULL
       AND n.created_at >= ?2
       AND (?3 IS NULL OR m.project_id = ?3)
@@
-    WHERE n.note_type = 'DiffNote'
+    WHERE n.note_type = 'DiffNote'
       AND n.position_new_path LIKE ?1 ESCAPE '\\'
+      AND n.is_system = 0
       AND m.state IN ('opened', 'merged')
       AND m.author_username IS NOT NULL
       AND n.created_at >= ?2
       AND (?3 IS NULL OR m.project_id = ?3)
```

**Change 5 — Rework Migration 017 indexes to match real predicates + add one critical notes index for discussion participation**

**Why**

(a) `idx_notes_diffnote_path_created` currently leads with `note_type` even though it’s constant via the partial index. You want the leading columns to match your most selective predicates: `position_new_path` prefix + `created_at` range, with optional `project_id`.

(b) Active + Workload discussion participation repeatedly hits notes by `(discussion_id, author_username)`; you only guarantee `notes(discussion_id)` is indexed. Adding a narrow partial composite index pays off immediately for both “participants” and “EXISTS user participated” checks.

(c) The discussions index should focus on (project_id, last_note_at) with a partial predicate; resolvable/resolved a_

@@ -1,471 +0,0 @@

Below are the revisions I’d make to iteration 2 to improve correctness, determinism, query-plan quality, and multi-project usability without turning this into a bigger product.

I’m treating your plan as the “source of truth” and showing git-diff style patches against the plan text/code blocks you included.

**Change 1 — Fix project scoping to hit the right index (DiffNote branches)**

**Why**

Your hot-path index is:

```sql
idx_notes_diffnote_path_created ON notes(position_new_path, created_at, project_id)
    WHERE note_type = 'DiffNote' AND is_system = 0
```

But in Expert/Overlap you sometimes scope by `m.project_id = ?3` (MR table), not `n.project_id = ?3` (notes table). That weakens the optimizer’s ability to use the composite notes index (and can force broader joins before filtering).

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Query: Expert Mode @@
-    AND (?3 IS NULL OR m.project_id = ?3)
+    -- IMPORTANT: scope on notes.project_id to maximize use of
+    -- idx_notes_diffnote_path_created (notes is the selective table)
+    AND (?3 IS NULL OR n.project_id = ?3)
@@ Query: Overlap Mode @@
-    AND (?3 IS NULL OR m.project_id = ?3)
+    AND (?3 IS NULL OR n.project_id = ?3)
@@ Query: Overlap Mode (author branch) @@
-    AND (?3 IS NULL OR m.project_id = ?3)
+    AND (?3 IS NULL OR n.project_id = ?3)
```

**Change 2 — Introduce a “prefix vs exact” path query to avoid LIKE when you don’t need it**

**Why**

For exact file paths (e.g. `src/auth/login.rs`), you currently do `position_new_path LIKE ?1 ESCAPE '\'` where `?1` has no wildcard. That’s logically fine, but it’s a worse signal to the planner than `=` and can degrade performance depending on collation/case settings.

This doesn’t violate “static SQL”: you can pick between two static query strings.

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Helper: Path Pattern Construction @@
-fn build_path_pattern(path: &str) -> String {
+struct PathQuery {
+    /// The parameter value to bind.
+    value: String,
+    /// If true: use LIKE value || '%'. If false: use '='.
+    is_prefix: bool,
+}
+
+fn build_path_query(path: &str) -> PathQuery {
     let trimmed = path.trim_end_matches('/');
     let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
     let is_file = !path.ends_with('/') && last_segment.contains('.');
     let escaped = escape_like(trimmed);

     if is_file {
-        escaped
+        PathQuery { value: escaped, is_prefix: false }
     } else {
-        format!("{escaped}/%")
+        PathQuery { value: format!("{escaped}/%"), is_prefix: true }
     }
 }
```

And then (example for DiffNote predicates):

```diff
@@ Query: Expert Mode @@
-    let path_pattern = build_path_pattern(path);
+    let pq = build_path_query(path);

-    let sql = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... ";
+    let sql_prefix = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... ";
+    let sql_exact  = " ... n.position_new_path = ?1 ... ";

-    let mut stmt = conn.prepare(sql)?;
+    let mut stmt = if pq.is_prefix { conn.prepare_cached(sql_prefix)? }
+                   else { conn.prepare_cached(sql_exact)? };
     let rows = stmt.query_map(params![... pq.value ...], ...);
```

**Change 3 — Push Expert aggregation into SQL (less Rust, fewer rows, SQL-level LIMIT)**

**Why**

Right now Expert does:

- UNION ALL
- return per-role rows
- HashMap merge
- score compute
- sort/truncate

You can do all of that in SQL deterministically, and then `LIMIT ?N` actually works.

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Query: Expert Mode @@
-    let sql = "SELECT username, role, activity_count, last_active_at FROM (
-        ...
-    )";
+    let sql = "
+    WITH activity AS (
+        SELECT
+            n.author_username AS username,
+            'reviewer' AS role,
+            COUNT(*) AS cnt,
+            MAX(n.created_at) AS last_active_at
+        FROM notes n
+        WHERE n.note_type = 'DiffNote'
+          AND n.is_system = 0
+          AND n.author_username IS NOT NULL
+          AND n.created_at >= ?2
+          AND (?3 IS NULL OR n.project_id = ?3)
+          AND (
+              (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR
+              (?4 = 0 AND n.position_new_path = ?1)
+          )
+        GROUP BY n.author_username
+
+        UNION ALL
+
+        SELECT
+            m.author_username AS username,
+            'author' AS role,
+            COUNT(DISTINCT m.id) AS cnt,
+            MAX(n.created_at) AS last_active_at
+        FROM merge_requests m
+        JOIN discussions d ON d.merge_request_id = m.id
+        JOIN notes n ON n.discussion_id = d.id
+        WHERE n.note_type = 'DiffNote'
+          AND n.is_system = 0
+          AND m.author_username IS NOT NULL
+          AND n.created_at >= ?2
+          AND (?3 IS NULL OR n.project_id = ?3)
+          AND (
+              (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR
+              (?4 = 0 AND n.position_new_path = ?1)
+          )
+        GROUP BY m.author_username
+    )
+    SELECT
+        username,
+        SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) AS review_count,
+        SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) AS author_count,
+        MAX(last_active_at) AS last_active_at,
+        (SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) * 3.0) +
+        (SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) * 2.0) AS score
+    FROM activity
+    GROUP BY username
+    ORDER BY score DESC, last_active_at DESC, username ASC
+    LIMIT ?5
+    ";

-    // Aggregate by username: combine reviewer + author counts
-    let mut user_map: HashMap<...> = HashMap::new();
-    ...
-    experts.sort_by(...); experts.truncate(limit);
+    // No Rust-side merge/sort needed; SQL already returns final rows.
```

**Change 4 — Overlap output is ambiguous across projects: include stable MR refs (`project_path!iid`)**

**Why**

`mr_iids: Vec<i64>` is ambiguous in a multi-project DB. `!123` only means something with a project.

Also: your MR IID dedup is currently `Vec::contains()` inside a loop (O(n²)). Use a HashSet.

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ OverlapResult @@
 pub struct OverlapUser {
     pub username: String,
@@
-    pub mr_iids: Vec<i64>,
+    /// Stable MR references like "group/project!123"
+    pub mr_refs: Vec<String>,
 }

@@ Query: Overlap Mode (SQL) @@
-    GROUP_CONCAT(DISTINCT m.iid) AS mr_iids
+    GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
     FROM notes n
     JOIN discussions d ON n.discussion_id = d.id
     JOIN merge_requests m ON d.merge_request_id = m.id
+    JOIN projects p ON m.project_id = p.id
@@
-    GROUP_CONCAT(DISTINCT m.iid) AS mr_iids
+    GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
     FROM merge_requests m
     JOIN discussions d ON d.merge_request_id = m.id
     JOIN notes n ON n.discussion_id = d.id
+    JOIN projects p ON m.project_id = p.id

@@ Query: Overlap Mode (Rust merge) @@
-    let mr_iids: Vec<i64> = mr_iids_csv ...
+    let mr_refs: Vec<String> = mr_refs_csv
+        .as_deref()
+        .map(|csv| csv.split(',').map(|s| s.trim().to_string()).collect())
+        .unwrap_or_default();
@@
-    // Merge MR IIDs, deduplicate
-    for iid in &mr_iids {
-        if !entry.mr_iids.contains(iid) {
-            entry.mr_iids.push(*iid);
-        }
-    }
+    // Merge MR refs, deduplicate
+    use std::collections::HashSet;
+    let mut set: HashSet<String> = entry.mr_refs.drain(..).collect();
+    for r in mr_refs { set.insert(r); }
+    entry.mr_refs = set.into_iter().collect();
```
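
One nit on the merge above: `HashSet::into_iter` yields an unspecified order, which cuts against the determinism goal stated in the intro. A `BTreeSet` keeps the merged refs sorted and stable across runs; a sketch:

```rust
use std::collections::BTreeSet;

// Deterministic merge: BTreeSet iterates in sorted order, so repeated runs
// emit mr_refs identically.
fn merge_refs(existing: &mut Vec<String>, incoming: Vec<String>) {
    let mut set: BTreeSet<String> = existing.drain(..).collect();
    set.extend(incoming);
    *existing = set.into_iter().collect();
}

fn main() {
    let mut refs = vec!["group/app!7".to_string(), "group/app!3".to_string()];
    merge_refs(&mut refs, vec!["group/app!7".to_string(), "group/lib!1".to_string()]);
    assert_eq!(refs, ["group/app!3", "group/app!7", "group/lib!1"]);
}
```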

**Change 5 — Active mode: avoid correlated subqueries by preselecting discussions, then aggregating notes once**

**Why**

Your Active query does two correlated subqueries per discussion row: `note_count` and `participants`. With LIMIT 20 it’s not catastrophic, but it is still unnecessary work and creates “spiky” behavior if the planner chooses poorly.

Pattern to use:

- a CTE selects the limited set of discussions
- join notes once, aggregate with GROUP BY

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Query: Active Mode @@
-    let sql =
-        "SELECT
-            d.noteable_type,
-            ...
-            (SELECT COUNT(*) FROM notes n
-              WHERE n.discussion_id = d.id AND n.is_system = 0) AS note_count,
-            (SELECT GROUP_CONCAT(username, X'1F') FROM (
-                SELECT DISTINCT n.author_username AS username
-                  FROM notes n
-                 WHERE n.discussion_id = d.id
-                   AND n.is_system = 0
-                   AND n.author_username IS NOT NULL
-                 ORDER BY username
-            )) AS participants
-        FROM discussions d
-        ...
-        LIMIT ?3";
+    let sql = "
+    WITH picked AS (
+        SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, d.project_id, d.last_note_at
+        FROM discussions d
+        WHERE d.resolvable = 1 AND d.resolved = 0
+          AND d.last_note_at >= ?1
+          AND (?2 IS NULL OR d.project_id = ?2)
+        ORDER BY d.last_note_at DESC
+        LIMIT ?3
+    ),
+    note_agg AS (
+        SELECT
+            n.discussion_id,
+            COUNT(*) AS note_count,
+            GROUP_CONCAT(n.author_username, X'1F') AS participants
+        FROM (
+            SELECT DISTINCT discussion_id, author_username
+            FROM notes
+            WHERE is_system = 0 AND author_username IS NOT NULL
+        ) n
+        JOIN picked p ON p.id = n.discussion_id
+        GROUP BY n.discussion_id
+    )
+    SELECT
+        p.noteable_type,
+        COALESCE(i.iid, m.iid) AS entity_iid,
+        COALESCE(i.title, m.title) AS entity_title,
+        proj.path_with_namespace,
+        p.last_note_at,
+        COALESCE(na.note_count, 0) AS note_count,
+        COALESCE(na.participants, '') AS participants
+    FROM picked p
+    JOIN projects proj ON p.project_id = proj.id
+    LEFT JOIN issues i ON p.issue_id = i.id
+    LEFT JOIN merge_requests m ON p.merge_request_id = m.id
+    LEFT JOIN note_agg na ON na.discussion_id = p.id
+    ORDER BY p.last_note_at DESC
+    ";
```

**Change 6 — Use `prepare_cached()` everywhere (cheap perf win, no scope creep)**

**Why**

You already worked hard to keep SQL static. Taking advantage of SQLite statement caching completes the loop.

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Query functions @@
-    let mut stmt = conn.prepare(sql)?;
+    let mut stmt = conn.prepare_cached(sql)?;
```

Apply in all query fns (`query_workload`, `query_reviews`, `query_active`, `query_expert`, `query_overlap`, `lookup_project_path`).
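
For reference, a minimal sketch of the cached form (the cache is per-connection and keyed by SQL text):

```rust
// prepare_cached returns the previously compiled statement on repeat calls,
// so static SQL pays the parse/plan cost once per connection.
fn last_note_at(conn: &rusqlite::Connection, discussion_id: i64) -> rusqlite::Result<Option<i64>> {
    let mut stmt = conn.prepare_cached(
        "SELECT MAX(created_at) FROM notes WHERE discussion_id = ?1",
    )?;
    stmt.query_row(rusqlite::params![discussion_id], |row| row.get(0))
}
```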

**Change 7 — Human output: show `project_path` where ambiguity exists (Workload + Overlap)**

**Why**

When not project-scoped, `#42` and `!100` aren’t unique. You already have project paths in the query results; you’re just not printing them.

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ print_workload_human @@
-    println!(
-        "  {} {} {}",
+    println!(
+        "  {} {} {} {}",
         style(format!("#{:<5}", item.iid)).cyan(),
         truncate_str(&item.title, 45),
         style(format_relative_time(item.updated_at)).dim(),
+        style(&item.project_path).dim(),
     );

@@ print_workload_human (MRs) @@
-    println!(
-        "  {} {}{} {}",
+    println!(
+        "  {} {}{} {} {}",
         style(format!("!{:<5}", mr.iid)).cyan(),
         truncate_str(&mr.title, 40),
         style(draft).dim(),
         style(format_relative_time(mr.updated_at)).dim(),
+        style(&mr.project_path).dim(),
     );

@@ print_overlap_human @@
-    let mr_str = user.mr_iids.iter().take(5).map(|iid| format!("!{iid}")).collect::<Vec<_>>().join(", ");
+    let mr_str = user.mr_refs.iter().take(5).cloned().collect::<Vec<_>>().join(", ");
```

**Change 8 — Robot JSON: add stable IDs + “defaulted” flags for reproducibility**

**Why**

You already added `resolved_input`. Two more reproducibility gaps remain:

- Agents can’t reliably “open” an entity without IDs (`discussion_id`, `mr_id`, `issue_id`).
- Agents can’t tell whether `since` was user-provided vs defaulted (important when replaying intent).

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ WhoResolvedInput @@
 pub struct WhoResolvedInput {
@@
     pub since_ms: Option<i64>,
     pub since_iso: Option<String>,
+    pub since_was_default: bool,
     pub limit: usize,
 }

@@ run_who @@
-    let since_ms = resolve_since(args.since.as_deref(), "6m")?;
+    let since_was_default = args.since.is_none();
+    let since_ms = resolve_since(args.since.as_deref(), "6m")?;
     Ok(WhoRun {
         resolved_input: WhoResolvedInput {
@@
             since_ms: Some(since_ms),
             since_iso: Some(ms_to_iso(since_ms)),
+            since_was_default,
             limit: args.limit,
         },

@@ print_who_json resolved_input @@
     let resolved_input = serde_json::json!({
@@
         "since_ms": run.resolved_input.since_ms,
         "since_iso": run.resolved_input.since_iso,
+        "since_was_default": run.resolved_input.since_was_default,
         "limit": run.resolved_input.limit,
     });
```

And for Active/Workload discussion items, add IDs in SQL and JSON:

```diff
@@ ActiveDiscussion @@
 pub struct ActiveDiscussion {
+    pub discussion_id: i64,
@@
 }

@@ query_active SELECT @@
-    SELECT
-        p.noteable_type,
+    SELECT
+        p.id AS discussion_id,
+        p.noteable_type,

@@ active_to_json @@
-    "discussions": r.discussions.iter().map(|d| json!({
+    "discussions": r.discussions.iter().map(|d| json!({
+        "discussion_id": d.discussion_id,
         ...
     }))
```

**Change 9 — Make performance verification explicit: require EXPLAIN QUERY PLAN checks for each mode**

**Why**

You’re adding indexes specifically for these queries. The only way to ensure the planner is doing what you think is to lock in a short perf checklist (especially after schema drift or SQLite version differences).

**Diff**

```diff
--- a/who-command-design.md
+++ b/who-command-design.md
@@ Verification @@
 # Manual verification against real data
 cargo run --release -- who src/features/global-search/
@@
 cargo run --release -- who src/features/global-search/ -p typescript  # project scoped
+
+# Perf verification (required before merge):
+# Confirm idx_notes_diffnote_path_created is used for Expert/Overlap and
+# idx_discussions_unresolved_recent is used for Active.
+sqlite3 path/to/db.sqlite "
+  EXPLAIN QUERY PLAN
+  SELECT ...  -- paste final Expert SQL with representative bindings
+";
```

(Keep it lightweight: one representative query per mode is enough.)
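
If you also want the optional runtime check mentioned in iteration 1, a hedged sketch (the `detail` text format varies across SQLite versions, so treat the string match as a heuristic; the function name is illustrative):

```rust
// Runs EXPLAIN QUERY PLAN for a query and warns when the plan falls back to
// a full scan of `notes`. EQP rows are (id, parent, notused, detail).
fn warn_if_scanning_notes(
    conn: &rusqlite::Connection,
    sql: &str,
    params: &[&dyn rusqlite::ToSql],
) -> rusqlite::Result<()> {
    let mut stmt = conn.prepare(&format!("EXPLAIN QUERY PLAN {sql}"))?;
    let details: Vec<String> = stmt
        .query_map(params, |row| row.get::<_, String>(3))?
        .collect::<Result<_, _>>()?;
    if details.iter().any(|d| d.starts_with("SCAN") && d.contains("notes")) {
        eprintln!("note: query plan scans `notes`; an expected index may be missing");
    }
    Ok(())
}
```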

**Net effect**

- Correctness: project scoping hits the notes index; IDs added for agent workflows.
- Performance: fewer rows/materialization in Expert; statement caching everywhere; Active avoids correlated subqueries.
- UX: human output no longer ambiguous across projects; Overlap MR references become actionable.
- Reproducibility: agents can distinguish defaults vs explicit inputs; can dereference entities reliably.

If you want one “highest ROI” subset to implement first: Change 1 + Change 4 + Change 6 + Change 7. That’s where the real operational value lands.

@@ -1,356 +0,0 @@

Below are the highest-leverage revisions I’d make. They’re tightly scoped (no new tables/APIs), but fix a few real correctness issues and make the outputs more actionable.

**1) Fix a correctness bug in PathQuery: don’t escape for `=`, and make `--path Makefile` actually work**

**Why**

- Bug: `build_path_query()` currently runs `escape_like()` even when `is_prefix = false` (exact match). That will break exact matches for paths containing `_`, `%`, or `\` because `=` does not treat those as metacharacters (so the escaped string won’t equal the stored path).
- UX mismatch: the plan says `--path` handles dotless root files (Makefile/LICENSE), but the current logic still treats them as directory prefixes (`Makefile/%`) → zero results.

**Change**

- Only escape for LIKE.
- Treat root paths (no `/`) passed via `--path` as exact matches by default (unless they end with `/`).

```diff
diff --git a/plan.md b/plan.md
@@
-/// Build a path query from a user-supplied path.
-///
-/// Rules:
-/// - If the path ends with `/`, it's a directory prefix -> `escaped_path%` (LIKE)
-/// - If the last path segment contains `.`, it's a file -> exact match (=)
-/// - Otherwise, it's a directory prefix -> `escaped_path/%` (LIKE)
+/// Build a path query from a user-supplied path.
+///
+/// Rules:
+/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE)
+/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=)
+///   (this makes `--path Makefile` and `--path LICENSE` work as intended)
+/// - Else if the last path segment contains `.`, treat as exact (=)
+/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE)
@@
-fn build_path_query(path: &str) -> PathQuery {
+fn build_path_query(path: &str) -> PathQuery {
     let trimmed = path.trim_end_matches('/');
     let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
-    let is_file = !path.ends_with('/') && last_segment.contains('.');
-    let escaped = escape_like(trimmed);
+    let is_root = !trimmed.contains('/');
+    let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.'));

     if is_file {
         PathQuery {
-            value: escaped,
+            // IMPORTANT: do NOT escape for exact match (=)
+            value: trimmed.to_string(),
             is_prefix: false,
         }
     } else {
+        let escaped = escape_like(trimmed);
         PathQuery {
             value: format!("{escaped}/%"),
             is_prefix: true,
         }
     }
 }
@@
-/// **Known limitation:** Dotless root files (LICENSE, Makefile, Dockerfile)
-/// without a trailing `/` will be treated as directory prefixes. Use `--path`
-/// for these — the `--path` flag passes through to Expert mode directly,
-/// and the `build_path_query` output for "LICENSE" is a prefix `LICENSE/%`
-/// which will simply return zero results (a safe, obvious failure mode that the
-/// help text addresses).
+/// Note: Root file paths passed via `--path` (including dotless files like Makefile/LICENSE)
+/// are treated as exact matches unless they end with `/`.
```

Also update the `--path` help text to be explicit:

```diff
diff --git a/plan.md b/plan.md
@@
-    /// Force expert mode for a file/directory path (handles root files like
-    /// README.md, LICENSE, Makefile that lack a / and can't be auto-detected)
+    /// Force expert mode for a file/directory path.
+    /// Root files (README.md, LICENSE, Makefile) are treated as exact matches.
+    /// Use a trailing `/` to force directory-prefix matching.
```

**2) Fix Active mode: your note_count is currently counting participants, and the CTE scans too broadly**

**Why**

- In `note_agg`, you do `SELECT DISTINCT discussion_id, author_username` and then `COUNT(*) AS note_count`. That’s a participant count, not a note count.
- The current `note_agg` also builds the DISTINCT set from all notes, then joins to `picked`. It’s avoidable work.

**Change**

Split into two aggregations scoped to `picked`:

- `note_counts`: counts non-system notes per picked discussion.
- `participants`: distinct usernames per picked discussion, then GROUP_CONCAT.

```diff
diff --git a/plan.md b/plan.md
@@
-    note_agg AS (
-        SELECT
-            n.discussion_id,
-            COUNT(*) AS note_count,
-            GROUP_CONCAT(n.author_username, X'1F') AS participants
-        FROM (
-            SELECT DISTINCT discussion_id, author_username
-            FROM notes
-            WHERE is_system = 0 AND author_username IS NOT NULL
-        ) n
-        JOIN picked p ON p.id = n.discussion_id
-        GROUP BY n.discussion_id
-    )
+    note_counts AS (
+        SELECT
+            n.discussion_id,
+            COUNT(*) AS note_count
+        FROM notes n
+        JOIN picked p ON p.id = n.discussion_id
+        WHERE n.is_system = 0
+        GROUP BY n.discussion_id
+    ),
+    participants AS (
+        SELECT
+            x.discussion_id,
+            GROUP_CONCAT(x.author_username, X'1F') AS participants
+        FROM (
+            SELECT DISTINCT n.discussion_id, n.author_username
+            FROM notes n
+            JOIN picked p ON p.id = n.discussion_id
+            WHERE n.is_system = 0 AND n.author_username IS NOT NULL
+        ) x
+        GROUP BY x.discussion_id
+    )
@@
-    LEFT JOIN note_agg na ON na.discussion_id = p.id
+    LEFT JOIN note_counts nc ON nc.discussion_id = p.id
+    LEFT JOIN participants pa ON pa.discussion_id = p.id
@@
-    COALESCE(na.note_count, 0) AS note_count,
-    COALESCE(na.participants, '') AS participants
+    COALESCE(nc.note_count, 0) AS note_count,
+    COALESCE(pa.participants, '') AS participants
```

Net effect: correctness fix + more predictable perf.

Add a test that would have failed before:

```diff
diff --git a/plan.md b/plan.md
@@
 #[test]
 fn test_active_query() {
@@
-    insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work");
+    insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work");
+    insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/foo.rs", "follow-up");
@@
-    assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]);
+    assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]);
+    assert_eq!(result.discussions[0].note_count, 2);
```

**3) Index fix: `idx_discussions_unresolved_recent` won’t help global `--active` ordering**

**Why**

- Your index is `(project_id, last_note_at)` with `WHERE resolvable = 1 AND resolved = 0`.
- When `--active` is not project-scoped (the common default), SQLite can’t use `(project_id, last_note_at)` to satisfy `ORDER BY last_note_at DESC` efficiently because `project_id` isn’t constrained.
- This can turn into a scan+sort over potentially large unresolved sets.

**Change**

Keep the project-scoped index, but add a global ordering index (partial, still small):

```diff
diff --git a/plan.md b/plan.md
@@
 CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent
     ON discussions(project_id, last_note_at)
     WHERE resolvable = 1 AND resolved = 0;
+
+-- Active (global): unresolved discussions by recency (no project scope).
+-- Supports ORDER BY last_note_at DESC LIMIT N when project_id is unconstrained.
+CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global
+    ON discussions(last_note_at)
+    WHERE resolvable = 1 AND resolved = 0;
```

**4) Make Overlap “touches” coherent: count MRs for reviewers, not DiffNotes**

**Why**

Overlap’s question is “Who else has MRs touching my files?”, but:

- the reviewer branch uses `COUNT(*)` (DiffNotes)
- the author branch uses `COUNT(DISTINCT m.id)` (MRs)

Those are different units; summing them into `touch_count` is misleading.

**Change**

Count distinct MRs on the reviewer branch too:

```diff
diff --git a/plan.md b/plan.md
@@
-    COUNT(*) AS touch_count,
+    COUNT(DISTINCT m.id) AS touch_count,
     MAX(n.created_at) AS last_touch_at,
     GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
```

Also update the human output labeling:

```diff
diff --git a/plan.md b/plan.md
@@
-    style("Touches").bold(),
+    style("MRs").bold(),
```

(You still preserve “strength” via `mr_refs` and `last_touch_at`.)

**5) Make outputs more actionable: add a canonical ref field (`group/project!iid`, `group/project#iid`)**

**Why**

You already do this for Overlap (`mr_refs`). Doing the same for Workload and Active reduces friction for both humans and agents:

- humans can copy/paste a single token
- robots don’t need to stitch `project_path` + `iid` + prefix

**Change** (Workload structs + SQL)

```diff
diff --git a/plan.md b/plan.md
@@
 pub struct WorkloadIssue {
     pub iid: i64,
+    pub ref_: String,
     pub title: String,
     pub project_path: String,
     pub updated_at: i64,
 }
@@
 pub struct WorkloadMr {
     pub iid: i64,
+    pub ref_: String,
     pub title: String,
     pub draft: bool,
     pub project_path: String,
@@
-    let issues_sql =
-        "SELECT i.iid, i.title, p.path_with_namespace, i.updated_at
+    let issues_sql =
+        "SELECT i.iid,
+                (p.path_with_namespace || '#' || i.iid) AS ref,
+                i.title, p.path_with_namespace, i.updated_at
@@
-            iid: row.get(0)?,
-            title: row.get(1)?,
-            project_path: row.get(2)?,
-            updated_at: row.get(3)?,
+            iid: row.get(0)?,
+            ref_: row.get(1)?,
+            title: row.get(2)?,
+            project_path: row.get(3)?,
+            updated_at: row.get(4)?,
         })
@@
-    let authored_sql =
-        "SELECT m.iid, m.title, m.draft, p.path_with_namespace, m.updated_at
+    let authored_sql =
+        "SELECT m.iid,
+                (p.path_with_namespace || '!' || m.iid) AS ref,
+                m.title, m.draft, p.path_with_namespace, m.updated_at
@@
-            iid: row.get(0)?,
-            title: row.get(1)?,
-            draft: row.get::<_, i32>(2)? != 0,
-            project_path: row.get(3)?,
+            iid: row.get(0)?,
+            ref_: row.get(1)?,
+            title: row.get(2)?,
+            draft: row.get::<_, i32>(3)? != 0,
+            project_path: row.get(4)?,
             author_username: None,
-            updated_at: row.get(4)?,
+            updated_at: row.get(5)?,
         })
```

Then use `ref_` in human output + robot JSON.

**6) Reviews mode: tolerate leading whitespace before the `**prefix**` marker**

**Why**

Many people write `  **suggestion**: ...`. The current `LIKE '**%**%'` misses that.

**Change**

Use `ltrim(n.body)` consistently:

```diff
diff --git a/plan.md b/plan.md
@@
-    AND n.body LIKE '**%**%'
+    AND ltrim(n.body) LIKE '**%**%'
@@
-    SUBSTR(n.body, 3, INSTR(SUBSTR(n.body, 3), '**') - 1) AS raw_prefix,
+    SUBSTR(ltrim(n.body), 3, INSTR(SUBSTR(ltrim(n.body), 3), '**') - 1) AS raw_prefix,
```
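
A quick standalone check of the ltrim’d extraction (the body literal is an example):

```rust
fn main() -> rusqlite::Result<()> {
    let conn = rusqlite::Connection::open_in_memory()?;
    // Leading whitespace no longer defeats the prefix extraction.
    let prefix: String = conn.query_row(
        "SELECT SUBSTR(ltrim(?1), 3, INSTR(SUBSTR(ltrim(?1), 3), '**') - 1)",
        rusqlite::params!["  **suggestion**: rename this"],
        |row| row.get(0),
    )?;
    assert_eq!(prefix, "suggestion");
    Ok(())
}
```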

**7) Add two small tests that catch the above regressions**

**Why**

These are exactly the kind of issues that slip through without targeted tests.

```diff
diff --git a/plan.md b/plan.md
@@
 #[test]
 fn test_escape_like() {
@@
 }
+
+#[test]
+fn test_build_path_query_exact_does_not_escape() {
+    // '_' must not be escaped for '='
+    let pq = build_path_query("README_with_underscore.md");
+    assert_eq!(pq.value, "README_with_underscore.md");
+    assert!(!pq.is_prefix);
+}
+
+#[test]
+fn test_path_flag_dotless_root_file_is_exact() {
+    let pq = build_path_query("Makefile");
+    assert_eq!(pq.value, "Makefile");
+    assert!(!pq.is_prefix);
+}
```

**Summary of net effect**

- Correctness fixes: exact-path escaping bug; Active `note_count` bug.
- Perf fixes: global `--active` index; avoid broad note scans in Active.
- Usefulness upgrades: coherent Overlap “touch” metric; canonical refs everywhere; reviews prefix more robust.

If you want one extra “stretch” that still isn’t scope creep: add an unscoped warning line in human output when `project_id == None` (e.g., “Aggregated across projects; use -p to scope”) for Expert/Overlap/Active. That’s pure presentation, but prevents misinterpretation in multi-project DBs.

@@ -1,471 +0,0 @@
Proposed revisions (Iteration 6)

Below are the highest-leverage changes I'd make on top of your current Iteration 5 plan, with rationale and git-diff style edits to the plan text/snippets.

1) Fix a real edge case: dotless non-root files (src/Dockerfile, infra/Makefile, etc.)

Why

Your current build_path_query() treats dotless last segments as directories (prefix match) unless the path is root. That misclassifies legitimate dotless files inside directories and silently produces path/% (zero hits or wrong hits).

Best minimal fix: keep your static SQL approach, but add a DB existence probe (static SQL) for path queries:

- If the user didn't force a directory (trailing /) and the exact path exists in DiffNotes, treat it as an exact = match.
- Otherwise use prefix LIKE 'dir/%'.

This avoids new CLI flags, avoids heuristics lists, and uses your existing partial index (idx_notes_diffnote_path_created) efficiently.
Diff

```diff
diff --git a/Plan.md b/Plan.md
@@
 struct PathQuery {
     /// The parameter value to bind.
     value: String,
     /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`.
     is_prefix: bool,
 }

-/// Build a path query from a user-supplied path.
+/// Build a path query from a user-supplied path, with a DB probe for dotless files.
@@
-fn build_path_query(path: &str) -> PathQuery {
+fn build_path_query(conn: &Connection, path: &str) -> Result<PathQuery> {
     let trimmed = path.trim_end_matches('/');
     let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
     let is_root = !trimmed.contains('/');
-    let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.'));
+    let forced_dir = path.ends_with('/');
+    let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
+
+    // If it doesn't "look like a file" but the exact path exists in DiffNotes,
+    // treat as exact (handles src/Dockerfile, infra/Makefile, etc.).
+    let exact_exists = if !looks_like_file && !forced_dir {
+        conn.query_row(
+            "SELECT 1
+             FROM notes
+             WHERE note_type = 'DiffNote'
+               AND is_system = 0
+               AND position_new_path = ?1
+             LIMIT 1",
+            rusqlite::params![trimmed],
+            |_| Ok(()),
+        ).is_ok()
+    } else {
+        false
+    };
+
+    let is_file = looks_like_file || exact_exists;

     if is_file {
-        PathQuery {
-            value: trimmed.to_string(),
-            is_prefix: false,
-        }
+        Ok(PathQuery {
+            value: trimmed.to_string(),
+            is_prefix: false,
+        })
     } else {
         let escaped = escape_like(trimmed);
-        PathQuery {
-            value: format!("{escaped}/%"),
-            is_prefix: true,
-        }
+        Ok(PathQuery {
+            value: format!("{escaped}/%"),
+            is_prefix: true,
+        })
     }
 }
```
Also update callers:

```diff
@@
-    let pq = build_path_query(path);
+    let pq = build_path_query(conn, path)?;
@@
-    let pq = build_path_query(path);
+    let pq = build_path_query(conn, path)?;
```

And tests:

```diff
@@
 fn test_build_path_query() {
@@
     // Dotless root file -> exact match (root path without '/')
-    let pq = build_path_query("Makefile");
+    let pq = build_path_query(&conn, "Makefile").unwrap();
     assert_eq!(pq.value, "Makefile");
     assert!(!pq.is_prefix);
+
+    // Dotless file in subdir should become exact if DB contains it (probe)
+    // (set up: insert one DiffNote with position_new_path = "src/Dockerfile")
```
2) Make "reviewer" semantics correct: exclude MR authors commenting on their own diffs

Why

Right now, Overlap (and the Expert reviewer branch) will count MR authors as "reviewers" if they leave DiffNotes in their own MR (clarifications / replies), inflating A+R and contaminating "who reviewed here" signals.

You already enforce this in --reviews mode (m.author_username != ?1). Apply the same principle consistently:

Reviewer branch: only count notes where n.author_username != m.author_username (when both are non-NULL).

Diff (Overlap reviewer branch)

```diff
@@
 WHERE n.note_type = 'DiffNote'
   AND n.position_new_path LIKE ?1 ESCAPE '\\'
   AND n.is_system = 0
   AND n.author_username IS NOT NULL
+  AND (m.author_username IS NULL OR n.author_username != m.author_username)
   AND n.created_at >= ?2
   AND (?3 IS NULL OR n.project_id = ?3)
```

Same change for sql_exact.
3) Expert mode scoring: align units + reduce single-MR "comment storms"

Why

Expert currently mixes units:

- reviewer side: DiffNote count
- author side: distinct MR count

That makes the score noisy and can crown "someone who wrote 30 comments on one MR" as top expert.

Fix: make both sides primarily MR-breadth:

- reviewer: COUNT(DISTINCT m.id) AS review_mr_count
- author: COUNT(DISTINCT m.id) AS author_mr_count

Optionally keep review_note_count as a secondary intensity signal (but not the main driver). With the weights below, 30 comments on one MR scores 20 + 30 = 50, while one comment on each of five MRs scores 100 + 5 = 105, so breadth dominates.

Diff (types + SQL)

```diff
@@
 pub struct Expert {
     pub username: String,
-    pub score: f64,
-    pub review_count: u32,
-    pub author_count: u32,
+    pub score: i64,
+    pub review_mr_count: u32,
+    pub review_note_count: u32,
+    pub author_mr_count: u32,
     pub last_active_ms: i64,
 }
```

Reviewer branch now joins to MR so it can count distinct MRs and exclude self-comments:

```diff
@@
-    SELECT
-        n.author_username AS username,
-        'reviewer' AS role,
-        COUNT(*) AS cnt,
-        MAX(n.created_at) AS last_active_at
-    FROM notes n
+    SELECT
+        n.author_username AS username,
+        'reviewer' AS role,
+        COUNT(DISTINCT m.id) AS mr_cnt,
+        COUNT(*) AS note_cnt,
+        MAX(n.created_at) AS last_active_at
+    FROM notes n
+    JOIN discussions d ON n.discussion_id = d.id
+    JOIN merge_requests m ON d.merge_request_id = m.id
     WHERE n.note_type = 'DiffNote'
       AND n.is_system = 0
       AND n.author_username IS NOT NULL
+      AND (m.author_username IS NULL OR n.author_username != m.author_username)
       AND n.position_new_path LIKE ?1 ESCAPE '\\'
       AND n.created_at >= ?2
       AND (?3 IS NULL OR n.project_id = ?3)
     GROUP BY n.author_username
```

Update the author branch payload to match shape:

```diff
@@
     SELECT
         m.author_username AS username,
         'author' AS role,
-        COUNT(DISTINCT m.id) AS cnt,
+        COUNT(DISTINCT m.id) AS mr_cnt,
+        0 AS note_cnt,
         MAX(n.created_at) AS last_active_at
```

Aggregate:

```diff
@@
     SELECT
         username,
-        SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) AS review_count,
-        SUM(CASE WHEN role = 'author' THEN cnt ELSE 0 END) AS author_count,
+        SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count,
+        SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count,
+        SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count,
         MAX(last_active_at) AS last_active_at,
-        (SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) * 3.0) +
-        (SUM(CASE WHEN role = 'author' THEN cnt ELSE 0 END) * 2.0) AS score
+        (
+            (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) +
+            (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) +
+            (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1)
+        ) AS score
```

Human header:

```diff
@@
-    style("Reviews").bold(),
-    style("Authored").bold(),
+    style("Reviewed(MRs)").bold(),
+    style("Notes").bold(),
+    style("Authored(MRs)").bold(),
```
4) Deterministic output: participants + MR refs + tie-breakers

Why

You've correctly focused on reproducibility (resolved_input), but you still have nondeterministic lists:

- participants: GROUP_CONCAT order is undefined → vector order changes run-to-run.
- mr_refs: you dedup via a HashSet then iterate → undefined order.
- user sorting in Overlap is missing stable tie-breakers.

This is a real "robot mode flake" source.

Diff (Active participants sort)

```diff
@@
-    let participants: Vec<String> = participants_csv
+    let mut participants: Vec<String> = participants_csv
         .as_deref()
         .filter(|s| !s.is_empty())
         .map(|csv| csv.split('\x1F').map(String::from).collect())
         .unwrap_or_default();
+    participants.sort(); // stable, deterministic
```

Diff (Overlap MR refs sort + stable user sort)

```diff
@@
-    users.sort_by(|a, b| b.touch_count.cmp(&a.touch_count));
+    users.sort_by(|a, b| {
+        b.touch_count.cmp(&a.touch_count)
+            .then_with(|| b.last_touch_at.cmp(&a.last_touch_at))
+            .then_with(|| a.username.cmp(&b.username))
+    });
@@
-    entry.mr_refs = set.into_iter().collect();
+    let mut v: Vec<String> = set.into_iter().collect();
+    v.sort();
+    entry.mr_refs = v;
```
5) Make --limit actionable: surface truncation explicitly (human + robot)

Why

Agents (and humans) need to know if results were cut off so they can rerun with a bigger -n. Right now there is no signal.

Minimal pattern: query limit + 1, set truncated = true if you got more than limit rows, then truncate.
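As a standalone illustration of the pattern (hypothetical names — `experts_view` is a stand-in for whichever mode query is being run; the real edits follow in the diffs below):

```rust
/// Hypothetical sketch of the LIMIT+1 pattern: issue the query with
/// `LIMIT limit + 1`; any surplus row means the result set was cut off.
fn query_with_truncation(
    conn: &rusqlite::Connection,
    limit: usize,
) -> rusqlite::Result<(Vec<String>, bool)> {
    let mut stmt = conn.prepare("SELECT username FROM experts_view LIMIT ?1")?;
    let mut rows: Vec<String> = stmt
        .query_map([(limit + 1) as i64], |row| row.get(0))?
        .collect::<rusqlite::Result<_>>()?;
    let truncated = rows.len() > limit; // the sentinel row means "more exists"
    rows.truncate(limit);
    Ok((rows, truncated))
}
```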
Diff (result types)

```diff
@@
 pub struct ExpertResult {
     pub path_query: String,
     pub experts: Vec<Expert>,
+    pub truncated: bool,
 }
@@
 pub struct ActiveResult {
     pub discussions: Vec<ActiveDiscussion>,
     pub total_unresolved: u32,
+    pub truncated: bool,
 }
@@
 pub struct OverlapResult {
     pub path_query: String,
     pub users: Vec<OverlapUser>,
+    pub truncated: bool,
 }
```

Diff (query pattern example)

```diff
@@
-    let limit_i64 = limit as i64;
+    let limit_plus_one = (limit + 1) as i64;
@@
     LIMIT ?4
@@
-    rusqlite::params![pq.value, since_ms, project_id, limit_i64],
+    rusqlite::params![pq.value, since_ms, project_id, limit_plus_one],
@@
-    Ok(ExpertResult {
+    let truncated = experts.len() > limit;
+    let experts = experts.into_iter().take(limit).collect();
+    Ok(ExpertResult {
         path_query: path.to_string(),
         experts,
+        truncated,
     })
```

Human output hint:

```diff
@@
     if r.experts.is_empty() { ... }
+    if r.truncated {
+        println!("  {}", style("(showing first -n; rerun with a higher --limit)").dim());
+    }
```

Robot output field:

```diff
@@
 fn expert_to_json(r: &ExpertResult) -> serde_json::Value {
     serde_json::json!({
         "path_query": r.path_query,
+        "truncated": r.truncated,
         "experts": ...
     })
 }
```
6) Overlap merge hot loop: avoid repeated HashSet rebuild per row

Why

This line is expensive in a UNION result with many rows:

```rust
let mut set: HashSet<String> = entry.mr_refs.drain(..).collect();
```

It reallocates and rehashes every time.

Fix: accumulate into a HashSet during the merge and convert once at the end.

Diff (internal accumulator)

```diff
@@
-    let mut user_map: HashMap<String, OverlapUser> = HashMap::new();
+    struct OverlapAcc {
+        username: String,
+        author_touch_count: u32,
+        review_touch_count: u32,
+        touch_count: u32,
+        last_touch_at: i64,
+        mr_refs: HashSet<String>,
+    }
+    let mut user_map: HashMap<String, OverlapAcc> = HashMap::new();
@@
-    let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser {
+    let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapAcc {
         username: username.clone(),
         author_touch_count: 0,
         review_touch_count: 0,
         touch_count: 0,
         last_touch_at: 0,
-        mr_refs: Vec::new(),
+        mr_refs: HashSet::new(),
     });
@@
-    let mut set: HashSet<String> = entry.mr_refs.drain(..).collect();
-    for r in mr_refs { set.insert(r); }
-    entry.mr_refs = set.into_iter().collect();
+    for r in mr_refs { entry.mr_refs.insert(r); }
@@
-    let mut users: Vec<OverlapUser> = user_map.into_values().collect();
+    let mut users: Vec<OverlapUser> = user_map.into_values().map(|a| {
+        let mut mr_refs: Vec<String> = a.mr_refs.into_iter().collect();
+        mr_refs.sort();
+        OverlapUser {
+            username: a.username,
+            author_touch_count: a.author_touch_count,
+            review_touch_count: a.review_touch_count,
+            touch_count: a.touch_count,
+            last_touch_at: a.last_touch_at,
+            mr_refs,
+        }
+    }).collect();
```
7) Tests to lock these behaviors

Add tests (high value):

- dotless subdir file uses the DB probe → exact match
- self-review exclusion prevents the MR author showing up as a reviewer
- deterministic ordering for participants and mr_refs (sort)

Diff (test additions outline)

```diff
@@
+#[test]
+fn test_build_path_query_dotless_subdir_file_uses_probe() {
+    let conn = setup_test_db();
+    insert_project(&conn, 1, "team/backend");
+    insert_mr(&conn, 1, 1, 100, "author_a", "opened");
+    insert_discussion(&conn, 1, 1, Some(1), None, true, false);
+    insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/Dockerfile", "note");
+
+    let pq = build_path_query(&conn, "src/Dockerfile").unwrap();
+    assert_eq!(pq.value, "src/Dockerfile");
+    assert!(!pq.is_prefix);
+}
+
+#[test]
+fn test_overlap_excludes_self_review_notes() {
+    let conn = setup_test_db();
+    insert_project(&conn, 1, "team/backend");
+    insert_mr(&conn, 1, 1, 100, "author_a", "opened");
+    insert_discussion(&conn, 1, 1, Some(1), None, true, false);
+    // author_a comments on their own MR diff
+    insert_diffnote(&conn, 1, 1, 1, "author_a", "src/auth/login.rs", "clarification");
+
+    let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap();
+    let u = result.users.iter().find(|u| u.username == "author_a");
+    // should not be credited as a reviewer touch
+    assert_eq!(u.map(|x| x.review_touch_count).unwrap_or(0), 0);
+}
```

Net effect

- Correctness: fixes dotless subdir files + self-review pollution.
- Signal quality: Expert ranking becomes harder to game by comment volume.
- Robot reproducibility: deterministic ordering + explicit truncation.
- Performance: avoids rehash loops in overlap merges; the path probe uses indexed equality.

If you want one "single best" change: #1 (DB probe exact-match) is the most likely to prevent confusing "why is this empty?" behavior without adding any user-facing complexity.
@@ -1,353 +0,0 @@
Below are the highest-leverage revisions I'd make to iteration 6 to improve correctness (multi-project edge cases), robot-mode reliability (bounded payloads + truncation), and signal quality—without changing the fundamental scope (still pure SQL over existing tables).

1) Make build_path_query project-aware and two-way probe (exact and prefix)

Why

Your DB probe currently answers: "does this exact file exist anywhere in DiffNotes?" That can misclassify in a project-scoped run:

- The path exists as a dotless file in Project A → the probe returns true.
- The user runs -p Project B, where the path is a directory (or a different shape) → you switch to exact, return empty, and miss valid prefix hits.

Also, you still have a minor heuristic fragility for dot directories when the user omits the trailing / (e.g., .github/workflows): the last segment has a dot, so you treat it as a file unless the directory is forced.

Revision

- Thread project_id into build_path_query(conn, path, project_id)
- Probe exact first (scoped), then probe prefix (scoped)
- Only fall back to heuristics if both probes fail

This keeps "static SQL, no dynamic assembly," and costs at most 2 indexed existence queries per invocation.
```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
-fn build_path_query(conn: &Connection, path: &str) -> Result<PathQuery> {
+fn build_path_query(conn: &Connection, path: &str, project_id: Option<i64>) -> Result<PathQuery> {
     let trimmed = path.trim_end_matches('/');
     let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
     let is_root = !trimmed.contains('/');
     let forced_dir = path.ends_with('/');
-    let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
+    // Heuristic is now only a fallback; probes decide first.
+    let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));

-    let exact_exists = if !looks_like_file && !forced_dir {
-        conn.query_row(
-            "SELECT 1 FROM notes
-             WHERE note_type = 'DiffNote'
-               AND is_system = 0
-               AND position_new_path = ?1
-             LIMIT 1",
-            rusqlite::params![trimmed],
-            |_| Ok(()),
-        )
-        .is_ok()
-    } else {
-        false
-    };
+    // Probe 1: exact file exists (scoped)
+    let exact_exists = conn.query_row(
+        "SELECT 1 FROM notes
+         WHERE note_type = 'DiffNote'
+           AND is_system = 0
+           AND position_new_path = ?1
+           AND (?2 IS NULL OR project_id = ?2)
+         LIMIT 1",
+        rusqlite::params![trimmed, project_id],
+        |_| Ok(()),
+    ).is_ok();
+
+    // Probe 2: directory prefix exists (scoped)
+    let prefix_exists = if !forced_dir {
+        let escaped = escape_like(trimmed);
+        let pat = format!("{escaped}/%");
+        conn.query_row(
+            "SELECT 1 FROM notes
+             WHERE note_type = 'DiffNote'
+               AND is_system = 0
+               AND position_new_path LIKE ?1 ESCAPE '\\'
+               AND (?2 IS NULL OR project_id = ?2)
+             LIMIT 1",
+            rusqlite::params![pat, project_id],
+            |_| Ok(()),
+        ).is_ok()
+    } else { false };

-    let is_file = looks_like_file || exact_exists;
+    // Forced directory always wins; otherwise: exact > prefix > heuristic
+    let is_file = if forced_dir { false }
+        else if exact_exists { true }
+        else if prefix_exists { false }
+        else { looks_like_file };

     if is_file {
         Ok(PathQuery { value: trimmed.to_string(), is_prefix: false })
     } else {
         let escaped = escape_like(trimmed);
         Ok(PathQuery { value: format!("{escaped}/%"), is_prefix: true })
     }
 }
@@
-    let pq = build_path_query(conn, path)?;
+    let pq = build_path_query(conn, path, project_id)?;
```

Add test coverage for the multi-project misclassification case:

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 #[test]
 fn test_build_path_query_dotless_subdir_file_uses_db_probe() {
@@
-    let pq = build_path_query(&conn, "src/Dockerfile").unwrap();
+    let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
@@
-    let pq2 = build_path_query(&conn2, "src/Dockerfile").unwrap();
+    let pq2 = build_path_query(&conn2, "src/Dockerfile", None).unwrap();
 }
+
+#[test]
+fn test_build_path_query_probe_is_project_scoped() {
+    // Path exists as a dotless file in project 1; project 2 should not
+    // treat it as an exact file unless it exists there too.
+    let conn = setup_test_db();
+    insert_project(&conn, 1, "team/a");
+    insert_project(&conn, 2, "team/b");
+    insert_mr(&conn, 1, 1, 10, "author_a", "opened");
+    insert_discussion(&conn, 1, 1, Some(1), None, true, false);
+    insert_diffnote(&conn, 1, 1, 1, "rev", "infra/Makefile", "note");
+
+    let pq_scoped = build_path_query(&conn, "infra/Makefile", Some(2)).unwrap();
+    assert!(pq_scoped.is_prefix); // should fall back to prefix in project 2
+}
```
2) Bound robot payload sizes for participants and mr_refs (with totals + truncation)

Why

mr_refs and participants can become unbounded arrays in robot mode, which is a real operational hazard:

- huge JSON → slow, noisy diffs, brittle downstream pipelines
- potential SQLite group_concat truncation becomes invisible (and you can't distinguish "no refs" from "refs truncated")

Revision

Introduce hard caps and explicit metadata:

- participants_total, participants_truncated
- mr_refs_total, mr_refs_truncated

This is not scope creep—it's defensive output hygiene.
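To make the contract concrete, a minimal sketch of the bounded shape an agent would see — field values are illustrative only, built with serde_json's `json!` macro:

```rust
use serde_json::json;

fn main() {
    // Illustrative only: a capped mr_refs list plus its metadata fields.
    let user = json!({
        "username": "reviewer_b",
        "mr_refs": ["team/backend!101", "team/backend!107"], // capped list
        "mr_refs_total": 63,       // size before capping
        "mr_refs_truncated": true, // agents can page/rerun when true
    });
    println!("{user}");
}
```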
```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 pub struct ActiveDiscussion {
@@
     pub participants: Vec<String>,
+    pub participants_total: u32,
+    pub participants_truncated: bool,
 }
@@
 pub struct OverlapUser {
@@
     pub mr_refs: Vec<String>,
+    pub mr_refs_total: u32,
+    pub mr_refs_truncated: bool,
 }
```

Implementation sketch (Rust-side, deterministic):

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 fn query_active(...) -> Result<ActiveResult> {
+    const MAX_PARTICIPANTS: usize = 50;
@@
     participants.sort();
+    let participants_total = participants.len() as u32;
+    let participants_truncated = participants.len() > MAX_PARTICIPANTS;
+    if participants_truncated {
+        participants.truncate(MAX_PARTICIPANTS);
+    }
@@
     Ok(ActiveDiscussion {
@@
         participants,
+        participants_total,
+        participants_truncated,
     })
@@
 fn query_overlap(...) -> Result<OverlapResult> {
+    const MAX_MR_REFS_PER_USER: usize = 50;
@@
     .map(|a| {
         let mut mr_refs: Vec<String> = a.mr_refs.into_iter().collect();
         mr_refs.sort();
+        let mr_refs_total = mr_refs.len() as u32;
+        let mr_refs_truncated = mr_refs.len() > MAX_MR_REFS_PER_USER;
+        if mr_refs_truncated {
+            mr_refs.truncate(MAX_MR_REFS_PER_USER);
+        }
         OverlapUser {
@@
             mr_refs,
+            mr_refs_total,
+            mr_refs_truncated,
         }
     })
```

Update robot JSON accordingly:

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 fn active_to_json(r: &ActiveResult) -> serde_json::Value {
@@
         "participants": d.participants,
+        "participants_total": d.participants_total,
+        "participants_truncated": d.participants_truncated,
     }))
@@
 fn overlap_to_json(r: &OverlapResult) -> serde_json::Value {
@@
         "mr_refs": u.mr_refs,
+        "mr_refs_total": u.mr_refs_total,
+        "mr_refs_truncated": u.mr_refs_truncated,
     }))
```

Also update the robot-docs manifest schema snippet for who.active.discussions[] and who.overlap.users[].
3) Add truncation metadata to Workload sections (same LIMIT+1 pattern)

Why

Workload is the mode most likely to be consumed by agents, and right now it has silent truncation (each section is LIMIT N with no signal). Your plan already treats truncation as a first-class contract elsewhere; Workload should match.

Revision

For each workload query (see the sketch after the diff below):

- request LIMIT + 1
- set *_truncated booleans
- trim to the requested limit

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 pub struct WorkloadResult {
     pub username: String,
     pub assigned_issues: Vec<WorkloadIssue>,
     pub authored_mrs: Vec<WorkloadMr>,
     pub reviewing_mrs: Vec<WorkloadMr>,
     pub unresolved_discussions: Vec<WorkloadDiscussion>,
+    pub assigned_issues_truncated: bool,
+    pub authored_mrs_truncated: bool,
+    pub reviewing_mrs_truncated: bool,
+    pub unresolved_discussions_truncated: bool,
 }
```

And in JSON include the booleans (plus you already have summary.counts).

This is mechanically repetitive but extremely valuable for automation.
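A sketch of the per-section application — helper and variable names are hypothetical; each `*_rows` Vec is assumed to come from a `LIMIT n + 1` query:

```rust
/// Hypothetical helper: trim a section to the requested limit, flagging the cut.
fn cap<T>(mut rows: Vec<T>, n: usize) -> (Vec<T>, bool) {
    let truncated = rows.len() > n; // the extra row from LIMIT n + 1
    rows.truncate(n);
    (rows, truncated)
}

// Inside query_workload (names illustrative):
// let (assigned_issues, assigned_issues_truncated) = cap(issue_rows, n);
// let (authored_mrs, authored_mrs_truncated) = cap(authored_rows, n);
// let (reviewing_mrs, reviewing_mrs_truncated) = cap(reviewing_rows, n);
// let (unresolved_discussions, unresolved_discussions_truncated) = cap(discussion_rows, n);
```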
4) Rename "Last Active" → "Last Seen" for Expert/Overlap

Why

For "author" rows, the timestamp is derived from review activity on their MR (via MAX(n.created_at)), not necessarily that person's direct action. Calling that "active" is semantically misleading; "last seen" is accurate across both the reviewer and author branches.

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 pub struct Expert {
@@
-    pub last_active_ms: i64,
+    pub last_seen_ms: i64,
 }
@@
 pub struct OverlapUser {
@@
-    pub last_touch_at: i64,
+    pub last_seen_at: i64,
@@
 fn print_expert_human(...) {
@@
-    style("Last Active").bold(),
+    style("Last Seen").bold(),
@@
-    style(format_relative_time(expert.last_active_ms)).dim(),
+    style(format_relative_time(expert.last_seen_ms)).dim(),
```

(Keep internal SQL aliases consistent: last_seen_at everywhere.)
5) Make MR state filtering consistent in Expert/Overlap reviewer branches

Why

You already restrict the Overlap author branch to opened|merged, but the reviewer branches can include closed/unmerged noise. Consistency improves signal quality and can reduce scan churn.

Low-risk revision: apply the same state filter to the reviewer branches (Expert + Overlap). You can keep "closed" excluded by default without adding new flags.

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 WHERE n.note_type = 'DiffNote'
   AND n.is_system = 0
@@
-  AND n.created_at >= ?2
+  AND m.state IN ('opened','merged')
+  AND n.created_at >= ?2
```

This is a semantic choice; if you later want archaeology across closed/unmerged MRs, that belongs in a separate mode/flag, but I would not add it now.

6) Add a design principle for bounded outputs (aligns with robot-first reproducibility)

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
 10. **Truncation transparency.** Result types carry a `truncated: bool` flag...
+11. **Bounded payloads.** Robot JSON must never emit unbounded arrays (participants, refs).
+    Large list fields are capped with `*_total` + `*_truncated` so agents can page/retry.
```

Consolidated plan metadata bump (Iteration 7)

```diff
diff --git a/who-command-design.md b/who-command-design.md
@@
-iteration: 6
+iteration: 7
 updated: 2026-02-07
```

Net effect (what you get)

- Correct path classification under -p scoping (no cross-project probe leakage)
- Deterministic + bounded robot payloads (no giant JSON surprises)
- Uniform truncation contract across all modes (Workload no longer silently truncates)
- Clearer semantics ("Last Seen" avoids misinterpretation)
- Cleaner signals (reviewer branches ignore closed/unmerged by default)

If you want, I can also produce a second diff that updates the robot-docs schema block and the Verification EXPLAIN expectations to reflect the new probe queries and the state filter.
@@ -124,6 +124,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
            "--no-docs",
            "--no-events",
            "--no-file-changes",
            "--no-status",
            "--dry-run",
            "--no-dry-run",
            "--timings",
@@ -232,6 +233,16 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
            "--default-project",
        ],
    ),
    (
        "file-history",
        &[
            "--project",
            "--discussions",
            "--no-follow-renames",
            "--merged",
            "--limit",
        ],
    ),
    ("generate-docs", &["--full", "--project"]),
    ("completions", &[]),
    ("robot-docs", &["--brief"]),
@@ -296,6 +307,8 @@ const SUBCOMMAND_ALIASES: &[(&str, &str)] = &[
    ("syncstatus", "status"),
    ("auth_test", "auth"),
    ("authtest", "auth"),
    ("file_history", "file-history"),
    ("filehistory", "file-history"),
];

// ---------------------------------------------------------------------------
334
src/cli/commands/file_history.rs
Normal file
@@ -0,0 +1,334 @@
use serde::Serialize;

use crate::Config;
use crate::cli::render::{self, Icons, Theme};
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::file_history::resolve_rename_chain;
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::ms_to_iso;

/// Maximum rename chain BFS depth.
const MAX_RENAME_HOPS: usize = 10;

/// A single MR that touched the file.
#[derive(Debug, Serialize)]
pub struct FileHistoryMr {
    pub iid: i64,
    pub title: String,
    pub state: String,
    pub author_username: String,
    pub change_type: String,
    pub merged_at_iso: Option<String>,
    pub updated_at_iso: String,
    pub merge_commit_sha: Option<String>,
    pub web_url: Option<String>,
}

/// A DiffNote discussion snippet on the file.
#[derive(Debug, Serialize)]
pub struct FileDiscussion {
    pub discussion_id: String,
    pub author_username: String,
    pub body_snippet: String,
    pub path: String,
    pub created_at_iso: String,
}

/// Full result of a file-history query.
#[derive(Debug, Serialize)]
pub struct FileHistoryResult {
    pub path: String,
    pub rename_chain: Vec<String>,
    pub renames_followed: bool,
    pub merge_requests: Vec<FileHistoryMr>,
    pub discussions: Vec<FileDiscussion>,
    pub total_mrs: usize,
    pub paths_searched: usize,
}

/// Run the file-history query.
pub fn run_file_history(
    config: &Config,
    path: &str,
    project: Option<&str>,
    no_follow_renames: bool,
    merged_only: bool,
    include_discussions: bool,
    limit: usize,
) -> Result<FileHistoryResult> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    let project_id = project.map(|p| resolve_project(&conn, p)).transpose()?;

    // Resolve rename chain unless disabled
    let (all_paths, renames_followed) = if no_follow_renames {
        (vec![path.to_string()], false)
    } else if let Some(pid) = project_id {
        let chain = resolve_rename_chain(&conn, pid, path, MAX_RENAME_HOPS)?;
        let followed = chain.len() > 1;
        (chain, followed)
    } else {
        // Without a project scope, can't resolve renames (need project_id)
        (vec![path.to_string()], false)
    };

    let paths_searched = all_paths.len();

    // Build placeholders for the IN clause: ?1 is reserved for project_id,
    // so the paths bind as ?2..?N+1.
    let placeholders: Vec<String> = (0..all_paths.len())
        .map(|i| format!("?{}", i + 2))
        .collect();
    let in_clause = placeholders.join(", ");

    let merged_filter = if merged_only {
        " AND mr.state = 'merged'"
    } else {
        ""
    };

    let project_filter = if project_id.is_some() {
        "AND mfc.project_id = ?1"
    } else {
        ""
    };

    let sql = format!(
        "SELECT DISTINCT \
             mr.iid, mr.title, mr.state, mr.author_username, \
             mfc.change_type, mr.merged_at, mr.updated_at, mr.merge_commit_sha, mr.web_url \
         FROM mr_file_changes mfc \
         JOIN merge_requests mr ON mr.id = mfc.merge_request_id \
         WHERE mfc.new_path IN ({in_clause}) {project_filter} {merged_filter} \
         ORDER BY COALESCE(mr.merged_at, mr.updated_at) DESC \
         LIMIT ?{}",
        all_paths.len() + 2
    );

    let mut stmt = conn.prepare(&sql)?;

    // Bind parameters: ?1 = project_id (or 0 placeholder), ?2..?N+1 = paths, ?N+2 = limit
    let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
    params.push(Box::new(project_id.unwrap_or(0)));
    for p in &all_paths {
        params.push(Box::new(p.clone()));
    }
    params.push(Box::new(limit as i64));

    let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();

    let merge_requests: Vec<FileHistoryMr> = stmt
        .query_map(param_refs.as_slice(), |row| {
            let merged_at: Option<i64> = row.get(5)?;
            let updated_at: i64 = row.get(6)?;
            Ok(FileHistoryMr {
                iid: row.get(0)?,
                title: row.get(1)?,
                state: row.get(2)?,
                author_username: row.get(3)?,
                change_type: row.get(4)?,
                merged_at_iso: merged_at.map(ms_to_iso),
                updated_at_iso: ms_to_iso(updated_at),
                merge_commit_sha: row.get(7)?,
                web_url: row.get(8)?,
            })
        })?
        .filter_map(std::result::Result::ok)
        .collect();

    let total_mrs = merge_requests.len();

    // Optionally fetch DiffNote discussions on this file
    let discussions = if include_discussions && !merge_requests.is_empty() {
        fetch_file_discussions(&conn, &all_paths, project_id)?
    } else {
        Vec::new()
    };

    Ok(FileHistoryResult {
        path: path.to_string(),
        rename_chain: all_paths,
        renames_followed,
        merge_requests,
        discussions,
        total_mrs,
        paths_searched,
    })
}

/// Fetch DiffNote discussions that reference the given file paths.
fn fetch_file_discussions(
    conn: &rusqlite::Connection,
    paths: &[String],
    project_id: Option<i64>,
) -> Result<Vec<FileDiscussion>> {
    let placeholders: Vec<String> = (0..paths.len()).map(|i| format!("?{}", i + 2)).collect();
    let in_clause = placeholders.join(", ");

    let project_filter = if project_id.is_some() {
        "AND d.project_id = ?1"
    } else {
        ""
    };

    let sql = format!(
        "SELECT d.gitlab_discussion_id, n.author_username, n.body, n.position_new_path, n.created_at \
         FROM notes n \
         JOIN discussions d ON d.id = n.discussion_id \
         WHERE n.position_new_path IN ({in_clause}) {project_filter} \
           AND n.is_system = 0 \
         ORDER BY n.created_at DESC \
         LIMIT 50"
    );

    let mut stmt = conn.prepare(&sql)?;

    let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
    params.push(Box::new(project_id.unwrap_or(0)));
    for p in paths {
        params.push(Box::new(p.clone()));
    }

    let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();

    let discussions: Vec<FileDiscussion> = stmt
        .query_map(param_refs.as_slice(), |row| {
            let body: String = row.get(2)?;
            let snippet = if body.len() > 200 {
                // Back up to the nearest char boundary at or below 200 bytes
                // so the slice never splits a multi-byte character (stable
                // replacement for the unstable `floor_char_boundary`).
                let mut cut = 200;
                while !body.is_char_boundary(cut) {
                    cut -= 1;
                }
                format!("{}...", &body[..cut])
            } else {
                body
            };
            let created_at: i64 = row.get(4)?;
            Ok(FileDiscussion {
                discussion_id: row.get(0)?,
                author_username: row.get(1)?,
                body_snippet: snippet,
                path: row.get(3)?,
                created_at_iso: ms_to_iso(created_at),
            })
        })?
        .filter_map(std::result::Result::ok)
        .collect();

    Ok(discussions)
}

// ── Human output ────────────────────────────────────────────────────────────

pub fn print_file_history(result: &FileHistoryResult) {
    // Header
    let paths_info = if result.paths_searched > 1 {
        format!(
            " (via {} paths, {} MRs)",
            result.paths_searched, result.total_mrs
        )
    } else {
        format!(" ({} MRs)", result.total_mrs)
    };

    println!();
    println!(
        "{}",
        Theme::bold().render(&format!("File History: {}{}", result.path, paths_info))
    );

    // Rename chain
    if result.renames_followed && result.rename_chain.len() > 1 {
        let chain_str: Vec<&str> = result.rename_chain.iter().map(String::as_str).collect();
        println!(
            " Rename chain: {}",
            Theme::dim().render(&chain_str.join(" -> "))
        );
    }

    if result.merge_requests.is_empty() {
        println!(
            "\n {} {}",
            Icons::info(),
            Theme::dim().render("No merge requests found touching this file.")
        );
        println!(
            " {}",
            Theme::dim().render("Hint: Run 'lore sync' to fetch MR file changes.")
        );
        println!();
        return;
    }

    println!();

    for mr in &result.merge_requests {
        let (icon, state_style) = match mr.state.as_str() {
            "merged" => (Icons::mr_merged(), Theme::accent()),
            "opened" => (Icons::mr_opened(), Theme::success()),
            "closed" => (Icons::mr_closed(), Theme::warning()),
            _ => (Icons::mr_opened(), Theme::dim()),
        };

        // Prefer the merge date; fall back to the last update. Keep only the
        // date portion of the ISO timestamp.
        let date = mr
            .merged_at_iso
            .as_deref()
            .unwrap_or(&mr.updated_at_iso)
            .split('T')
            .next()
            .unwrap_or("");

        println!(
            " {} {} {} {} @{} {} {}",
            icon,
            Theme::accent().render(&format!("!{}", mr.iid)),
            render::truncate(&mr.title, 50),
            state_style.render(&mr.state),
            mr.author_username,
            date,
            Theme::dim().render(&mr.change_type),
        );
    }

    // Discussions
    if !result.discussions.is_empty() {
        println!(
            "\n {} File discussions ({}):",
            Icons::note(),
            result.discussions.len()
        );
        for d in &result.discussions {
            let date = d.created_at_iso.split('T').next().unwrap_or("");
            println!(
                " @{} ({}) [{}]: {}",
                d.author_username,
                date,
                Theme::dim().render(&d.path),
                d.body_snippet
            );
        }
    }

    println!();
}

// ── Robot (JSON) output ─────────────────────────────────────────────────────

pub fn print_file_history_json(result: &FileHistoryResult, elapsed_ms: u64) {
    let output = serde_json::json!({
        "ok": true,
        "data": {
            "path": result.path,
            "rename_chain": if result.renames_followed { Some(&result.rename_chain) } else { None },
            "merge_requests": result.merge_requests,
            "discussions": if result.discussions.is_empty() { None } else { Some(&result.discussions) },
        },
        "meta": {
            "elapsed_ms": elapsed_ms,
            "total_mrs": result.total_mrs,
            "renames_followed": result.renames_followed,
            "paths_searched": result.paths_searched,
        }
    });

    println!("{}", serde_json::to_string(&output).unwrap_or_default());
}
@@ -3,6 +3,7 @@ pub mod count;
pub mod doctor;
pub mod drift;
pub mod embed;
pub mod file_history;
pub mod generate_docs;
pub mod ingest;
pub mod init;
@@ -23,6 +24,7 @@ pub use count::{
pub use doctor::{DoctorChecks, print_doctor_results, run_doctor};
pub use drift::{DriftResponse, print_drift_human, print_drift_json, run_drift};
pub use embed::{print_embed, print_embed_json, run_embed};
pub use file_history::{print_file_history, print_file_history_json, run_file_history};
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
pub use ingest::{
    DryRunPreview, IngestDisplay, print_dry_run_preview, print_dry_run_preview_json,
@@ -234,6 +234,10 @@ pub enum Commands {
    /// People intelligence: experts, workload, active discussions, overlap
    Who(WhoArgs),

    /// Show MRs that touched a file, with linked discussions
    #[command(name = "file-history")]
    FileHistory(FileHistoryArgs),

    /// Detect discussion divergence from original intent
    Drift {
        /// Entity type (currently only "issues" supported)
@@ -966,6 +970,42 @@ pub struct WhoArgs {
    pub all_history: bool,
}

#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
  lore file-history src/main.rs                     # MRs that touched this file
  lore file-history src/auth/ -p group/repo         # Scoped to project
  lore file-history src/foo.rs --discussions        # Include DiffNote snippets
  lore file-history src/bar.rs --no-follow-renames  # Skip rename chain")]
pub struct FileHistoryArgs {
    /// File path to trace history for
    pub path: String,

    /// Scope to a specific project (fuzzy match)
    #[arg(short = 'p', long, help_heading = "Filters")]
    pub project: Option<String>,

    /// Include discussion snippets from DiffNotes on this file
    #[arg(long, help_heading = "Output")]
    pub discussions: bool,

    /// Disable rename chain resolution
    #[arg(long = "no-follow-renames", help_heading = "Filters")]
    pub no_follow_renames: bool,

    /// Only show merged MRs
    #[arg(long, help_heading = "Filters")]
    pub merged: bool,

    /// Maximum results
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "50",
        help_heading = "Output"
    )]
    pub limit: usize,
}

#[derive(Parser)]
pub struct CountArgs {
    /// Entity type to count (issues, mrs, discussions, notes, events)
72
src/main.rs
@@ -13,23 +13,24 @@ use lore::cli::commands::{
    NoteListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser,
    open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_drift_human,
    print_drift_json, print_dry_run_preview, print_dry_run_preview_json, print_embed,
    print_embed_json, print_event_count, print_event_count_json, print_generate_docs,
    print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
    print_list_issues_json, print_list_mrs, print_list_mrs_json, print_list_notes,
    print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_search_results,
    print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr,
    print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json,
    print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta,
    print_who_human, print_who_json, query_notes, run_auth_test, run_count, run_count_events,
    run_doctor, run_drift, run_embed, run_generate_docs, run_ingest, run_ingest_dry_run, run_init,
    run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
    run_sync_status, run_timeline, run_who,
    print_embed_json, print_event_count, print_event_count_json, print_file_history,
    print_file_history_json, print_generate_docs, print_generate_docs_json, print_ingest_summary,
    print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
    print_list_mrs_json, print_list_notes, print_list_notes_csv, print_list_notes_json,
    print_list_notes_jsonl, print_search_results, print_search_results_json, print_show_issue,
    print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json,
    print_sync, print_sync_json, print_sync_status, print_sync_status_json, print_timeline,
    print_timeline_json_with_meta, print_who_human, print_who_json, query_notes, run_auth_test,
    run_count, run_count_events, run_doctor, run_drift, run_embed, run_file_history,
    run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs,
    run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, run_timeline,
    run_who,
};
use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme};
use lore::cli::robot::{RobotMeta, strip_schemas};
use lore::cli::{
    Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
    NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
    Cli, Commands, CountArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs, IngestArgs, IssuesArgs,
    MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
};
use lore::core::db::{
    LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
@@ -195,6 +196,9 @@ async fn main() {
            handle_timeline(cli.config.as_deref(), args, robot_mode).await
        }
        Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode),
        Some(Commands::FileHistory(args)) => {
            handle_file_history(cli.config.as_deref(), args, robot_mode)
        }
        Some(Commands::Drift {
            entity_type,
            iid,
@@ -720,6 +724,7 @@ fn suggest_similar_command(invalid: &str) -> String {
        ("notes", "notes"),
        ("note", "notes"),
        ("drift", "drift"),
        ("file-history", "file-history"),
    ];

    let invalid_lower = invalid.to_lowercase();
@@ -1852,6 +1857,37 @@ async fn handle_stats(
    Ok(())
}

fn handle_file_history(
    config_override: Option<&str>,
    args: FileHistoryArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let start = std::time::Instant::now();
    let config = Config::load(config_override)?;

    let project = config
        .effective_project(args.project.as_deref())
        .map(String::from);

    let result = run_file_history(
        &config,
        &args.path,
        project.as_deref(),
        args.no_follow_renames,
        args.merged,
        args.discussions,
        args.limit,
    )?;

    if robot_mode {
        let elapsed_ms = start.elapsed().as_millis() as u64;
        print_file_history_json(&result, elapsed_ms);
    } else {
        print_file_history(&result);
    }
    Ok(())
}

async fn handle_timeline(
    config_override: Option<&str>,
    args: TimelineArgs,
@@ -2520,6 +2556,16 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
                "active_minimal": ["entity_type", "iid", "title", "participants"]
            }
        },
        "file-history": {
            "description": "Show MRs that touched a file, with rename chain resolution and optional DiffNote discussions",
            "flags": ["<path>", "-p/--project <path>", "--discussions", "--no-follow-renames", "--merged", "-n/--limit <N>"],
            "example": "lore --robot file-history src/main.rs -p group/repo",
            "response_schema": {
                "ok": "bool",
                "data": {"path": "string", "rename_chain": "[string]?", "merge_requests": "[{iid:int, title:string, state:string, author_username:string, change_type:string, merged_at_iso:string?, updated_at_iso:string, merge_commit_sha:string?, web_url:string?}]", "discussions": "[{discussion_id:string, author_username:string, body_snippet:string, path:string, created_at_iso:string}]?"},
                "meta": {"elapsed_ms": "int", "total_mrs": "int", "renames_followed": "bool", "paths_searched": "int"}
            }
        },
        "drift": {
            "description": "Detect discussion divergence from original issue intent",
            "flags": ["<entity_type: issues>", "<IID>", "--threshold <0.0-1.0>", "-p/--project <path>"],