Below are the revisions I’d make to iteration 2 to improve correctness, determinism, query-plan quality, and multi-project usability without turning this into a bigger product. I’m treating your plan as the “source of truth” and showing git-diff style patches against the plan text/code blocks you included. Change 1 — Fix project scoping to hit the right index (DiffNote branches) Why Your hot-path index is: idx_notes_diffnote_path_created ON notes(position_new_path, created_at, project_id) WHERE note_type='DiffNote' AND is_system=0 But in Expert/Overlap you sometimes scope by m.project_id = ?3 (MR table), not n.project_id = ?3 (notes table). That weakens the optimizer’s ability to use the composite notes index (and can force broader joins before filtering). Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Query: Expert Mode @@ - AND (?3 IS NULL OR m.project_id = ?3) + -- IMPORTANT: scope on notes.project_id to maximize use of + -- idx_notes_diffnote_path_created (notes is the selective table) + AND (?3 IS NULL OR n.project_id = ?3) @@ Query: Overlap Mode @@ - AND (?3 IS NULL OR m.project_id = ?3) + AND (?3 IS NULL OR n.project_id = ?3) @@ Query: Overlap Mode (author branch) @@ - AND (?3 IS NULL OR m.project_id = ?3) + AND (?3 IS NULL OR n.project_id = ?3) Change 2 — Introduce a “prefix vs exact” path query to avoid LIKE when you don’t need it Why For exact file paths (e.g. src/auth/login.rs), you currently do: position_new_path LIKE ?1 ESCAPE '\' where ?1 has no wildcard That’s logically fine, but it’s a worse signal to the planner than = and can degrade performance depending on collation/case settings. This doesn’t violate “static SQL” — you can pick between two static query strings. Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Helper: Path Pattern Construction @@ -fn build_path_pattern(path: &str) -> String { +struct PathQuery { + /// The parameter value to bind. + value: String, + /// If true: use LIKE value || '%'. If false: use '='. + is_prefix: bool, +} + +fn build_path_query(path: &str) -> PathQuery { let trimmed = path.trim_end_matches('/'); let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); let is_file = !path.ends_with('/') && last_segment.contains('.'); let escaped = escape_like(trimmed); if is_file { - escaped + PathQuery { value: escaped, is_prefix: false } } else { - format!("{escaped}/%") + PathQuery { value: format!("{escaped}/%"), is_prefix: true } } } And then (example for DiffNote predicates): diff Copy code @@ Query: Expert Mode @@ - let path_pattern = build_path_pattern(path); + let pq = build_path_query(path); - let sql = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; + let sql_prefix = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; + let sql_exact = " ... n.position_new_path = ?1 ... "; - let mut stmt = conn.prepare(sql)?; + let mut stmt = if pq.is_prefix { conn.prepare_cached(sql_prefix)? } + else { conn.prepare_cached(sql_exact)? }; let rows = stmt.query_map(params![... pq.value ...], ...); Change 3 — Push Expert aggregation into SQL (less Rust, fewer rows, SQL-level LIMIT) Why Right now Expert does: UNION ALL return per-role rows HashMap merge score compute sort/truncate You can do all of that in SQL deterministically, then LIMIT ?N actually works. Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Query: Expert Mode @@ - let sql = "SELECT username, role, activity_count, last_active_at FROM ( - ... - )"; + let sql = " + WITH activity AS ( + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(*) AS cnt, + MAX(n.created_at) AS last_active_at + FROM notes n + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + AND ( + (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR + (?4 = 0 AND n.position_new_path = ?1) + ) + GROUP BY n.author_username + + UNION ALL + + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS cnt, + MAX(n.created_at) AS last_active_at + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + AND ( + (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR + (?4 = 0 AND n.position_new_path = ?1) + ) + GROUP BY m.author_username + ) + SELECT + username, + SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) AS review_count, + SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) AS author_count, + MAX(last_active_at) AS last_active_at, + (SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) * 3.0) + + (SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) * 2.0) AS score + FROM activity + GROUP BY username + ORDER BY score DESC, last_active_at DESC, username ASC + LIMIT ?5 + "; - // Aggregate by username: combine reviewer + author counts - let mut user_map: HashMap<...> = HashMap::new(); - ... - experts.sort_by(...); experts.truncate(limit); + // No Rust-side merge/sort needed; SQL already returns final rows. Change 4 — Overlap output is ambiguous across projects: include stable MR refs (project_path!iid) Why mr_iids: Vec is ambiguous in a multi-project DB. !123 only means something with a project. Also: your MR IID dedup is currently Vec.contains() inside a loop (O(n²)). Use a HashSet. Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ OverlapResult @@ pub struct OverlapUser { pub username: String, @@ - pub mr_iids: Vec, + /// Stable MR references like "group/project!123" + pub mr_refs: Vec, } @@ Query: Overlap Mode (SQL) @@ - GROUP_CONCAT(DISTINCT m.iid) AS mr_iids + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id @@ - GROUP_CONCAT(DISTINCT m.iid) AS mr_iids + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs FROM merge_requests m JOIN discussions d ON d.merge_request_id = m.id JOIN notes n ON n.discussion_id = d.id + JOIN projects p ON m.project_id = p.id @@ Query: Overlap Mode (Rust merge) @@ - let mr_iids: Vec = mr_iids_csv ... + let mr_refs: Vec = mr_refs_csv + .as_deref() + .map(|csv| csv.split(',').map(|s| s.trim().to_string()).collect()) + .unwrap_or_default(); @@ - // Merge MR IIDs, deduplicate - for iid in &mr_iids { - if !entry.mr_iids.contains(iid) { - entry.mr_iids.push(*iid); - } - } + // Merge MR refs, deduplicate + use std::collections::HashSet; + let mut set: HashSet = entry.mr_refs.drain(..).collect(); + for r in mr_refs { set.insert(r); } + entry.mr_refs = set.into_iter().collect(); Change 5 — Active mode: avoid correlated subqueries by preselecting discussions, then aggregating notes once Why Your Active query does two correlated subqueries per discussion row: note_count participants With LIMIT 20 it’s not catastrophic, but it is still unnecessary work and creates “spiky” behavior if the planner chooses poorly. Pattern to use: CTE selects the limited set of discussions Join notes once, aggregate with GROUP BY Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Query: Active Mode @@ - let sql = - "SELECT - d.noteable_type, - ... - (SELECT COUNT(*) FROM notes n - WHERE n.discussion_id = d.id AND n.is_system = 0) AS note_count, - (SELECT GROUP_CONCAT(username, X'1F') FROM ( - SELECT DISTINCT n.author_username AS username - FROM notes n - WHERE n.discussion_id = d.id - AND n.is_system = 0 - AND n.author_username IS NOT NULL - ORDER BY username - )) AS participants - FROM discussions d - ... - LIMIT ?3"; + let sql = " + WITH picked AS ( + SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, d.project_id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1 + AND (?2 IS NULL OR d.project_id = ?2) + ORDER BY d.last_note_at DESC + LIMIT ?3 + ), + note_agg AS ( + SELECT + n.discussion_id, + COUNT(*) AS note_count, + GROUP_CONCAT(n.author_username, X'1F') AS participants + FROM ( + SELECT DISTINCT discussion_id, author_username + FROM notes + WHERE is_system = 0 AND author_username IS NOT NULL + ) n + JOIN picked p ON p.id = n.discussion_id + GROUP BY n.discussion_id + ) + SELECT + p.noteable_type, + COALESCE(i.iid, m.iid) AS entity_iid, + COALESCE(i.title, m.title) AS entity_title, + proj.path_with_namespace, + p.last_note_at, + COALESCE(na.note_count, 0) AS note_count, + COALESCE(na.participants, '') AS participants + FROM picked p + JOIN projects proj ON p.project_id = proj.id + LEFT JOIN issues i ON p.issue_id = i.id + LEFT JOIN merge_requests m ON p.merge_request_id = m.id + LEFT JOIN note_agg na ON na.discussion_id = p.id + ORDER BY p.last_note_at DESC + "; Change 6 — Use prepare_cached() everywhere (cheap perf win, no scope creep) Why You already worked hard to keep SQL static. Taking advantage of sqlite statement caching completes the loop. Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Query functions @@ - let mut stmt = conn.prepare(sql)?; + let mut stmt = conn.prepare_cached(sql)?; Apply in all query fns (query_workload, query_reviews, query_active, query_expert, query_overlap, lookup_project_path). Change 7 — Human output: show project_path where ambiguity exists (Workload + Overlap) Why When not project-scoped, #42 and !100 aren’t unique. You already have project paths in the query results — you’re just not printing them. Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ print_workload_human @@ - println!( - " {} {} {}", + println!( + " {} {} {} {}", style(format!("#{:<5}", item.iid)).cyan(), truncate_str(&item.title, 45), style(format_relative_time(item.updated_at)).dim(), + style(&item.project_path).dim(), ); @@ print_workload_human (MRs) @@ - println!( - " {} {}{} {}", + println!( + " {} {}{} {} {}", style(format!("!{:<5}", mr.iid)).cyan(), truncate_str(&mr.title, 40), style(draft).dim(), style(format_relative_time(mr.updated_at)).dim(), + style(&mr.project_path).dim(), ); @@ print_overlap_human @@ - let mr_str = user.mr_iids.iter().take(5).map(|iid| format!("!{iid}")).collect::>().join(", "); + let mr_str = user.mr_refs.iter().take(5).cloned().collect::>().join(", "); Change 8 — Robot JSON: add stable IDs + “defaulted” flags for reproducibility Why You already added resolved_input — good. Two more reproducibility gaps remain: Agents can’t reliably “open” an entity without IDs (discussion_id, mr_id, issue_id). Agents can’t tell whether since was user-provided vs defaulted (important when replaying intent). Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ WhoResolvedInput @@ pub struct WhoResolvedInput { @@ pub since_ms: Option, pub since_iso: Option, + pub since_was_default: bool, pub limit: usize, } @@ run_who @@ - let since_ms = resolve_since(args.since.as_deref(), "6m")?; + let since_was_default = args.since.is_none(); + let since_ms = resolve_since(args.since.as_deref(), "6m")?; Ok(WhoRun { resolved_input: WhoResolvedInput { @@ since_ms: Some(since_ms), since_iso: Some(ms_to_iso(since_ms)), + since_was_default, limit: args.limit, }, @@ print_who_json resolved_input @@ let resolved_input = serde_json::json!({ @@ "since_ms": run.resolved_input.since_ms, "since_iso": run.resolved_input.since_iso, + "since_was_default": run.resolved_input.since_was_default, "limit": run.resolved_input.limit, }); And for Active/Workload discussion items, add IDs in SQL and JSON: diff Copy code @@ ActiveDiscussion @@ pub struct ActiveDiscussion { + pub discussion_id: i64, @@ } @@ query_active SELECT @@ - SELECT - p.noteable_type, + SELECT + p.id AS discussion_id, + p.noteable_type, @@ active_to_json @@ - "discussions": r.discussions.iter().map(|d| json!({ + "discussions": r.discussions.iter().map(|d| json!({ + "discussion_id": d.discussion_id, ... })) Change 9 — Make performance verification explicit: require EXPLAIN QUERY PLAN checks for each mode Why You’re adding indexes specifically for these queries. The only way to ensure the planner is doing what you think is to lock in a short perf checklist (especially after schema drift or SQLite version differences). Diff diff Copy code --- a/who-command-design.md +++ b/who-command-design.md @@ Verification @@ # Manual verification against real data cargo run --release -- who src/features/global-search/ @@ cargo run --release -- who src/features/global-search/ -p typescript # project scoped + +# Perf verification (required before merge): +# Confirm idx_notes_diffnote_path_created is used for Expert/Overlap and +# idx_discussions_unresolved_recent is used for Active. +sqlite3 path/to/db.sqlite " + EXPLAIN QUERY PLAN + SELECT ... -- paste final Expert SQL with representative bindings +"; (Keep it lightweight: one representative query per mode is enough.) Net effect Correctness: project scoping hits the notes index; IDs added for agent workflows. Performance: fewer rows/materialization in Expert; statement caching everywhere; Active avoids correlated subqueries. UX: human output no longer ambiguous across projects; Overlap MR references become actionable. Reproducibility: agents can distinguish defaults vs explicit inputs; can dereference entities reliably. If you want one “highest ROI” subset to implement first: Change 1 + Change 4 + Change 6 + Change 7. That’s where the real operational value lands.