diff --git a/docs/who-command-design.feedback-1.md b/docs/who-command-design.feedback-1.md new file mode 100644 index 0000000..b84728b --- /dev/null +++ b/docs/who-command-design.feedback-1.md @@ -0,0 +1,434 @@ +Below are the highest-leverage revisions I’d make to this plan. I’m focusing on correctness pitfalls, SQLite gotchas, query performance on 280K notes, and reducing “dynamic SQL + param juggling” complexity—without turning this into a new ingestion project. + +Change 1 — Fix a hard SQLite bug in --active (GROUP_CONCAT DISTINCT + separator) +Why + +SQLite does not allow GROUP_CONCAT(DISTINCT x, sep). With DISTINCT, SQLite only permits a single argument (GROUP_CONCAT(DISTINCT x)). Your current query will error at runtime in many SQLite versions. + +Revision + +Use a subquery that selects distinct participants, then GROUP_CONCAT with your separator. + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_active(...) +- (SELECT GROUP_CONCAT(DISTINCT n.author_username, X'1F') +- FROM notes n +- WHERE n.discussion_id = d.id +- AND n.is_system = 0 +- AND n.author_username IS NOT NULL) AS participants ++ (SELECT GROUP_CONCAT(username, X'1F') FROM ( ++ SELECT DISTINCT n.author_username AS username ++ FROM notes n ++ WHERE n.discussion_id = d.id ++ AND n.is_system = 0 ++ AND n.author_username IS NOT NULL ++ ORDER BY username ++ )) AS participants + +Change 2 — Replace “contains('.') => exact file match” with segment-aware path classification +Why + +path.contains('.') misclassifies directories like: + +.github/workflows/ + +src/v1.2/auth/ + +It also fails the “root file” case (README.md) because your mode discriminator only treats paths as paths if they contain /. + +Revision + +Add explicit --path to force Expert mode (covers root files cleanly). + +Classify file-vs-dir by checking last path segment for a dot, and whether the input ends with /. 
+ +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ pub struct WhoArgs { +- /// Username or file path (path if contains /) +- pub target: Option, ++ /// Username or file path shorthand (ambiguous for root files like README.md) ++ pub target: Option, ++ ++ /// Force expert mode for a file/directory path (supports root files like README.md) ++ #[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])] ++ pub path: Option, +@@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { +- if let Some(target) = &args.target { ++ if let Some(p) = &args.path { ++ return Ok(WhoMode::Expert { path: p }); ++ } ++ if let Some(target) = &args.target { + let clean = target.strip_prefix('@').unwrap_or(target); + if args.reviews { + return Ok(WhoMode::Reviews { username: clean }); + } +- // Disambiguation: if target contains '/', it's a file path. +- // GitLab usernames never contain '/'. +- if target.contains('/') { ++ // Disambiguation: ++ // - treat as path if it contains '/' ++ // - otherwise treat as username (root files require --path) ++ if target.contains('/') { + return Ok(WhoMode::Expert { path: target }); + } + return Ok(WhoMode::Workload { username: clean }); + } + + +And update the path pattern logic used by Expert/Overlap: + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_expert(...) +- // Normalize path for LIKE matching: add trailing % if no extension +- let path_pattern = if path.contains('.') { +- path.to_string() // Exact file match +- } else { +- let trimmed = path.trim_end_matches('/'); +- format!("{trimmed}/%") +- }; ++ // Normalize: ++ // - if ends_with('/') => directory prefix ++ // - else if last segment contains '.' 
=> file exact match ++ // - else => directory prefix ++ let trimmed = path.trim_end_matches('/'); ++ let last = trimmed.rsplit('/').next().unwrap_or(trimmed); ++ let is_file = !path.ends_with('/') && last.contains('.'); ++ let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; + +Change 3 — Stop building dynamic SQL strings for optional filters; always bind params +Why + +Right now you’re mixing: + +dynamic project_clause string fragments + +ad-hoc param vectors + +placeholder renumbering by branch + +That’s brittle and easy to regress (especially when you add more conditions later). SQLite/rusqlite can bind Option to NULL, which enables a simple pattern: + +sql +Copy code +AND (?3 IS NULL OR n.project_id = ?3) + +Revision (representative; apply to all queries) +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_expert(...) +- let project_clause = if project_id.is_some() { +- "AND n.project_id = ?3" +- } else { +- "" +- }; +- +- let sql = format!( ++ let sql = format!( + "SELECT username, role, activity_count, last_active_at FROM ( +@@ + FROM notes n + WHERE n.position_new_path LIKE ?1 + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND n.created_at >= ?2 +- {project_clause} ++ AND (?3 IS NULL OR n.project_id = ?3) +@@ + WHERE n.position_new_path LIKE ?1 + AND m.author_username IS NOT NULL + AND m.updated_at >= ?2 +- {project_clause} ++ AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username +- )" ++ ) t" + ); +- +- let mut params: Vec> = Vec::new(); +- params.push(Box::new(path_pattern.clone())); +- params.push(Box::new(since_ms)); +- if let Some(pid) = project_id { +- params.push(Box::new(pid)); +- } +- let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); ++ let param_refs = rusqlite::params![path_pattern, since_ms, project_id]; + + +Notes: + +Adds required derived-table alias t (some SQLite configurations are stricter). 
+ +Eliminates the dynamic param vector and placeholder gymnastics. + +Change 4 — Filter “path touch” queries to DiffNotes and escape LIKE properly +Why + +Only DiffNotes reliably have position_new_path; including other note types can skew counts and harm performance. + +LIKE treats % and _ as wildcards—rare in file paths, but not impossible (generated files, templates). Escaping is a low-cost robustness win. + +Revision + +Add note_type='DiffNote' and LIKE ... ESCAPE '\' plus a tiny escape helper. + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_expert(...) +- FROM notes n +- WHERE n.position_new_path LIKE ?1 ++ FROM notes n ++ WHERE n.note_type = 'DiffNote' ++ AND n.position_new_path LIKE ?1 ESCAPE '\' + AND n.is_system = 0 +@@ +diff --git a/Plan.md b/Plan.md +@@ Helper Functions ++fn escape_like(input: &str) -> String { ++ input.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_") ++} + + +And when building patterns: + +diff +Copy code +- let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; ++ let base = escape_like(trimmed); ++ let path_pattern = if is_file { base } else { format!("{base}/%") }; + + +Apply the same changes to query_overlap and any other position_new_path LIKE .... + +Change 5 — Use note timestamps for “touch since” semantics (Expert/Overlap author branch) +Why + +In Expert/Overlap “author” branches you filter by m.updated_at >= since. That answers “MR updated recently” rather than “MR touched at this path recently”, which can surface stale ownership. + +Revision + +Filter by the note creation time (and use it for “last touch” where relevant). You can still compute author activity, but anchor it to note activity. + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_overlap(...) 
+- WHERE n.position_new_path LIKE ?1 ++ WHERE n.note_type = 'DiffNote' ++ AND n.position_new_path LIKE ?1 ESCAPE '\' + AND m.state IN ('opened', 'merged') + AND m.author_username IS NOT NULL +- AND m.updated_at >= ?2 ++ AND n.created_at >= ?2 + AND (?3 IS NULL OR m.project_id = ?3) + + +Same idea in Expert mode’s “MR authors” branch. + +Change 6 — Workload mode: apply --since consistently to unresolved discussions +Why + +Workload’s unresolved discussions ignore since_ms. That makes --since partially misleading and can dump very old threads. + +Revision + +Filter on d.last_note_at when since_ms is set. + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ fn query_workload(...) +- let disc_sql = format!( ++ let disc_since = if since_ms.is_some() { ++ "AND d.last_note_at >= ?2" ++ } else { "" }; ++ let disc_sql = format!( + "SELECT d.noteable_type, +@@ + WHERE d.resolvable = 1 AND d.resolved = 0 + AND EXISTS ( +@@ + ) + {disc_project_filter} ++ {disc_since} + ORDER BY d.last_note_at DESC + LIMIT {limit}" + ); +@@ +- // Rebuild params for discussion query (only username + optional project_id) +- let mut disc_params: Vec> = Vec::new(); +- disc_params.push(Box::new(username.to_string())); +- if let Some(pid) = project_id { +- disc_params.push(Box::new(pid)); +- } ++ // Params: username, since_ms, project_id (NULLs ok) ++ let disc_param_refs = rusqlite::params![username, since_ms, project_id]; + + +(If you adopt Change 3 fully, this becomes very clean.) + +Change 7 — Make Overlap results represent “both roles” instead of collapsing to one +Why + +Collapsing to a single role loses valuable info (“they authored and reviewed”). Also your current “prefer author” rule is arbitrary for the “who else is touching this” question. + +Revision + +Track role counts separately and render as A, R, or A+R. 
+ +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ pub struct OverlapUser { + pub username: String, +- pub role: String, +- pub touch_count: u32, ++ pub author_touch_count: u32, ++ pub review_touch_count: u32, ++ pub touch_count: u32, + pub last_touch_at: i64, + pub mr_iids: Vec, + } +@@ fn query_overlap(...) +- let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { ++ let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { + username: username.clone(), +- role: role.clone(), ++ author_touch_count: 0, ++ review_touch_count: 0, + touch_count: 0, + last_touch_at: 0, + mr_iids: Vec::new(), + }); + entry.touch_count += count; ++ if role == "author" { entry.author_touch_count += count; } ++ if role == "reviewer" { entry.review_touch_count += count; } +@@ human output +- println!( +- " {:<16} {:<8} {:>7} {:<12} {}", ++ println!( ++ " {:<16} {:<6} {:>7} {:<12} {}", + ... + ); +@@ +- user.role, ++ format_roles(user.author_touch_count, user.review_touch_count), + +Change 8 — Add an “Index Audit + optional migration” step (big perf win, low blast radius) +Why + +With 280K notes, the path/timestamp queries will degrade quickly without indexes. This isn’t “scope creep”; it’s making the feature usable. + +Revision (plan-level) + +Add a non-breaking migration that only creates indexes if missing. + +Optionally add a runtime check: if EXPLAIN QUERY PLAN indicates full table scan on notes, print a dim warning in human mode. 
+ +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ Implementation Order +-| Step | What | Files | ++| Step | What | Files | + | 1 | CLI skeleton: `WhoArgs` + `Commands::Who` + dispatch + stub | `cli/mod.rs`, `commands/mod.rs`, `main.rs` | ++| 1.5 | Index audit + add `CREATE INDEX IF NOT EXISTS` migration for who hot paths | `migrations/0xx_who_indexes.sql` | +@@ + + +Suggested indexes (tune names to your conventions): + +notes(note_type, position_new_path, created_at) + +notes(discussion_id, is_system, author_username) + +discussions(resolvable, resolved, last_note_at, project_id) + +merge_requests(project_id, state, updated_at, author_username) + +issue_assignees(username, issue_id) + +Even if SQLite can’t perfectly index LIKE, these still help with join and timestamp filters. + +Change 9 — Make robot JSON reproducible by echoing the effective query inputs +Why + +Agent workflows benefit from a stable “query record”: what mode ran, what path/user, resolved project, effective since, limit. + +Revision + +Include an input object in JSON output. + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ struct WhoJsonData { + mode: String, ++ input: serde_json::Value, + #[serde(flatten)] + result: serde_json::Value, + } +@@ pub fn print_who_json(...) +- let output = WhoJsonEnvelope { ++ let input = serde_json::json!({ ++ "project": /* resolved or raw args.project */, ++ "since": /* resolved since ISO */, ++ "limit": /* args.limit */, ++ }); ++ let output = WhoJsonEnvelope { + ok: true, + data: WhoJsonData { + mode: mode.to_string(), ++ input, + result: data, + }, + meta: RobotMeta { elapsed_ms }, + }; + +Change 10 — Tighten clap constraints so invalid combinations never reach resolve_mode +Why + +Right now conflicts are enforced manually (or not at all). 
Clamp the invalid combos at the CLI layer: + +--active should conflict with target, --overlap, --reviews, --path + +--reviews should require a username (and should conflict with Expert path modes) + +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ pub struct WhoArgs { +- pub active: bool, ++ #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])] ++ pub active: bool, +@@ +- pub overlap: Option, ++ #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])] ++ pub overlap: Option, +@@ +- pub reviews: bool, ++ #[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])] ++ pub reviews: bool, + +Summary of what I’d definitely change + +If you do nothing else, do these first: + +Fix GROUP_CONCAT(DISTINCT ..., sep) in Active mode (runtime error). + +Path classification: add --path, and stop using contains('.') globally. + +Remove dynamic SQL + param vectors: always bind project_id as nullable and use (? IS NULL OR ...). + +Filter to DiffNotes + LIKE escaping for correctness and fewer rows scanned. + +Optional index migration: otherwise this will feel slow/non-deterministically slow depending on local DB state. + +If you want, I can also provide a consolidated “v2 plan” as a single unified patch (one diff) rather than per-change snippets. \ No newline at end of file diff --git a/docs/who-command-design.feedback-2.md b/docs/who-command-design.feedback-2.md new file mode 100644 index 0000000..e708903 --- /dev/null +++ b/docs/who-command-design.feedback-2.md @@ -0,0 +1,303 @@ +Below are the highest-leverage revisions I’d make to iteration 1 to tighten correctness, performance, and “agent usefulness” without blowing up scope. For each change: (1) rationale, (2) a focused unified diff against the plan you pasted. 
+ +Change 1 — Make robot “input echo” actually resolved (project_id, project_path, since_ms/iso, mode) +Why + +Your Design Principle #5 says the robot envelope should echo resolved inputs (“effective since, resolved project”), but the current input object echoes only raw CLI strings. Agents can’t reliably reproduce or compare runs (e.g., fuzzy project resolution may map differently over time). + +This is also a reliability improvement: “what ran” should be computed once and propagated, not recomputed in output. + +Plan diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-5. **Robot-first reproducibility.** Robot JSON output includes an `input` object echoing the resolved query parameters (effective since, resolved project, limit) so agents can trace exactly what ran. ++5. **Robot-first reproducibility.** Robot JSON output includes a `resolved_input` object (mode, since_ms + since_iso, resolved project_id + project_path, limit, db_path) so agents can trace exactly what ran. + +@@ +-/// Main entry point. Resolves mode from args and dispatches. +-pub fn run_who(config: &Config, args: &WhoArgs) -> Result { ++/// Main entry point. Resolves mode + resolved inputs once, then dispatches. 
++pub fn run_who(config: &Config, args: &WhoArgs) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + +- let project_id = args ++ let project_id = args + .project + .as_deref() + .map(|p| resolve_project(&conn, p)) + .transpose()?; ++ let project_path = project_id ++ .map(|id| lookup_project_path(&conn, id)) ++ .transpose()?; + + let mode = resolve_mode(args)?; + + match mode { + WhoMode::Expert { path } => { + let since_ms = resolve_since(args.since.as_deref(), "6m")?; + let result = query_expert(&conn, path, project_id, since_ms, args.limit)?; +- Ok(WhoResult::Expert(result)) ++ Ok(WhoRun::new("expert", &db_path, project_id, project_path, since_ms, args.limit, WhoResult::Expert(result))) + } +@@ + } + } ++ ++/// Wrapper that carries resolved inputs for reproducible output. ++pub struct WhoRun { ++ pub mode: String, ++ pub resolved_input: WhoResolvedInput, ++ pub result: WhoResult, ++} ++ ++pub struct WhoResolvedInput { ++ pub db_path: String, ++ pub project_id: Option, ++ pub project_path: Option, ++ pub since_ms: i64, ++ pub since_iso: String, ++ pub limit: usize, ++} +@@ +-pub fn print_who_json(result: &WhoResult, args: &WhoArgs, elapsed_ms: u64) { +- let (mode, data) = match result { ++pub fn print_who_json(run: &WhoRun, args: &WhoArgs, elapsed_ms: u64) { ++ let (mode, data) = match &run.result { + WhoResult::Expert(r) => ("expert", expert_to_json(r)), +@@ +- let input = serde_json::json!({ ++ let input = serde_json::json!({ + "target": args.target, + "path": args.path, + "project": args.project, + "since": args.since, + "limit": args.limit, + }); ++ ++ let resolved_input = serde_json::json!({ ++ "mode": run.mode, ++ "db_path": run.resolved_input.db_path, ++ "project_id": run.resolved_input.project_id, ++ "project_path": run.resolved_input.project_path, ++ "since_ms": run.resolved_input.since_ms, ++ "since_iso": run.resolved_input.since_iso, ++ "limit": run.resolved_input.limit, ++ }); +@@ +- 
data: WhoJsonData { +- mode: mode.to_string(), +- input, +- result: data, +- }, ++ data: WhoJsonData { mode: mode.to_string(), input, resolved_input, result: data }, + meta: RobotMeta { elapsed_ms }, + }; +@@ + struct WhoJsonData { + mode: String, + input: serde_json::Value, ++ resolved_input: serde_json::Value, + #[serde(flatten)] + result: serde_json::Value, + } + +Change 2 — Remove dynamic SQL format!(..LIMIT {limit}) and parameterize LIMIT everywhere +Why + +You explicitly prefer static SQL ((?N IS NULL OR ...)) to avoid subtle bugs; but Workload/Active use format! for LIMIT. Even though limit is typed, it’s an inconsistency that complicates statement caching and encourages future string assembly creep. + +SQLite supports LIMIT ? with bound parameters; rusqlite can bind an i64. + +Plan diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +- let issues_sql = format!( +- "SELECT ... +- ORDER BY i.updated_at DESC +- LIMIT {limit}" +- ); +- let mut stmt = conn.prepare(&issues_sql)?; ++ let issues_sql = ++ "SELECT ... ++ ORDER BY i.updated_at DESC ++ LIMIT ?4"; ++ let mut stmt = conn.prepare(issues_sql)?; + let assigned_issues: Vec = stmt +- .query_map(rusqlite::params![username, project_id, since_ms], |row| { ++ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { +@@ +- let authored_sql = format!( +- "SELECT ... +- ORDER BY m.updated_at DESC +- LIMIT {limit}" +- ); +- let mut stmt = conn.prepare(&authored_sql)?; ++ let authored_sql = ++ "SELECT ... ++ ORDER BY m.updated_at DESC ++ LIMIT ?4"; ++ let mut stmt = conn.prepare(authored_sql)?; +@@ +- .query_map(rusqlite::params![username, project_id, since_ms], |row| { ++ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { +@@ +- let reviewing_sql = format!( +- "SELECT ... +- ORDER BY m.updated_at DESC +- LIMIT {limit}" +- ); +- let mut stmt = conn.prepare(&reviewing_sql)?; ++ let reviewing_sql = ++ "SELECT ... 
++ ORDER BY m.updated_at DESC ++ LIMIT ?4"; ++ let mut stmt = conn.prepare(reviewing_sql)?; +@@ +- .query_map(rusqlite::params![username, project_id, since_ms], |row| { ++ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { +@@ +- let disc_sql = format!( +- "SELECT ... +- ORDER BY d.last_note_at DESC +- LIMIT {limit}" +- ); +- let mut stmt = conn.prepare(&disc_sql)?; ++ let disc_sql = ++ "SELECT ... ++ ORDER BY d.last_note_at DESC ++ LIMIT ?4"; ++ let mut stmt = conn.prepare(disc_sql)?; +@@ +- .query_map(rusqlite::params![username, project_id, since_ms], |row| { ++ .query_map(rusqlite::params![username, project_id, since_ms, limit as i64], |row| { +@@ +- let sql = format!( +- "SELECT ... +- ORDER BY d.last_note_at DESC +- LIMIT {limit}" +- ); +- let mut stmt = conn.prepare(&sql)?; ++ let sql = ++ "SELECT ... ++ ORDER BY d.last_note_at DESC ++ LIMIT ?3"; ++ let mut stmt = conn.prepare(sql)?; +@@ +- .query_map(rusqlite::params![since_ms, project_id], |row| { ++ .query_map(rusqlite::params![since_ms, project_id, limit as i64], |row| { + +Change 3 — Fix path matching for dotless files (LICENSE/Makefile) via “exact OR prefix” (no new flags) +Why + +Your improved “dot only in last segment” heuristic still fails on dotless files (LICENSE, Makefile, Dockerfile) which are common, especially at repo root. Right now they’ll be treated as directories (LICENSE/%) and silently return nothing. + +Best minimal UX: if user provides a path that’s ambiguous (no trailing slash), match either exact file OR directory prefix. + +Plan diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-/// Build a LIKE pattern from a user-supplied path, with proper LIKE escaping. 
+-/// +-/// Rules: +-/// - If the path ends with `/`, it's a directory prefix → `escaped_path%` +-/// - If the last path segment contains `.`, it's a file → exact match +-/// - Otherwise, it's a directory prefix → `escaped_path/%` ++/// Build an exact + prefix match from a user-supplied path, with proper LIKE escaping. ++/// ++/// Rules: ++/// - If the path ends with `/`, treat as directory-only (prefix match) ++/// - Otherwise, treat as ambiguous: exact match OR directory prefix ++/// (fixes dotless files like LICENSE/Makefile without requiring new flags) +@@ +-fn build_path_pattern(path: &str) -> String { ++struct PathMatch { ++ exact: String, ++ prefix: String, ++ dir_only: bool, ++} ++ ++fn build_path_match(path: &str) -> PathMatch { + let trimmed = path.trim_end_matches('/'); +- let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); +- let is_file = !path.ends_with('/') && last_segment.contains('.'); + let escaped = escape_like(trimmed); +- +- if is_file { +- escaped +- } else { +- format!("{escaped}/%") +- } ++ PathMatch { ++ exact: escaped.clone(), ++ prefix: format!("{escaped}/%"), ++ dir_only: path.ends_with('/'), ++ } + } +@@ +- let path_pattern = build_path_pattern(path); ++ let pm = build_path_match(path); +@@ +- AND n.position_new_path LIKE ?1 ESCAPE '\\' ++ AND ( ++ (?4 = 1 AND n.position_new_path LIKE ?2 ESCAPE '\\') ++ OR (?4 = 0 AND (n.position_new_path = ?1 OR n.position_new_path LIKE ?2 ESCAPE '\\')) ++ ) +@@ +- let rows: Vec<(String, String, u32, i64)> = stmt +- .query_map(rusqlite::params![path_pattern, since_ms, project_id], |row| { ++ let rows: Vec<(String, String, u32, i64)> = stmt ++ .query_map(rusqlite::params![pm.exact, pm.prefix, since_ms, i32::from(pm.dir_only), project_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + })? + + +(Apply the same pattern to Overlap mode.) 
+ +Change 4 — Consistently exclude system notes in all DiffNote-based branches (Expert/Overlap author branches currently don’t) +Why + +You filter n.is_system = 0 for reviewer branches, but not in the author branches of Expert/Overlap. That can skew “author touch” via system-generated diff notes or bot activity. + +Consistency here improves correctness and also enables more aggressive partial indexing. + +Plan diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +- WHERE n.note_type = 'DiffNote' ++ WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\\' ++ AND n.is_system = 0 + AND m.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR m.project_id = ?3) +@@ +- WHERE n.note_type = 'DiffNote' ++ WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\\' ++ AND n.is_system = 0 + AND m.state IN ('opened', 'merged') + AND m.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR m.project_id = ?3) + +Change 5 — Rework Migration 017 indexes to match real predicates + add one critical notes index for discussion participation +Why + +(a) idx_notes_diffnote_path_created currently leads with note_type even though it’s constant via partial index. You want the leading columns to match your most selective predicates: position_new_path prefix + created_at range, with optional project_id. + +(b) Active + Workload discussion participation repeatedly hits notes by (discussion_id, author_username); you only guarantee notes(discussion_id) is indexed. Adding a narrow partial composite index pays off immediately for both “participants” and “EXISTS user participated” checks. 
+ +(c) The discussions index should focus on (project_id, last_note_at) with a partial predicate; resolvable/resolved a_ \ No newline at end of file diff --git a/docs/who-command-design.feedback-3.md b/docs/who-command-design.feedback-3.md new file mode 100644 index 0000000..5d03da0 --- /dev/null +++ b/docs/who-command-design.feedback-3.md @@ -0,0 +1,471 @@ +Below are the revisions I’d make to iteration 2 to improve correctness, determinism, query-plan quality, and multi-project usability without turning this into a bigger product. + +I’m treating your plan as the “source of truth” and showing git-diff style patches against the plan text/code blocks you included. + +Change 1 — Fix project scoping to hit the right index (DiffNote branches) +Why + +Your hot-path index is: + +idx_notes_diffnote_path_created ON notes(position_new_path, created_at, project_id) WHERE note_type='DiffNote' AND is_system=0 + +But in Expert/Overlap you sometimes scope by m.project_id = ?3 (MR table), not n.project_id = ?3 (notes table). That weakens the optimizer’s ability to use the composite notes index (and can force broader joins before filtering). + +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Query: Expert Mode @@ +- AND (?3 IS NULL OR m.project_id = ?3) ++ -- IMPORTANT: scope on notes.project_id to maximize use of ++ -- idx_notes_diffnote_path_created (notes is the selective table) ++ AND (?3 IS NULL OR n.project_id = ?3) + +@@ Query: Overlap Mode @@ +- AND (?3 IS NULL OR m.project_id = ?3) ++ AND (?3 IS NULL OR n.project_id = ?3) + +@@ Query: Overlap Mode (author branch) @@ +- AND (?3 IS NULL OR m.project_id = ?3) ++ AND (?3 IS NULL OR n.project_id = ?3) + +Change 2 — Introduce a “prefix vs exact” path query to avoid LIKE when you don’t need it +Why + +For exact file paths (e.g. 
src/auth/login.rs), you currently do: + +position_new_path LIKE ?1 ESCAPE '\' where ?1 has no wildcard + +That’s logically fine, but it’s a worse signal to the planner than = and can degrade performance depending on collation/case settings. + +This doesn’t violate “static SQL” — you can pick between two static query strings. + +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Helper: Path Pattern Construction @@ +-fn build_path_pattern(path: &str) -> String { ++struct PathQuery { ++ /// The parameter value to bind. ++ value: String, ++ /// If true: use LIKE value || '%'. If false: use '='. ++ is_prefix: bool, ++} ++ ++fn build_path_query(path: &str) -> PathQuery { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_file = !path.ends_with('/') && last_segment.contains('.'); + let escaped = escape_like(trimmed); + + if is_file { +- escaped ++ PathQuery { value: escaped, is_prefix: false } + } else { +- format!("{escaped}/%") ++ PathQuery { value: format!("{escaped}/%"), is_prefix: true } + } + } + + +And then (example for DiffNote predicates): + +diff +Copy code +@@ Query: Expert Mode @@ +- let path_pattern = build_path_pattern(path); ++ let pq = build_path_query(path); + +- let sql = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; ++ let sql_prefix = " ... n.position_new_path LIKE ?1 ESCAPE '\\' ... "; ++ let sql_exact = " ... n.position_new_path = ?1 ... "; + +- let mut stmt = conn.prepare(sql)?; ++ let mut stmt = if pq.is_prefix { conn.prepare_cached(sql_prefix)? } ++ else { conn.prepare_cached(sql_exact)? }; + let rows = stmt.query_map(params![... pq.value ...], ...); + +Change 3 — Push Expert aggregation into SQL (less Rust, fewer rows, SQL-level LIMIT) +Why + +Right now Expert does: + +UNION ALL + +return per-role rows + +HashMap merge + +score compute + +sort/truncate + +You can do all of that in SQL deterministically, then LIMIT ?N actually works. 
+ +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Query: Expert Mode @@ +- let sql = "SELECT username, role, activity_count, last_active_at FROM ( +- ... +- )"; ++ let sql = " ++ WITH activity AS ( ++ SELECT ++ n.author_username AS username, ++ 'reviewer' AS role, ++ COUNT(*) AS cnt, ++ MAX(n.created_at) AS last_active_at ++ FROM notes n ++ WHERE n.note_type = 'DiffNote' ++ AND n.is_system = 0 ++ AND n.author_username IS NOT NULL ++ AND n.created_at >= ?2 ++ AND (?3 IS NULL OR n.project_id = ?3) ++ AND ( ++ (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR ++ (?4 = 0 AND n.position_new_path = ?1) ++ ) ++ GROUP BY n.author_username ++ ++ UNION ALL ++ ++ SELECT ++ m.author_username AS username, ++ 'author' AS role, ++ COUNT(DISTINCT m.id) AS cnt, ++ MAX(n.created_at) AS last_active_at ++ FROM merge_requests m ++ JOIN discussions d ON d.merge_request_id = m.id ++ JOIN notes n ON n.discussion_id = d.id ++ WHERE n.note_type = 'DiffNote' ++ AND n.is_system = 0 ++ AND m.author_username IS NOT NULL ++ AND n.created_at >= ?2 ++ AND (?3 IS NULL OR n.project_id = ?3) ++ AND ( ++ (?4 = 1 AND n.position_new_path LIKE ?1 ESCAPE '\\') OR ++ (?4 = 0 AND n.position_new_path = ?1) ++ ) ++ GROUP BY m.author_username ++ ) ++ SELECT ++ username, ++ SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) AS review_count, ++ SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) AS author_count, ++ MAX(last_active_at) AS last_active_at, ++ (SUM(CASE WHEN role='reviewer' THEN cnt ELSE 0 END) * 3.0) + ++ (SUM(CASE WHEN role='author' THEN cnt ELSE 0 END) * 2.0) AS score ++ FROM activity ++ GROUP BY username ++ ORDER BY score DESC, last_active_at DESC, username ASC ++ LIMIT ?5 ++ "; + +- // Aggregate by username: combine reviewer + author counts +- let mut user_map: HashMap<...> = HashMap::new(); +- ... +- experts.sort_by(...); experts.truncate(limit); ++ // No Rust-side merge/sort needed; SQL already returns final rows. 
+ +Change 4 — Overlap output is ambiguous across projects: include stable MR refs (project_path!iid) +Why + +mr_iids: Vec is ambiguous in a multi-project DB. !123 only means something with a project. + +Also: your MR IID dedup is currently Vec.contains() inside a loop (O(n²)). Use a HashSet. + +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ OverlapResult @@ + pub struct OverlapUser { + pub username: String, +@@ +- pub mr_iids: Vec, ++ /// Stable MR references like "group/project!123" ++ pub mr_refs: Vec, + } + +@@ Query: Overlap Mode (SQL) @@ +- GROUP_CONCAT(DISTINCT m.iid) AS mr_iids ++ GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id ++ JOIN projects p ON m.project_id = p.id +@@ +- GROUP_CONCAT(DISTINCT m.iid) AS mr_iids ++ GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id ++ JOIN projects p ON m.project_id = p.id + +@@ Query: Overlap Mode (Rust merge) @@ +- let mr_iids: Vec = mr_iids_csv ... 
++ let mr_refs: Vec = mr_refs_csv ++ .as_deref() ++ .map(|csv| csv.split(',').map(|s| s.trim().to_string()).collect()) ++ .unwrap_or_default(); +@@ +- // Merge MR IIDs, deduplicate +- for iid in &mr_iids { +- if !entry.mr_iids.contains(iid) { +- entry.mr_iids.push(*iid); +- } +- } ++ // Merge MR refs, deduplicate ++ use std::collections::HashSet; ++ let mut set: HashSet = entry.mr_refs.drain(..).collect(); ++ for r in mr_refs { set.insert(r); } ++ entry.mr_refs = set.into_iter().collect(); + +Change 5 — Active mode: avoid correlated subqueries by preselecting discussions, then aggregating notes once +Why + +Your Active query does two correlated subqueries per discussion row: + +note_count + +participants + +With LIMIT 20 it’s not catastrophic, but it is still unnecessary work and creates “spiky” behavior if the planner chooses poorly. + +Pattern to use: + +CTE selects the limited set of discussions + +Join notes once, aggregate with GROUP BY + +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Query: Active Mode @@ +- let sql = +- "SELECT +- d.noteable_type, +- ... +- (SELECT COUNT(*) FROM notes n +- WHERE n.discussion_id = d.id AND n.is_system = 0) AS note_count, +- (SELECT GROUP_CONCAT(username, X'1F') FROM ( +- SELECT DISTINCT n.author_username AS username +- FROM notes n +- WHERE n.discussion_id = d.id +- AND n.is_system = 0 +- AND n.author_username IS NOT NULL +- ORDER BY username +- )) AS participants +- FROM discussions d +- ... 
+- LIMIT ?3"; ++ let sql = " ++ WITH picked AS ( ++ SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, d.project_id, d.last_note_at ++ FROM discussions d ++ WHERE d.resolvable = 1 AND d.resolved = 0 ++ AND d.last_note_at >= ?1 ++ AND (?2 IS NULL OR d.project_id = ?2) ++ ORDER BY d.last_note_at DESC ++ LIMIT ?3 ++ ), ++ note_agg AS ( ++ SELECT ++ n.discussion_id, ++ COUNT(*) AS note_count, ++ GROUP_CONCAT(n.author_username, X'1F') AS participants ++ FROM ( ++ SELECT DISTINCT discussion_id, author_username ++ FROM notes ++ WHERE is_system = 0 AND author_username IS NOT NULL ++ ) n ++ JOIN picked p ON p.id = n.discussion_id ++ GROUP BY n.discussion_id ++ ) ++ SELECT ++ p.noteable_type, ++ COALESCE(i.iid, m.iid) AS entity_iid, ++ COALESCE(i.title, m.title) AS entity_title, ++ proj.path_with_namespace, ++ p.last_note_at, ++ COALESCE(na.note_count, 0) AS note_count, ++ COALESCE(na.participants, '') AS participants ++ FROM picked p ++ JOIN projects proj ON p.project_id = proj.id ++ LEFT JOIN issues i ON p.issue_id = i.id ++ LEFT JOIN merge_requests m ON p.merge_request_id = m.id ++ LEFT JOIN note_agg na ON na.discussion_id = p.id ++ ORDER BY p.last_note_at DESC ++ "; + +Change 6 — Use prepare_cached() everywhere (cheap perf win, no scope creep) +Why + +You already worked hard to keep SQL static. Taking advantage of sqlite statement caching completes the loop. + +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Query functions @@ +- let mut stmt = conn.prepare(sql)?; ++ let mut stmt = conn.prepare_cached(sql)?; + + +Apply in all query fns (query_workload, query_reviews, query_active, query_expert, query_overlap, lookup_project_path). + +Change 7 — Human output: show project_path where ambiguity exists (Workload + Overlap) +Why + +When not project-scoped, #42 and !100 aren’t unique. You already have project paths in the query results — you’re just not printing them. 
+ +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ print_workload_human @@ +- println!( +- " {} {} {}", ++ println!( ++ " {} {} {} {}", + style(format!("#{:<5}", item.iid)).cyan(), + truncate_str(&item.title, 45), + style(format_relative_time(item.updated_at)).dim(), ++ style(&item.project_path).dim(), + ); + +@@ print_workload_human (MRs) @@ +- println!( +- " {} {}{} {}", ++ println!( ++ " {} {}{} {} {}", + style(format!("!{:<5}", mr.iid)).cyan(), + truncate_str(&mr.title, 40), + style(draft).dim(), + style(format_relative_time(mr.updated_at)).dim(), ++ style(&mr.project_path).dim(), + ); + +@@ print_overlap_human @@ +- let mr_str = user.mr_iids.iter().take(5).map(|iid| format!("!{iid}")).collect::<Vec<_>>().join(", "); ++ let mr_str = user.mr_refs.iter().take(5).cloned().collect::<Vec<_>>().join(", "); + +Change 8 — Robot JSON: add stable IDs + “defaulted” flags for reproducibility +Why + +You already added resolved_input — good. Two more reproducibility gaps remain: + +Agents can’t reliably “open” an entity without IDs (discussion_id, mr_id, issue_id). + +Agents can’t tell whether since was user-provided vs defaulted (important when replaying intent). 
+ +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ WhoResolvedInput @@ + pub struct WhoResolvedInput { +@@ + pub since_ms: Option, + pub since_iso: Option, ++ pub since_was_default: bool, + pub limit: usize, + } + +@@ run_who @@ +- let since_ms = resolve_since(args.since.as_deref(), "6m")?; ++ let since_was_default = args.since.is_none(); ++ let since_ms = resolve_since(args.since.as_deref(), "6m")?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { +@@ + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), ++ since_was_default, + limit: args.limit, + }, + +@@ print_who_json resolved_input @@ + let resolved_input = serde_json::json!({ +@@ + "since_ms": run.resolved_input.since_ms, + "since_iso": run.resolved_input.since_iso, ++ "since_was_default": run.resolved_input.since_was_default, + "limit": run.resolved_input.limit, + }); + + +And for Active/Workload discussion items, add IDs in SQL and JSON: + +diff +Copy code +@@ ActiveDiscussion @@ + pub struct ActiveDiscussion { ++ pub discussion_id: i64, +@@ + } + +@@ query_active SELECT @@ +- SELECT +- p.noteable_type, ++ SELECT ++ p.id AS discussion_id, ++ p.noteable_type, + +@@ active_to_json @@ +- "discussions": r.discussions.iter().map(|d| json!({ ++ "discussions": r.discussions.iter().map(|d| json!({ ++ "discussion_id": d.discussion_id, + ... + })) + +Change 9 — Make performance verification explicit: require EXPLAIN QUERY PLAN checks for each mode +Why + +You’re adding indexes specifically for these queries. The only way to ensure the planner is doing what you think is to lock in a short perf checklist (especially after schema drift or SQLite version differences). 
+ +Diff +diff +Copy code +--- a/who-command-design.md ++++ b/who-command-design.md +@@ Verification @@ + # Manual verification against real data + cargo run --release -- who src/features/global-search/ +@@ + cargo run --release -- who src/features/global-search/ -p typescript # project scoped ++ ++# Perf verification (required before merge): ++# Confirm idx_notes_diffnote_path_created is used for Expert/Overlap and ++# idx_discussions_unresolved_recent is used for Active. ++sqlite3 path/to/db.sqlite " ++ EXPLAIN QUERY PLAN ++ SELECT ... -- paste final Expert SQL with representative bindings ++"; + + +(Keep it lightweight: one representative query per mode is enough.) + +Net effect + +Correctness: project scoping hits the notes index; IDs added for agent workflows. + +Performance: fewer rows/materialization in Expert; statement caching everywhere; Active avoids correlated subqueries. + +UX: human output no longer ambiguous across projects; Overlap MR references become actionable. + +Reproducibility: agents can distinguish defaults vs explicit inputs; can dereference entities reliably. + +If you want one “highest ROI” subset to implement first: Change 1 + Change 4 + Change 6 + Change 7. That’s where the real operational value lands. \ No newline at end of file diff --git a/docs/who-command-design.feedback-4.md b/docs/who-command-design.feedback-4.md new file mode 100644 index 0000000..802fb1e --- /dev/null +++ b/docs/who-command-design.feedback-4.md @@ -0,0 +1,3 @@ +ChatGPT said: +Proposing code revisions for performance and determinism +Answer now \ No newline at end of file diff --git a/docs/who-command-design.feedback-5.md b/docs/who-command-design.feedback-5.md new file mode 100644 index 0000000..6686dbf --- /dev/null +++ b/docs/who-command-design.feedback-5.md @@ -0,0 +1,356 @@ +Below are the highest-leverage revisions I’d make. They’re tightly scoped (no new tables/APIs), but fix a few real correctness issues and make the outputs more actionable. 
+ +1) Fix a correctness bug in PathQuery: don’t escape for =, and make --path Makefile actually work +Why + +Bug: build_path_query() currently runs escape_like() even when is_prefix = false (exact match). That will break exact matches for paths containing _, %, or \ because = does not treat those as metacharacters (so the escaped string won’t equal the stored path). + +UX mismatch: The plan says --path handles dotless root files (Makefile/LICENSE), but the current logic still treats them as directory prefixes (Makefile/%) → zero results. + +Change + +Only escape for LIKE. + +Treat root paths (no /) passed via --path as exact matches by default (unless they end with /). + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +-/// Build a path query from a user-supplied path. +-/// +-/// Rules: +-/// - If the path ends with `/`, it's a directory prefix -> `escaped_path%` (LIKE) +-/// - If the last path segment contains `.`, it's a file -> exact match (=) +-/// - Otherwise, it's a directory prefix -> `escaped_path/%` (LIKE) ++/// Build a path query from a user-supplied path. 
++/// ++/// Rules: ++/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE) ++/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=) ++/// (this makes `--path Makefile` and `--path LICENSE` work as intended) ++/// - Else if the last path segment contains `.`, treat as exact (=) ++/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE) +@@ +-fn build_path_query(path: &str) -> PathQuery { ++fn build_path_query(path: &str) -> PathQuery { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); +- let is_file = !path.ends_with('/') && last_segment.contains('.'); +- let escaped = escape_like(trimmed); ++ let is_root = !trimmed.contains('/'); ++ let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.')); + + if is_file { + PathQuery { +- value: escaped, ++ // IMPORTANT: do NOT escape for exact match (=) ++ value: trimmed.to_string(), + is_prefix: false, + } + } else { ++ let escaped = escape_like(trimmed); + PathQuery { + value: format!("{escaped}/%"), + is_prefix: true, + } + } + } +@@ +-/// **Known limitation:** Dotless root files (LICENSE, Makefile, Dockerfile) +-/// without a trailing `/` will be treated as directory prefixes. Use `--path` +-/// for these — the `--path` flag passes through to Expert mode directly, +-/// and the `build_path_query` output for "LICENSE" is a prefix `LICENSE/%` +-/// which will simply return zero results (a safe, obvious failure mode that the +-/// help text addresses). ++/// Note: Root file paths passed via `--path` (including dotless files like Makefile/LICENSE) ++/// are treated as exact matches unless they end with `/`. 
+ + +Also update the --path help text to be explicit: + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +- /// Force expert mode for a file/directory path (handles root files like +- /// README.md, LICENSE, Makefile that lack a / and can't be auto-detected) ++ /// Force expert mode for a file/directory path. ++ /// Root files (README.md, LICENSE, Makefile) are treated as exact matches. ++ /// Use a trailing `/` to force directory-prefix matching. + +2) Fix Active mode: your note_count is currently counting participants, and the CTE scans too broadly +Why + +In note_agg, you do SELECT DISTINCT discussion_id, author_username and then COUNT(*) AS note_count. That’s participant count, not note count. + +The current note_agg also builds the DISTINCT set from all notes then joins to picked. It’s avoidable work. + +Change + +Split into two aggregations scoped to picked: + +note_counts: counts non-system notes per picked discussion. + +participants: distinct usernames per picked discussion, then GROUP_CONCAT. 
+ +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +- note_agg AS ( +- SELECT +- n.discussion_id, +- COUNT(*) AS note_count, +- GROUP_CONCAT(n.author_username, X'1F') AS participants +- FROM ( +- SELECT DISTINCT discussion_id, author_username +- FROM notes +- WHERE is_system = 0 AND author_username IS NOT NULL +- ) n +- JOIN picked p ON p.id = n.discussion_id +- GROUP BY n.discussion_id +- ) ++ note_counts AS ( ++ SELECT ++ n.discussion_id, ++ COUNT(*) AS note_count ++ FROM notes n ++ JOIN picked p ON p.id = n.discussion_id ++ WHERE n.is_system = 0 ++ GROUP BY n.discussion_id ++ ), ++ participants AS ( ++ SELECT ++ x.discussion_id, ++ GROUP_CONCAT(x.author_username, X'1F') AS participants ++ FROM ( ++ SELECT DISTINCT n.discussion_id, n.author_username ++ FROM notes n ++ JOIN picked p ON p.id = n.discussion_id ++ WHERE n.is_system = 0 AND n.author_username IS NOT NULL ++ ) x ++ GROUP BY x.discussion_id ++ ) +@@ +- LEFT JOIN note_agg na ON na.discussion_id = p.id ++ LEFT JOIN note_counts nc ON nc.discussion_id = p.id ++ LEFT JOIN participants pa ON pa.discussion_id = p.id +@@ +- COALESCE(na.note_count, 0) AS note_count, +- COALESCE(na.participants, '') AS participants ++ COALESCE(nc.note_count, 0) AS note_count, ++ COALESCE(pa.participants, '') AS participants + + +Net effect: correctness fix + more predictable perf. 
+ +Add a test that would have failed before: + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ + #[test] + fn test_active_query() { +@@ +- insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work"); ++ insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work"); ++ insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/foo.rs", "follow-up"); +@@ +- assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]); ++ assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]); ++ assert_eq!(result.discussions[0].note_count, 2); + +3) Index fix: idx_discussions_unresolved_recent won’t help global --active ordering +Why + +Your index is (project_id, last_note_at) with WHERE resolvable=1 AND resolved=0. + +When --active is not project-scoped (common default), SQLite can’t use (project_id, last_note_at) to satisfy ORDER BY last_note_at DESC efficiently because project_id isn’t constrained. + +This can turn into a scan+sort over potentially large unresolved sets. + +Change + +Keep the project-scoped index, but add a global ordering index (partial, still small): + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ + CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent + ON discussions(project_id, last_note_at) + WHERE resolvable = 1 AND resolved = 0; ++ ++-- Active (global): unresolved discussions by recency (no project scope). ++-- Supports ORDER BY last_note_at DESC LIMIT N when project_id is unconstrained. ++CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global ++ ON discussions(last_note_at) ++ WHERE resolvable = 1 AND resolved = 0; + +4) Make Overlap “touches” coherent: count MRs for reviewers, not DiffNotes +Why + +Overlap’s question is “Who else has MRs touching my files?” but: + +reviewer branch uses COUNT(*) (DiffNotes) + +author branch uses COUNT(DISTINCT m.id) (MRs) + +Those are different units; summing them into touch_count is misleading. 
+ +Change + +Count distinct MRs on the reviewer branch too: + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +- COUNT(*) AS touch_count, ++ COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_touch_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + + +Also update human output labeling: + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +- style("Touches").bold(), ++ style("MRs").bold(), + + +(You still preserve “strength” via mr_refs and last_touch_at.) + +5) Make outputs more actionable: add a canonical ref field (group/project!iid, group/project#iid) +Why + +You already do this for Overlap (mr_refs). Doing the same for Workload and Active reduces friction for both humans and agents: + +humans can copy/paste a single token + +robots don’t need to stitch project_path + iid + prefix + +Change (Workload structs + SQL) +diff +Copy code +diff --git a/plan.md b/plan.md +@@ + pub struct WorkloadIssue { + pub iid: i64, ++ pub ref_: String, + pub title: String, + pub project_path: String, + pub updated_at: i64, + } +@@ + pub struct WorkloadMr { + pub iid: i64, ++ pub ref_: String, + pub title: String, + pub draft: bool, + pub project_path: String, +@@ +- let issues_sql = +- "SELECT i.iid, i.title, p.path_with_namespace, i.updated_at ++ let issues_sql = ++ "SELECT i.iid, ++ (p.path_with_namespace || '#' || i.iid) AS ref, ++ i.title, p.path_with_namespace, i.updated_at +@@ +- iid: row.get(0)?, +- title: row.get(1)?, +- project_path: row.get(2)?, +- updated_at: row.get(3)?, ++ iid: row.get(0)?, ++ ref_: row.get(1)?, ++ title: row.get(2)?, ++ project_path: row.get(3)?, ++ updated_at: row.get(4)?, + }) +@@ +- let authored_sql = +- "SELECT m.iid, m.title, m.draft, p.path_with_namespace, m.updated_at ++ let authored_sql = ++ "SELECT m.iid, ++ (p.path_with_namespace || '!' || m.iid) AS ref, ++ m.title, m.draft, p.path_with_namespace, m.updated_at +@@ +- iid: row.get(0)?, +- title: row.get(1)?, +- draft: row.get::<_, i32>(2)? 
!= 0, +- project_path: row.get(3)?, ++ iid: row.get(0)?, ++ ref_: row.get(1)?, ++ title: row.get(2)?, ++ draft: row.get::<_, i32>(3)? != 0, ++ project_path: row.get(4)?, + author_username: None, +- updated_at: row.get(4)?, ++ updated_at: row.get(5)?, + }) + + +Then use ref_ in human output + robot JSON. + +6) Reviews mode: tolerate leading whitespace before **prefix** +Why + +Many people write " **suggestion**: ...". Current LIKE '**%**%' misses that. + +Change + +Use ltrim(n.body) consistently: + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ +- AND n.body LIKE '**%**%' ++ AND ltrim(n.body) LIKE '**%**%' +@@ +- SUBSTR(n.body, 3, INSTR(SUBSTR(n.body, 3), '**') - 1) AS raw_prefix, ++ SUBSTR(ltrim(n.body), 3, INSTR(SUBSTR(ltrim(n.body), 3), '**') - 1) AS raw_prefix, + +7) Add two small tests that catch the above regressions +Why + +These are exactly the kind of issues that slip through without targeted tests. + +diff +Copy code +diff --git a/plan.md b/plan.md +@@ + #[test] + fn test_escape_like() { +@@ + } ++ ++ #[test] ++ fn test_build_path_query_exact_does_not_escape() { ++ // '_' must not be escaped for '=' ++ let pq = build_path_query("README_with_underscore.md"); ++ assert_eq!(pq.value, "README_with_underscore.md"); ++ assert!(!pq.is_prefix); ++ } ++ ++ #[test] ++ fn test_path_flag_dotless_root_file_is_exact() { ++ let pq = build_path_query("Makefile"); ++ assert_eq!(pq.value, "Makefile"); ++ assert!(!pq.is_prefix); ++ } + +Summary of net effect + +Correctness fixes: exact-path escaping bug; Active.note_count bug. + +Perf fixes: global --active index; avoid broad note scans in Active. + +Usefulness upgrades: coherent overlap “touch” metric; canonical refs everywhere; reviews prefix more robust. + +If you want one extra “stretch” that still isn’t scope creep: add an unscoped warning line in human output when project_id == None (e.g., “Aggregated across projects; use -p to scope”) for Expert/Overlap/Active. 
That’s pure presentation, but prevents misinterpretation in multi-project DBs. \ No newline at end of file diff --git a/docs/who-command-design.feedback-6.md b/docs/who-command-design.feedback-6.md new file mode 100644 index 0000000..5876662 --- /dev/null +++ b/docs/who-command-design.feedback-6.md @@ -0,0 +1,471 @@ +Proposed revisions (Iteration 6) + +Below are the highest-leverage changes I’d make on top of your current Iteration 5 plan, with rationale and git-diff style edits to the plan text/snippets. + +1) Fix a real edge case: dotless non-root files (src/Dockerfile, infra/Makefile, etc.) +Why + +Your current build_path_query() treats dotless last segments as directories (prefix match) unless the path is root. That misclassifies legitimate dotless files inside directories and silently produces path/% (zero hits or wrong hits). + +Best minimal fix: keep your static SQL approach, but add a DB existence probe (static SQL) for path queries: + +If user didn’t force directory (/), and exact path exists in DiffNotes, treat as exact =. + +Otherwise use prefix LIKE 'dir/%'. + +This avoids new CLI flags, avoids heuristics lists, and uses your existing partial index (idx_notes_diffnote_path_created) efficiently. + +Diff +diff +Copy code +diff --git a/Plan.md b/Plan.md +@@ +-struct PathQuery { ++struct PathQuery { + /// The parameter value to bind. + value: String, + /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. + is_prefix: bool, + } + +-/// Build a path query from a user-supplied path. ++/// Build a path query from a user-supplied path, with a DB probe for dotless files. 
+@@ +-fn build_path_query(path: &str) -> PathQuery { ++fn build_path_query(conn: &Connection, path: &str) -> Result { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_root = !trimmed.contains('/'); +- let is_file = !path.ends_with('/') && (is_root || last_segment.contains('.')); ++ let forced_dir = path.ends_with('/'); ++ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); ++ ++ // If it doesn't "look like a file" but the exact path exists in DiffNotes, ++ // treat as exact (handles src/Dockerfile, infra/Makefile, etc.). ++ let exact_exists = if !looks_like_file && !forced_dir { ++ conn.query_row( ++ "SELECT 1 ++ FROM notes ++ WHERE note_type = 'DiffNote' ++ AND is_system = 0 ++ AND position_new_path = ?1 ++ LIMIT 1", ++ rusqlite::params![trimmed], ++ |_| Ok(()), ++ ).is_ok() ++ } else { ++ false ++ }; ++ ++ let is_file = looks_like_file || exact_exists; + + if is_file { + PathQuery { + value: trimmed.to_string(), + is_prefix: false, + } + } else { + let escaped = escape_like(trimmed); + PathQuery { + value: format!("{escaped}/%"), + is_prefix: true, + } + } + } + + +Also update callers: + +diff +Copy code +@@ +- let pq = build_path_query(path); ++ let pq = build_path_query(conn, path)?; +@@ +- let pq = build_path_query(path); ++ let pq = build_path_query(conn, path)?; + + +And tests: + +diff +Copy code +@@ +- fn test_build_path_query() { ++ fn test_build_path_query() { +@@ +- // Dotless root file -> exact match (root path without '/') ++ // Dotless root file -> exact match (root path without '/') + let pq = build_path_query("Makefile"); + assert_eq!(pq.value, "Makefile"); + assert!(!pq.is_prefix); ++ ++ // Dotless file in subdir should become exact if DB contains it (probe) ++ // (set up: insert one DiffNote with position_new_path = "src/Dockerfile") + +2) Make “reviewer” semantics correct: exclude MR authors commenting on their own diffs +Why + +Right now, Overlap 
(and Expert reviewer branch) will count MR authors as “reviewers” if they leave DiffNotes in their own MR (clarifications / replies), inflating A+R and contaminating “who reviewed here” signals. + +You already enforce this in --reviews mode (m.author_username != ?1). Apply the same principle consistently: + +Reviewer branch: only count notes where n.author_username != m.author_username (when both non-NULL). + +Diff (Overlap reviewer branch) +diff +Copy code +@@ +- WHERE n.note_type = 'DiffNote' ++ WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.is_system = 0 + AND n.author_username IS NOT NULL ++ AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + + +Same change for sql_exact. + +3) Expert mode scoring: align units + reduce single-MR “comment storms” +Why + +Expert currently mixes units: + +reviewer side: DiffNote count + +author side: distinct MR count + +That makes score noisy and can crown “someone who wrote 30 comments on one MR” as top expert. + +Fix: make both sides primarily MR-breadth: + +reviewer: COUNT(DISTINCT m.id) as review_mr_count + +author: COUNT(DISTINCT m.id) as author_mr_count +Optionally keep review_note_count as a secondary intensity signal (but not the main driver). 
+ +Diff (types + SQL) +diff +Copy code +@@ + pub struct Expert { + pub username: String, +- pub score: f64, +- pub review_count: u32, +- pub author_count: u32, ++ pub score: i64, ++ pub review_mr_count: u32, ++ pub review_note_count: u32, ++ pub author_mr_count: u32, + pub last_active_ms: i64, + } + + +Reviewer branch now joins to MR so it can count distinct MRs and exclude self-comments: + +diff +Copy code +@@ +- SELECT +- n.author_username AS username, +- 'reviewer' AS role, +- COUNT(*) AS cnt, +- MAX(n.created_at) AS last_active_at +- FROM notes n ++ SELECT ++ n.author_username AS username, ++ 'reviewer' AS role, ++ COUNT(DISTINCT m.id) AS mr_cnt, ++ COUNT(*) AS note_cnt, ++ MAX(n.created_at) AS last_active_at ++ FROM notes n ++ JOIN discussions d ON n.discussion_id = d.id ++ JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.author_username IS NOT NULL ++ AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + +Update author branch payload to match shape: + +diff +Copy code +@@ + SELECT + m.author_username AS username, + 'author' AS role, +- COUNT(DISTINCT m.id) AS cnt, ++ COUNT(DISTINCT m.id) AS mr_cnt, ++ 0 AS note_cnt, + MAX(n.created_at) AS last_active_at + + +Aggregate: + +diff +Copy code +@@ + SELECT + username, +- SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) AS review_count, +- SUM(CASE WHEN role = 'author' THEN cnt ELSE 0 END) AS author_count, ++ SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, ++ SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, ++ SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, + MAX(last_active_at) AS last_active_at, +- (SUM(CASE WHEN role = 'reviewer' THEN cnt ELSE 0 END) * 3.0) + +- (SUM(CASE WHEN role = 'author' THEN cnt 
ELSE 0 END) * 2.0) AS score ++ ( ++ (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + ++ (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + ++ (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) ++ ) AS score + + +Human header: + +diff +Copy code +@@ +- style("Reviews").bold(), +- style("Authored").bold(), ++ style("Reviewed(MRs)").bold(), ++ style("Notes").bold(), ++ style("Authored(MRs)").bold(), + +4) Deterministic output: participants + MR refs + tie-breakers +Why + +You’ve correctly focused on reproducibility (resolved_input), but you still have nondeterministic lists: + +participants: GROUP_CONCAT order is undefined → vector order changes run-to-run. + +mr_refs: you dedup via HashSet then iterate → undefined order. + +user sorting in overlap is missing stable tie-breakers. + +This is a real “robot mode flake” source. + +Diff (Active participants sort) +diff +Copy code +@@ +- let participants: Vec = participants_csv ++ let mut participants: Vec = participants_csv + .as_deref() + .filter(|s| !s.is_empty()) + .map(|csv| csv.split('\x1F').map(String::from).collect()) + .unwrap_or_default(); ++ participants.sort(); // stable, deterministic + +Diff (Overlap MR refs sort + stable user sort) +diff +Copy code +@@ +- users.sort_by(|a, b| b.touch_count.cmp(&a.touch_count)); ++ users.sort_by(|a, b| { ++ b.touch_count.cmp(&a.touch_count) ++ .then_with(|| b.last_touch_at.cmp(&a.last_touch_at)) ++ .then_with(|| a.username.cmp(&b.username)) ++ }); +@@ +- entry.mr_refs = set.into_iter().collect(); ++ let mut v: Vec = set.into_iter().collect(); ++ v.sort(); ++ entry.mr_refs = v; + +5) Make --limit actionable: surface truncation explicitly (human + robot) +Why + +Agents (and humans) need to know if results were cut off so they can rerun with a bigger -n. +Right now there’s no signal. + +Minimal pattern: query limit + 1, set truncated = true if you got > limit, then truncate. 
+ +Diff (result types) +diff +Copy code +@@ + pub struct ExpertResult { + pub path_query: String, + pub experts: Vec, ++ pub truncated: bool, + } +@@ + pub struct ActiveResult { + pub discussions: Vec, + pub total_unresolved: u32, ++ pub truncated: bool, + } +@@ + pub struct OverlapResult { + pub path_query: String, + pub users: Vec, ++ pub truncated: bool, + } + +Diff (query pattern example) +diff +Copy code +@@ +- let limit_i64 = limit as i64; ++ let limit_plus_one = (limit + 1) as i64; +@@ +- LIMIT ?4 ++ LIMIT ?4 +@@ +- rusqlite::params![pq.value, since_ms, project_id, limit_i64], ++ rusqlite::params![pq.value, since_ms, project_id, limit_plus_one], +@@ +- Ok(ExpertResult { ++ let truncated = experts.len() > limit; ++ let experts = experts.into_iter().take(limit).collect(); ++ Ok(ExpertResult { + path_query: path.to_string(), + experts, ++ truncated, + }) + + +Human output hint: + +diff +Copy code +@@ + if r.experts.is_empty() { ... } ++ if r.truncated { ++ println!(" {}", style("(showing first -n; rerun with a higher --limit)").dim()); ++ } + + +Robot output field: + +diff +Copy code +@@ + fn expert_to_json(r: &ExpertResult) -> serde_json::Value { + serde_json::json!({ + "path_query": r.path_query, ++ "truncated": r.truncated, + "experts": ... + }) + } + +6) Overlap merge hot loop: avoid repeated HashSet rebuild per row +Why + +This line is expensive in a UNION result with many rows: + +rust +Copy code +let mut set: HashSet = entry.mr_refs.drain(..).collect(); + + +It reallocates and rehashes every time. + +Fix: store an accumulator with HashSet during merge, convert once at end. 
+ +Diff (internal accumulator) +diff +Copy code +@@ +- let mut user_map: HashMap = HashMap::new(); ++ struct OverlapAcc { ++ username: String, ++ author_touch_count: u32, ++ review_touch_count: u32, ++ touch_count: u32, ++ last_touch_at: i64, ++ mr_refs: HashSet, ++ } ++ let mut user_map: HashMap = HashMap::new(); +@@ +- let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { ++ let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapAcc { + username: username.clone(), + author_touch_count: 0, + review_touch_count: 0, + touch_count: 0, + last_touch_at: 0, +- mr_refs: Vec::new(), ++ mr_refs: HashSet::new(), + }); +@@ +- let mut set: HashSet = entry.mr_refs.drain(..).collect(); +- for r in mr_refs { set.insert(r); } +- entry.mr_refs = set.into_iter().collect(); ++ for r in mr_refs { entry.mr_refs.insert(r); } +@@ +- let mut users: Vec = user_map.into_values().collect(); ++ let mut users: Vec = user_map.into_values().map(|a| { ++ let mut mr_refs: Vec = a.mr_refs.into_iter().collect(); ++ mr_refs.sort(); ++ OverlapUser { ++ username: a.username, ++ author_touch_count: a.author_touch_count, ++ review_touch_count: a.review_touch_count, ++ touch_count: a.touch_count, ++ last_touch_at: a.last_touch_at, ++ mr_refs, ++ } ++ }).collect(); + +7) Tests to lock these behaviors +Add tests (high value) + +dotless subdir file uses DB probe → exact match + +self-review exclusion prevents MR author showing up as reviewer + +deterministic ordering for participants and mr_refs (sort) + +Diff (test additions outline) +diff +Copy code +@@ + #[test] ++ fn test_build_path_query_dotless_subdir_file_uses_probe() { ++ let conn = setup_test_db(); ++ insert_project(&conn, 1, "team/backend"); ++ insert_mr(&conn, 1, 1, 100, "author_a", "opened"); ++ insert_discussion(&conn, 1, 1, Some(1), None, true, false); ++ insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/Dockerfile", "note"); ++ ++ let pq = build_path_query(&conn, "src/Dockerfile").unwrap(); ++ 
assert_eq!(pq.value, "src/Dockerfile"); ++ assert!(!pq.is_prefix); ++ } ++ ++ #[test] ++ fn test_overlap_excludes_self_review_notes() { ++ let conn = setup_test_db(); ++ insert_project(&conn, 1, "team/backend"); ++ insert_mr(&conn, 1, 1, 100, "author_a", "opened"); ++ insert_discussion(&conn, 1, 1, Some(1), None, true, false); ++ // author_a comments on their own MR diff ++ insert_diffnote(&conn, 1, 1, 1, "author_a", "src/auth/login.rs", "clarification"); ++ ++ let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); ++ let u = result.users.iter().find(|u| u.username == "author_a"); ++ // should not be credited as reviewer touch ++ assert!(u.map(|x| x.review_touch_count).unwrap_or(0) == 0); ++ } + +Net effect + +Correctness: fixes dotless subdir files + self-review pollution. + +Signal quality: Expert ranking becomes harder to game by comment volume. + +Robot reproducibility: deterministic ordering + explicit truncation. + +Performance: avoids rehash loops in overlap merges; path probe uses indexed equality. + +If you want one “single best” change: #1 (DB probe exact-match) is the most likely to prevent confusing “why is this empty?” behavior without adding any user-facing complexity. \ No newline at end of file diff --git a/docs/who-command-design.feedback-7.md b/docs/who-command-design.feedback-7.md new file mode 100644 index 0000000..de68fec --- /dev/null +++ b/docs/who-command-design.feedback-7.md @@ -0,0 +1,353 @@ +Below are the highest-leverage revisions I’d make to iteration 6 to improve correctness (multi-project edge cases), robot-mode reliability (bounded payloads + truncation), and signal quality—without changing the fundamental scope (still pure SQL over existing tables). 
+ +1) Make build_path_query project-aware and two-way probe (exact and prefix) +Why + +Your DB probe currently answers: “does this exact file exist anywhere in DiffNotes?” That can misclassify in a project-scoped run: + +Path exists as a dotless file in Project A → probe returns true + +User runs -p Project B where the path is a directory (or different shape) → you switch to exact, return empty, and miss valid prefix hits. + +Also, you still have a minor heuristic fragility for dot directories when the user omits trailing / (e.g., .github/workflows): last segment has a dot → you treat as file unless forced dir. + +Revision + +Thread project_id into build_path_query(conn, path, project_id) + +Probe exact first (scoped), then probe prefix (scoped) + +Only fall back to heuristics if both probes fail + +This keeps “static SQL, no dynamic assembly,” and costs at most 2 indexed existence queries per invocation. + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ +- fn build_path_query(conn: &Connection, path: &str) -> Result { ++ fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> Result { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_root = !trimmed.contains('/'); + let forced_dir = path.ends_with('/'); +- let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); ++ // Heuristic is now only a fallback; probes decide first. 
++ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); + +- let exact_exists = if !looks_like_file && !forced_dir { +- conn.query_row( +- "SELECT 1 FROM notes +- WHERE note_type = 'DiffNote' +- AND is_system = 0 +- AND position_new_path = ?1 +- LIMIT 1", +- rusqlite::params![trimmed], +- |_| Ok(()), +- ) +- .is_ok() +- } else { +- false +- }; ++ // Probe 1: exact file exists (scoped) ++ let exact_exists = conn.query_row( ++ "SELECT 1 FROM notes ++ WHERE note_type = 'DiffNote' ++ AND is_system = 0 ++ AND position_new_path = ?1 ++ AND (?2 IS NULL OR project_id = ?2) ++ LIMIT 1", ++ rusqlite::params![trimmed, project_id], ++ |_| Ok(()), ++ ).is_ok(); ++ ++ // Probe 2: directory prefix exists (scoped) ++ let prefix_exists = if !forced_dir { ++ let escaped = escape_like(trimmed); ++ let pat = format!("{escaped}/%"); ++ conn.query_row( ++ "SELECT 1 FROM notes ++ WHERE note_type = 'DiffNote' ++ AND is_system = 0 ++ AND position_new_path LIKE ?1 ESCAPE '\\' ++ AND (?2 IS NULL OR project_id = ?2) ++ LIMIT 1", ++ rusqlite::params![pat, project_id], ++ |_| Ok(()), ++ ).is_ok() ++ } else { false }; + +- let is_file = looks_like_file || exact_exists; ++ // Forced directory always wins; otherwise: exact > prefix > heuristic ++ let is_file = if forced_dir { false } ++ else if exact_exists { true } ++ else if prefix_exists { false } ++ else { looks_like_file }; + + if is_file { + Ok(PathQuery { value: trimmed.to_string(), is_prefix: false }) + } else { + let escaped = escape_like(trimmed); + Ok(PathQuery { value: format!("{escaped}/%"), is_prefix: true }) + } + } +@@ +- let pq = build_path_query(conn, path)?; ++ let pq = build_path_query(conn, path, project_id)?; + + +Add test coverage for the multi-project misclassification case: + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + #[test] + fn test_build_path_query_dotless_subdir_file_uses_db_probe() { +@@ +- let pq = build_path_query(&conn, "src/Dockerfile").unwrap(); ++ 
let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); +@@ +- let pq2 = build_path_query(&conn2, "src/Dockerfile").unwrap(); ++ let pq2 = build_path_query(&conn2, "src/Dockerfile", None).unwrap(); + } ++ ++ #[test] ++ fn test_build_path_query_probe_is_project_scoped() { ++ // Path exists as a dotless file in project 1; project 2 should not ++ // treat it as an exact file unless it exists there too. ++ let conn = setup_test_db(); ++ insert_project(&conn, 1, "team/a"); ++ insert_project(&conn, 2, "team/b"); ++ insert_mr(&conn, 1, 1, 10, "author_a", "opened"); ++ insert_discussion(&conn, 1, 1, Some(1), None, true, false); ++ insert_diffnote(&conn, 1, 1, 1, "rev", "infra/Makefile", "note"); ++ ++ let pq_scoped = build_path_query(&conn, "infra/Makefile", Some(2)).unwrap(); ++ assert!(pq_scoped.is_prefix); // should fall back to prefix in project 2 ++ } + +2) Bound robot payload sizes for participants and mr_refs (with totals + truncation) +Why + +mr_refs and participants can become unbounded arrays in robot mode, which is a real operational hazard: + +huge JSON → slow, noisy diffs, brittle downstream pipelines + +potential SQLite group_concat truncation becomes invisible (and you can’t distinguish “no refs” vs “refs truncated”) + +Revision + +Introduce hard caps and explicit metadata: + +participants_total, participants_truncated + +mr_refs_total, mr_refs_truncated + +This is not scope creep—it’s defensive output hygiene. + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + pub struct ActiveDiscussion { +@@ + pub participants: Vec, ++ pub participants_total: u32, ++ pub participants_truncated: bool, + } +@@ + pub struct OverlapUser { +@@ + pub mr_refs: Vec, ++ pub mr_refs_total: u32, ++ pub mr_refs_truncated: bool, + } + + +Implementation sketch (Rust-side, deterministic): + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + fn query_active(...) 
-> Result { ++ const MAX_PARTICIPANTS: usize = 50; +@@ +- participants.sort(); ++ participants.sort(); ++ let participants_total = participants.len() as u32; ++ let participants_truncated = participants.len() > MAX_PARTICIPANTS; ++ if participants_truncated { ++ participants.truncate(MAX_PARTICIPANTS); ++ } +@@ + Ok(ActiveDiscussion { +@@ + participants, ++ participants_total, ++ participants_truncated, + }) +@@ + fn query_overlap(...) -> Result { ++ const MAX_MR_REFS_PER_USER: usize = 50; +@@ + .map(|a| { + let mut mr_refs: Vec = a.mr_refs.into_iter().collect(); + mr_refs.sort(); ++ let mr_refs_total = mr_refs.len() as u32; ++ let mr_refs_truncated = mr_refs.len() > MAX_MR_REFS_PER_USER; ++ if mr_refs_truncated { ++ mr_refs.truncate(MAX_MR_REFS_PER_USER); ++ } + OverlapUser { +@@ + mr_refs, ++ mr_refs_total, ++ mr_refs_truncated, + } + }) + + +Update robot JSON accordingly: + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + fn active_to_json(r: &ActiveResult) -> serde_json::Value { +@@ + "participants": d.participants, ++ "participants_total": d.participants_total, ++ "participants_truncated": d.participants_truncated, + })) +@@ + fn overlap_to_json(r: &OverlapResult) -> serde_json::Value { +@@ + "mr_refs": u.mr_refs, ++ "mr_refs_total": u.mr_refs_total, ++ "mr_refs_truncated": u.mr_refs_truncated, + })) + + +Also update robot-docs manifest schema snippet for who.active.discussions[] and who.overlap.users[]. + +3) Add truncation metadata to Workload sections (same LIMIT+1 pattern) +Why + +Workload is the mode most likely to be consumed by agents, and right now it has silent truncation (each section is LIMIT N with no signal). Your plan already treats truncation as a first-class contract elsewhere; Workload should match. 
+ +Revision + +For each workload query: + +request LIMIT + 1 + +set *_truncated booleans + +trim to requested limit + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + pub struct WorkloadResult { + pub username: String, + pub assigned_issues: Vec, + pub authored_mrs: Vec, + pub reviewing_mrs: Vec, + pub unresolved_discussions: Vec, ++ pub assigned_issues_truncated: bool, ++ pub authored_mrs_truncated: bool, ++ pub reviewing_mrs_truncated: bool, ++ pub unresolved_discussions_truncated: bool, + } + + +And in JSON include the booleans (plus you already have summary.counts). + +This is mechanically repetitive but extremely valuable for automation. + +4) Rename “Last Active” → “Last Seen” for Expert/Overlap +Why + +For “author” rows, the timestamp is derived from review activity on their MR (via MAX(n.created_at)), not necessarily that person’s direct action. Calling that “active” is semantically misleading. “Last seen” is accurate across both reviewer+author branches. + +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + pub struct Expert { +@@ +- pub last_active_ms: i64, ++ pub last_seen_ms: i64, + } +@@ + pub struct OverlapUser { +@@ +- pub last_touch_at: i64, ++ pub last_seen_at: i64, +@@ + fn print_expert_human(...) { +@@ +- style("Last Active").bold(), ++ style("Last Seen").bold(), +@@ +- style(format_relative_time(expert.last_active_ms)).dim(), ++ style(format_relative_time(expert.last_seen_ms)).dim(), + + +(Keep internal SQL aliases consistent: last_seen_at everywhere.) + +5) Make MR state filtering consistent in Expert/Overlap reviewer branches +Why + +You already restrict Overlap author branch to opened|merged, but reviewer branches can include closed/unmerged noise. Consistency improves signal quality and can reduce scan churn. + +Low-risk revision: apply the same state filter to reviewer branches (Expert + Overlap). You can keep “closed” excluded by default without adding new flags. 
+ +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 +@@ +- AND n.created_at >= ?2 ++ AND m.state IN ('opened','merged') ++ AND n.created_at >= ?2 + + +This is a semantic choice; if you later want archaeology across closed/unmerged, that belongs in a separate mode/flag, but I would not add it now. + +6) Add a design principle for bounded outputs (aligns with robot-first reproducibility) +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ + 10. **Truncation transparency.** Result types carry a `truncated: bool` flag... ++11. **Bounded payloads.** Robot JSON must never emit unbounded arrays (participants, refs). ++ Large list fields are capped with `*_total` + `*_truncated` so agents can page/retry. + +Consolidated plan metadata bump (Iteration 7) +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +@@ +-iteration: 6 ++iteration: 7 + updated: 2026-02-07 + +Net effect (what you get) + +Correct path classification under -p scoping (no cross-project probe leakage) + +Deterministic + bounded robot payloads (no giant JSON surprises) + +Uniform truncation contract across all modes (Workload no longer silently truncates) + +Clearer semantics (“Last Seen” avoids misinterpretation) + +Cleaner signals (reviewer branches ignore closed/unmerged by default) + +If you want, I can also produce a second diff that updates the robot-docs schema block and the Verification EXPLAIN expectations to reflect the new probe queries and the state filter. 
\ No newline at end of file diff --git a/docs/who-command-design.feedback-8.md b/docs/who-command-design.feedback-8.md new file mode 100644 index 0000000..ca7eacd --- /dev/null +++ b/docs/who-command-design.feedback-8.md @@ -0,0 +1,552 @@ +Below are the highest-leverage revisions I’d make for iteration 8, staying within your MVP constraints (static SQL, no scope creep into new data sources), but tightening correctness, index utilization predictability, debuggability, and output safety. + +1) Fix the semantic bug in since_was_default (Workload mode) by introducing since_mode +Why this is better + +Right now since_was_default = args.since.is_none() is misleading for Workload, because Workload has no default window (it’s “unbounded unless explicitly filtered”). In robot mode, this creates incorrect intent replay and ambiguity. + +Replace the boolean with a tri-state: + +since_mode: "default" | "explicit" | "none" + +Keep since_was_default only if you want backward compatibility, but compute it as since_mode == "default". + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-5. **Robot-first reproducibility.** Robot JSON output includes both a raw `input` object (echoing CLI args) and a `resolved_input` object (computed `since_ms`, `since_iso`, `since_was_default`, resolved `project_id` + `project_path`, effective `mode`, `limit`) so agents can trace exactly what ran and reproduce it precisely. ++5. **Robot-first reproducibility.** Robot JSON output includes both a raw `input` object (echoing CLI args) and a `resolved_input` object (computed `since_ms`, `since_iso`, `since_mode`, resolved `project_id` + `project_path`, effective `mode`, `limit`) so agents can trace exactly what ran and reproduce it precisely. 
+@@ + pub struct WhoResolvedInput { + pub mode: String, + pub project_id: Option, + pub project_path: Option, + pub since_ms: Option, + pub since_iso: Option, +- pub since_was_default: bool, ++ /// "default" (mode default applied), "explicit" (user provided --since), "none" (no window) ++ pub since_mode: String, + pub limit: usize, + } +@@ +- let since_was_default = args.since.is_none(); ++ // since_mode semantics: ++ // - expert/reviews/active/overlap: default window applies if args.since is None ++ // - workload: no default window; args.since None => "none" ++ let since_mode_for_defaulted = if args.since.is_some() { "explicit" } else { "default" }; ++ let since_mode_for_workload = if args.since.is_some() { "explicit" } else { "none" }; +@@ + WhoMode::Expert { path } => { + let since_ms = resolve_since(args.since.as_deref(), "6m")?; + let result = query_expert(&conn, path, project_id, since_ms, args.limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "expert".to_string(), + project_id, + project_path, + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), +- since_was_default, ++ since_mode: since_mode_for_defaulted.to_string(), + limit: args.limit, + }, + result: WhoResult::Expert(result), + }) + } +@@ + WhoMode::Workload { username } => { + let since_ms = args + .since + .as_deref() + .map(|s| resolve_since_required(s)) + .transpose()?; + let result = query_workload(&conn, username, project_id, since_ms, args.limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "workload".to_string(), + project_id, + project_path, + since_ms, + since_iso: since_ms.map(ms_to_iso), +- since_was_default, ++ since_mode: since_mode_for_workload.to_string(), + limit: args.limit, + }, + result: WhoResult::Workload(result), + }) + } +@@ + fn print_who_json(run: &WhoRun, args: &WhoArgs, elapsed_ms: u64) { +@@ + let resolved_input = serde_json::json!({ + "mode": run.resolved_input.mode, + "project_id": run.resolved_input.project_id, + 
"project_path": run.resolved_input.project_path, + "since_ms": run.resolved_input.since_ms, + "since_iso": run.resolved_input.since_iso, +- "since_was_default": run.resolved_input.since_was_default, ++ "since_mode": run.resolved_input.since_mode, + "limit": run.resolved_input.limit, + }); + } + +2) Stop using nullable-OR ((? IS NULL OR col = ?)) where it determines the “right” index (Active is the big one) +Why this is better + +Your global vs project-scoped Active indexes are correct, but the nullable binding pattern undermines them because SQLite’s planner can’t assume whether ?2 is NULL at prepare time. Result: it can pick a “good enough for both” plan, which is often the wrong one for -p. + +Fix: keep SQL static, but use two static statements selected at runtime (like you already do for exact vs prefix path matching). + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-1. **Lean on existing infrastructure.** Use `(?N IS NULL OR ...)` nullable binding pattern (already used in `timeline_seed.rs`) instead of dynamic SQL string assembly. ++1. **Lean on existing infrastructure.** Prefer `(?N IS NULL OR ...)` nullable binding for optional filters **unless** it materially changes index choice. In those cases, select between **two static SQL strings** at runtime (no `format!()`), e.g. Active mode uses separate global vs project-scoped statements to ensure the intended index is used. 
+@@ + fn query_active( + conn: &Connection, + project_id: Option, + since_ms: i64, + limit: usize, + ) -> Result { + let limit_plus_one = (limit + 1) as i64; + +- // Total unresolved count +- let total_sql = +- "SELECT COUNT(*) FROM discussions d +- WHERE d.resolvable = 1 AND d.resolved = 0 +- AND d.last_note_at >= ?1 +- AND (?2 IS NULL OR d.project_id = ?2)"; ++ // Total unresolved count (two static variants to avoid nullable-OR planner ambiguity) ++ let total_sql_global = ++ "SELECT COUNT(*) FROM discussions d ++ WHERE d.resolvable = 1 AND d.resolved = 0 ++ AND d.last_note_at >= ?1"; ++ let total_sql_scoped = ++ "SELECT COUNT(*) FROM discussions d ++ WHERE d.resolvable = 1 AND d.resolved = 0 ++ AND d.last_note_at >= ?1 ++ AND d.project_id = ?2"; + +- let total_unresolved: u32 = +- conn.query_row(total_sql, rusqlite::params![since_ms, project_id], |row| row.get(0))?; ++ let total_unresolved: u32 = match project_id { ++ None => conn.query_row(total_sql_global, rusqlite::params![since_ms], |row| row.get(0))?, ++ Some(pid) => conn.query_row(total_sql_scoped, rusqlite::params![since_ms, pid], |row| row.get(0))?, ++ }; + +- let sql = " ++ let sql_global = " + WITH picked AS ( + SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, + d.project_id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1 +- AND (?2 IS NULL OR d.project_id = ?2) + ORDER BY d.last_note_at DESC + LIMIT ?2 + ), +@@ + ORDER BY p.last_note_at DESC + "; + +- let mut stmt = conn.prepare_cached(sql)?; +- let discussions: Vec = stmt +- .query_map(rusqlite::params![since_ms, project_id, limit_plus_one], |row| { ++ let sql_scoped = " ++ WITH picked AS ( ++ SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, ++ d.project_id, d.last_note_at ++ FROM discussions d ++ WHERE d.resolvable = 1 AND d.resolved = 0 ++ AND d.last_note_at >= ?1 ++ AND d.project_id = ?2 ++ ORDER BY d.last_note_at DESC ++ LIMIT ?3 ++ ), ++ note_counts AS ( ++ 
SELECT n.discussion_id, COUNT(*) AS note_count ++ FROM notes n ++ JOIN picked p ON p.id = n.discussion_id ++ WHERE n.is_system = 0 ++ GROUP BY n.discussion_id ++ ), ++ participants AS ( ++ SELECT x.discussion_id, GROUP_CONCAT(x.author_username, X'1F') AS participants ++ FROM ( ++ SELECT DISTINCT n.discussion_id, n.author_username ++ FROM notes n ++ JOIN picked p ON p.id = n.discussion_id ++ WHERE n.is_system = 0 AND n.author_username IS NOT NULL ++ ) x ++ GROUP BY x.discussion_id ++ ) ++ SELECT ++ p.id AS discussion_id, ++ p.noteable_type, ++ COALESCE(i.iid, m.iid) AS entity_iid, ++ COALESCE(i.title, m.title) AS entity_title, ++ proj.path_with_namespace, ++ p.last_note_at, ++ COALESCE(nc.note_count, 0) AS note_count, ++ COALESCE(pa.participants, '') AS participants ++ FROM picked p ++ JOIN projects proj ON p.project_id = proj.id ++ LEFT JOIN issues i ON p.issue_id = i.id ++ LEFT JOIN merge_requests m ON p.merge_request_id = m.id ++ LEFT JOIN note_counts nc ON nc.discussion_id = p.id ++ LEFT JOIN participants pa ON pa.discussion_id = p.id ++ ORDER BY p.last_note_at DESC ++ "; ++ ++ let discussions: Vec = match project_id { ++ None => { ++ let mut stmt = conn.prepare_cached(sql_global)?; ++ stmt.query_map(rusqlite::params![since_ms, limit_plus_one], |row| { ++ /* unchanged row mapping */ ++ })?.collect::, _>>()? ++ } ++ Some(pid) => { ++ let mut stmt = conn.prepare_cached(sql_scoped)?; ++ stmt.query_map(rusqlite::params![since_ms, pid, limit_plus_one], |row| { ++ /* unchanged row mapping */ ++ })?.collect::, _>>()? 
++ } ++ }; + + +Also update Verification to explicitly check both variants: + +diff +Copy code +@@ + # Performance verification (required before merge): +@@ + sqlite3 path/to/db.sqlite " + EXPLAIN QUERY PLAN + SELECT d.id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= 0 + ORDER BY d.last_note_at DESC + LIMIT 20; + " + # Expected: SEARCH discussions USING INDEX idx_discussions_unresolved_recent_global ++ ++sqlite3 path/to/db.sqlite " ++ EXPLAIN QUERY PLAN ++ SELECT d.id, d.last_note_at ++ FROM discussions d ++ WHERE d.resolvable = 1 AND d.resolved = 0 ++ AND d.project_id = 1 ++ AND d.last_note_at >= 0 ++ ORDER BY d.last_note_at DESC ++ LIMIT 20; ++" ++# Expected: SEARCH discussions USING INDEX idx_discussions_unresolved_recent + +3) Add repo-path normalization (eliminate trivial “no results” footguns) +Why this is better + +People paste: + +./src/foo/ + +/src/foo/ + +src\foo\bar.rs (Windows) +These currently lead to silent misses. 
+ +Normalize only user input (not DB content): + +trim whitespace + +strip leading ./ and / + +convert \ → / when the path contains only backslashes (mixed separators are left unchanged) + +collapse repeated // + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ + fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { +@@ +- if let Some(p) = &args.path { +- return Ok(WhoMode::Expert { path: p }); ++ if let Some(p) = &args.path { ++ let norm = normalize_repo_path(p); ++ return Ok(WhoMode::Expert { path: Box::leak(norm.into_boxed_str()) }); + } +@@ +- if let Some(path) = &args.overlap { +- return Ok(WhoMode::Overlap { path }); ++ if let Some(path) = &args.overlap { ++ let norm = normalize_repo_path(path); ++ return Ok(WhoMode::Overlap { path: Box::leak(norm.into_boxed_str()) }); + } +@@ +- if target.contains('/') { +- return Ok(WhoMode::Expert { path: target }); ++ if target.contains('/') { ++ let norm = normalize_repo_path(target); ++ return Ok(WhoMode::Expert { path: Box::leak(norm.into_boxed_str()) }); + } +@@ + } ++ ++/// Normalize user-supplied repo paths to match stored DiffNote paths. ++/// - trims whitespace ++/// - strips leading "./" and "/" (repo-relative) ++/// - converts '\' to '/' when no '/' is present (Windows paste) ++/// - collapses repeated slashes ++fn normalize_repo_path(input: &str) -> String { ++ let mut s = input.trim().to_string(); ++ if s.contains('\\') && !s.contains('/') { ++ s = s.replace('\\', "/"); ++ } ++ while s.starts_with("./") { ++ s = s.trim_start_matches("./").to_string(); ++ } ++ while s.starts_with('/') { ++ s = s.trim_start_matches('/').to_string(); ++ } ++ while s.contains("//") { ++ s = s.replace("//", "/"); ++ } ++ s ++} + + +(Add a small test block for normalization; even 2–3 asserts catch regressions.) + +4) Make path matching observable: include path_match (exact vs prefix) in results/JSON +Why this is better + +You’ve made path classification smarter (heuristics + two-way probe). 
That’s great, but without visibility you’ll get “why did it treat this as a directory?” confusion. Exposing match metadata is low cost and hugely helps debugging. + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-struct PathQuery { +- /// The parameter value to bind. +- value: String, +- /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. +- is_prefix: bool, +-} ++struct PathQuery { ++ /// User input after normalization (no trailing slash stripping yet). ++ input: String, ++ /// Trimmed path without trailing '/' used for exact/prefix construction. ++ normalized: String, ++ /// The SQL parameter bound to the statement (`foo/bar` or `foo/bar/%`). ++ sql_value: String, ++ /// If true: use `LIKE sql_value ESCAPE '\'`. If false: use `= normalized`. ++ is_prefix: bool, ++} +@@ +- let trimmed = path.trim_end_matches('/'); ++ let input = normalize_repo_path(path); ++ let trimmed = input.trim_end_matches('/').to_string(); +@@ +- Ok(PathQuery { +- value: trimmed.to_string(), +- is_prefix: false, +- }) ++ Ok(PathQuery { input, normalized: trimmed.clone(), sql_value: trimmed, is_prefix: false }) + } else { +- Ok(PathQuery { +- value: format!("{escaped}/%"), +- is_prefix: true, +- }) ++ Ok(PathQuery { input, normalized: trimmed.clone(), sql_value: format!("{escaped}/%"), is_prefix: true }) + } +@@ + pub struct ExpertResult { + pub path_query: String, ++ pub path_match: String, // "exact" or "prefix" + pub experts: Vec, + pub truncated: bool, + } +@@ + pub struct OverlapResult { + pub path_query: String, ++ pub path_match: String, // "exact" or "prefix" + pub users: Vec, + pub truncated: bool, + } +@@ + fn query_expert(...) 
-> Result { + let pq = build_path_query(conn, path, project_id)?; +@@ + Ok(ExpertResult { + path_query: path.to_string(), ++ path_match: if pq.is_prefix { "prefix".to_string() } else { "exact".to_string() }, + experts, + truncated, + }) + } +@@ + fn query_overlap(...) -> Result { + let pq = build_path_query(conn, path, project_id)?; +@@ + Ok(OverlapResult { + path_query: path.to_string(), ++ path_match: if pq.is_prefix { "prefix".to_string() } else { "exact".to_string() }, + users, + truncated, + }) + } +@@ + fn expert_to_json(r: &ExpertResult) -> serde_json::Value { + serde_json::json!({ + "path_query": r.path_query, ++ "path_match": r.path_match, + "truncated": r.truncated, + "experts": ... + }) + } +@@ + fn overlap_to_json(r: &OverlapResult) -> serde_json::Value { + serde_json::json!({ + "path_query": r.path_query, ++ "path_match": r.path_match, + "truncated": r.truncated, + "users": ... + }) + } + + +Human output can add a single dim hint line: + +(matching exact file) or (matching directory prefix) + +5) Put a hard upper bound on --limit at the CLI boundary +Why this is better + +You already bounded nested arrays (participants, mr_refs), but top-level lists are still user-unbounded. A single --limit 50000 can: + +generate huge JSON payloads + +blow up downstream agent pipelines + +create slow queries / memory spikes + +Clamp it before execution. A max of 500 is usually plenty; even 200 is fine. + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ + pub struct WhoArgs { +@@ +- /// Maximum results per section +- #[arg(short = 'n', long = "limit", default_value = "20", help_heading = "Output")] ++ /// Maximum results per section (bounded for output safety) ++ #[arg( ++ short = 'n', ++ long = "limit", ++ default_value = "20", ++ value_parser = clap::value_parser!(usize).range(1..=500), ++ help_heading = "Output" ++ )] + pub limit: usize, + } +@@ +-11. 
**Bounded payloads.** Robot JSON must never emit unbounded arrays ... ++11. **Bounded payloads.** Robot JSON must never emit unbounded arrays ... ++ Top-level result set size is also bounded via `--limit` (1..=500) to prevent runaway payloads. + +6) Clarify Active “unresolved count” semantics (window vs total) +Why this is better + +total_unresolved currently means “unresolved within the time window”. The human header prints “Active Discussions (X unresolved)” which can easily be misread as “total unresolved overall”. + +Small rename avoids confusion, no new behavior. + +Patch +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ + pub struct ActiveResult { + pub discussions: Vec, +- pub total_unresolved: u32, ++ pub total_unresolved_in_window: u32, + pub truncated: bool, + } +@@ +- println!( +- "{}", +- style(format!( +- "Active Discussions ({} unresolved)", +- r.total_unresolved +- )) +- .bold() +- ); ++ println!("{}", style(format!( ++ "Active Discussions ({} unresolved in window)", ++ r.total_unresolved_in_window ++ )).bold()); + + +(If you later want global total, add a second count query—but I’d keep MVP lean.) + +7) Tighten statement cache behavior: avoid preparing both SQL variants when not needed +Why this is better + +You already use prepare_cached(), but as you add more “two static variants” (exact/prefix; scoped/unscoped), it’s easy to accidentally prepare multiple statements per invocation. + +Codify: select variant first, then prepare exactly one. + +This is mostly a plan hygiene change (helps future you keep perf predictable). + +Patch (plan-level emphasis) +diff +Copy code +diff --git a/who-command-design.md b/who-command-design.md +--- a/who-command-design.md ++++ b/who-command-design.md +@@ +-1. **Lean on existing infrastructure.** ... ++1. **Lean on existing infrastructure.** ... 
++ When multiple static SQL variants exist (exact/prefix; scoped/unscoped), always: ++ (a) resolve which variant applies, then (b) `prepare_cached()` exactly one statement. + +Net effect (what you gain) + +Correct robot semantics (since_mode) without breaking your static-SQL/agent-first contract. + +Guaranteed intended index usage for Active global vs scoped queries (the nullable-OR planner pitfall is real). + +Fewer “why no results?” surprises via path normalization. + +Better debugging (path match introspection) with essentially no runtime cost. + +Output safety even when users/agents misconfigure --limit. + +Less ambiguous UX around “unresolved” counts. + +If you want a single “most important” change to ship before iteration 8 locks: #2 (Active query variants) and #1 (since semantics) are the two that prevent the most painful, hard-to-diagnose failures. \ No newline at end of file diff --git a/docs/who-command-design.md b/docs/who-command-design.md new file mode 100644 index 0000000..779addf --- /dev/null +++ b/docs/who-command-design.md @@ -0,0 +1,3251 @@ +--- +plan: true +title: "who-command-design" +status: iterating +iteration: 8 +target_iterations: 8 +beads_revision: 0 +related_plans: [] +created: 2026-02-07 +updated: 2026-02-07 +--- + +# Plan: `lore who` — People Intelligence Commands + +## Context + +The current beads roadmap focuses on Gate 4/5 (file-history, code-trace) — archaeology queries requiring `mr_file_changes` data that doesn't exist yet. Meanwhile, the DB has rich people/activity data (280K notes, 210K discussions, 33K DiffNotes with file positions, 53 active participants) that can answer collaboration questions immediately with zero new tables or API calls. + +This plan builds `lore who` — a pure SQL query layer over existing data answering five questions: +1. "Who should I talk to about this feature/file?" +2. "What is person X working on?" +3. "What review patterns does person X have?" +4. "What discussions are actively in progress?" +5. 
"Who else has MRs touching my files?" + +## Design Principles + +1. **Lean on existing infrastructure.** Prefer `(?N IS NULL OR ...)` nullable binding pattern (already used in `timeline_seed.rs`) instead of dynamic SQL string assembly **unless** it materially changes index choice. In those cases, select between **two static SQL strings** at runtime (no `format!()`), e.g., Active mode uses separate global vs project-scoped statements to ensure the intended index is used. **All SQL is fully static** — no `format!()` for query text, including `LIMIT` (bound as `?N`). Use `prepare_cached()` everywhere to capitalize on static SQL (cheap perf win, no scope creep). When multiple static SQL variants exist (exact/prefix; scoped/unscoped), always: (a) resolve which variant applies, then (b) `prepare_cached()` exactly one statement per invocation — avoid preparing both variants. +2. **Precision over breadth.** Path-based queries filter to `note_type = 'DiffNote'` only — the sole note type with reliable `position_new_path` data. System notes (`is_system = 1`) are excluded from **all** DiffNote-based branches consistently (reviewer AND author). +3. **Performance-aware.** A new migration adds composite indexes for the hot query paths. With 280K notes, unindexed LIKE on `position_new_path` plus timestamp filters would be unusable. Index column ordering is designed for query selectivity, not just predicate coverage. Performance is verified post-implementation with `EXPLAIN QUERY PLAN`. +4. **Fail-fast at the CLI layer.** Clap `conflicts_with` / `requires` attributes catch invalid flag combinations before reaching application code. +5. **Robot-first reproducibility.** Robot JSON output includes both a raw `input` object (echoing CLI args) and a `resolved_input` object (computed `since_ms`, `since_iso`, `since_mode`, resolved `project_id` + `project_path`, effective `mode`, `limit`) so agents can trace exactly what ran and reproduce it precisely. 
Fuzzy project resolution is non-deterministic over time; resolved values are the stable anchor. Entity IDs (`discussion_id`) are included in output so agents can dereference entities. `since_mode` is a tri-state (`"default"` / `"explicit"` / `"none"`) that distinguishes "mode default applied" from "user provided --since" from "no window" (Workload), fixing the semantic bug where `since_was_default = true` was ambiguous for Workload mode (which has no default window). +6. **Multi-project correctness.** All entity references are project-qualified in output (human: show `project_path` for workload/overlap; robot: `mr_refs` as `group/project!123` instead of bare IIDs). Project scoping predicates target the same table as the covering index (`n.project_id` for DiffNote queries, not `m.project_id`). +7. **Exact vs prefix path matching.** For exact file paths (no wildcard needed), use `=` instead of `LIKE` for better query plan signaling. Two static SQL strings are selected at call time via a `PathQuery` struct — no dynamic SQL. +8. **Self-review exclusion.** MR authors commenting on their own diffs (clarifications, replies) must not be counted as reviewers. All reviewer-role branches (Expert, Overlap) filter `n.author_username != m.author_username` to prevent inflating reviewer counts with self-comments. Reviews mode already does this (`m.author_username != ?1`). +9. **Deterministic output.** All list-valued fields in robot JSON must be deterministically ordered. `GROUP_CONCAT` results are sorted after parsing. `HashSet`-derived vectors are sorted before output. All sort comparisons include stable tie-breakers (username ASC as final key). This prevents robot-mode flakes across runs. +10. **Truncation transparency.** Result types carry a `truncated: bool` flag. Queries request `LIMIT + 1` rows, set `truncated = true` if overflow detected, then trim to the requested limit. Both human and robot output surface this so agents know to retry with a higher `--limit`. +11. 
**Bounded payloads.** Robot JSON must never emit unbounded arrays (participants, MR refs). Large list fields are capped with `*_total` + `*_truncated` metadata so agents can detect silent truncation and page/retry. Top-level result set size is also bounded via `--limit` (1..=500, enforced at the CLI boundary via `value_parser`) to prevent runaway payloads from misconfigured agents. This is not scope creep — it's defensive output hygiene aligned with robot-first reproducibility. + +## Files to Create/Modify + +| File | Action | Description | +|---|---|---| +| `src/cli/commands/who.rs` | CREATE | All 5 query modes + human/robot output | +| `src/cli/commands/mod.rs` | MODIFY | Add `pub mod who` + re-exports | +| `src/cli/mod.rs` | MODIFY | Add `WhoArgs` struct + `Commands::Who` variant | +| `src/main.rs` | MODIFY | Add dispatch arm + `handle_who` fn + VALID_COMMANDS + robot-docs | +| `src/core/db.rs` | MODIFY | Add migration 017: composite indexes for who query paths | + +## Reusable Utilities (DO NOT reimplement) + +| Utility | Location | Usage | +|---|---|---| +| `Config::load(override)` | `src/core/config.rs` | Load config from file | +| `get_db_path(override)` | `src/core/paths.rs` | Resolve DB file path | +| `create_connection(&path)` | `src/core/db.rs` | Open SQLite connection | +| `resolve_project(&conn, &str)` | `src/core/project.rs` | Fuzzy project name resolution (returns local `id: i64`) | +| `parse_since(&str) -> Option` | `src/core/time.rs` | Parse "7d", "2w", "6m", "2024-01-15" to ms epoch | +| `ms_to_iso(i64) -> String` | `src/core/time.rs` | ms epoch to ISO 8601 string | +| `now_ms() -> i64` | `src/core/time.rs` | Current time as ms epoch | +| `RobotMeta { elapsed_ms }` | `src/cli/robot.rs` | Standard meta field for JSON output | +| `LoreError`, `Result` | `src/core/error.rs` | Error types and Result alias | + +**Note:** `format_relative_time()` in `src/cli/commands/list.rs:595` is private. 
Duplicate it in `who.rs` rather than refactoring list.rs — keep the blast radius small. + +**Note:** `lookup_project_path()` does not currently exist. Add a small helper in `who.rs` that does a single `SELECT path_with_namespace FROM projects WHERE id = ?1` — it's only called once per invocation and doesn't warrant a shared utility. + +--- + +## Step 0: Migration 017 — Composite Indexes for Who Query Paths + +### Why this step exists + +With 280K notes, the path/timestamp queries will degrade without composite indexes. Existing indexes cover `note_type` and `position_new_path` separately (migration 006) but not as composites aligned to the `who` query patterns. This is a non-breaking, additive-only migration. + +### `src/core/db.rs` — Add migration to MIGRATIONS array + +Add as entry 17 (index 16, since the array is 0-indexed and currently has 16 entries): + +```sql +-- Migration 017: Composite indexes for `who` query paths + +-- Expert/Overlap: DiffNote path prefix + timestamp filter. +-- Column ordering rationale: The partial index predicate already filters +-- note_type = 'DiffNote', so note_type is NOT a leading column. +-- position_new_path is the most selective prefix filter (LIKE 'path/%'), +-- followed by created_at for range scans, and project_id for optional scoping. +CREATE INDEX IF NOT EXISTS idx_notes_diffnote_path_created + ON notes(position_new_path, created_at, project_id) + WHERE note_type = 'DiffNote' AND is_system = 0; + +-- Active/Workload: discussion participation lookups. +-- Covers the EXISTS subquery pattern: WHERE n.discussion_id = d.id +-- AND n.author_username = ?1 AND n.is_system = 0. +-- Also covers the "participants" subquery in Active mode. +CREATE INDEX IF NOT EXISTS idx_notes_discussion_author + ON notes(discussion_id, author_username) + WHERE is_system = 0; + +-- Active (project-scoped): unresolved discussions by recency, scoped by project. 
+-- Column ordering: project_id first for optional scoping, then last_note_at +-- for ORDER BY DESC. Partial index keeps it tiny (only unresolved resolvable). +CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent + ON discussions(project_id, last_note_at) + WHERE resolvable = 1 AND resolved = 0; + +-- Active (global): unresolved discussions by recency (no project scope). +-- Supports ORDER BY last_note_at DESC LIMIT N when project_id is unconstrained. +-- Without this, the (project_id, last_note_at) index can't satisfy the ORDER BY +-- efficiently because project_id isn't constrained, forcing a sort+scan. +CREATE INDEX IF NOT EXISTS idx_discussions_unresolved_recent_global + ON discussions(last_note_at) + WHERE resolvable = 1 AND resolved = 0; + +-- Workload: issue assignees by username. +-- Existing indexes are on (issue_id) only, not (username). +CREATE INDEX IF NOT EXISTS idx_issue_assignees_username + ON issue_assignees(username, issue_id); +``` + +After adding, `LATEST_SCHEMA_VERSION` automatically becomes 17 via `MIGRATIONS.len() as i32`. + +**Rationale for each index:** +- `idx_notes_diffnote_path_created`: Covers Expert and Overlap mode's core query pattern — filter DiffNotes by path prefix + creation timestamp. Partial index (`WHERE note_type = 'DiffNote' AND is_system = 0`) keeps index small (~33K of 280K notes) and also covers the `is_system = 0` predicate that applies to all branches. Leading with `position_new_path` (not `note_type`) because the partial index predicate already handles the constant filter — the leading column should be the most selective variable predicate. +- `idx_notes_discussion_author`: Covers the repeated `EXISTS (SELECT 1 FROM notes n WHERE n.discussion_id = d.id AND n.author_username = ?1 AND n.is_system = 0)` pattern in Workload mode, and the participants subquery in Active mode. Without this, every discussion participation check does a full scan of all non-system notes for that discussion. 
+- `idx_discussions_unresolved_recent`: Covers Active mode with project scoping — find unresolved resolvable discussions ordered by recency within a specific project. Partial index keeps it tiny. +- `idx_discussions_unresolved_recent_global`: Covers Active mode *without* project scoping — when `project_id` is unconstrained, `(project_id, last_note_at)` can't satisfy `ORDER BY last_note_at DESC` efficiently. This single-column partial index handles the common default case where users run `lore who --active` without `-p`. +- `idx_issue_assignees_username`: Covers Workload mode — look up issues assigned to a specific user. Existing indexes are on `(issue_id)` only, not `(username)`. + +**Not added (already adequate):** +- `merge_requests(author_username)` — already indexed by migration 006 as `idx_mrs_author` +- `mr_reviewers(username)` — already indexed by migration 006 as `idx_mr_reviewers_username` +- `notes(discussion_id)` — already indexed by migration 002 as `idx_notes_discussion` + +--- + +## Step 1: CLI Args + Commands Enum + +### `src/cli/mod.rs` — Add `WhoArgs` struct + +Insert after the `TimelineArgs` struct (which ends around line 195, before the hidden `List` command): + +```rust +/// People intelligence: experts, workload, active discussions, overlap, review patterns +Who(WhoArgs), +``` + +Add the `WhoArgs` struct after `TimelineArgs`: + +```rust +#[derive(Parser)] +#[command(after_help = "\x1b[1mExamples:\x1b[0m + lore who src/features/auth/ # Who knows about this area? + lore who @asmith # What is asmith working on? + lore who @asmith --reviews # What review patterns does asmith have? + lore who --active # What discussions need attention? + lore who --overlap src/features/auth/ # Who else is touching these files? 
+ lore who --path README.md # Expert lookup for a root file + lore who --path Makefile # Expert lookup for a dotless root file")] +pub struct WhoArgs { + /// Username or file path (path if contains /) + pub target: Option, + + /// Force expert mode for a file/directory path. + /// Root files (README.md, LICENSE, Makefile) are treated as exact matches. + /// Use a trailing `/` to force directory-prefix matching. + #[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])] + pub path: Option, + + /// Show active unresolved discussions + #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])] + pub active: bool, + + /// Find users with MRs/notes touching this file path + #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])] + pub overlap: Option, + + /// Show review pattern analysis (requires username target) + #[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])] + pub reviews: bool, + + /// Time window (7d, 2w, 6m, YYYY-MM-DD). Default varies by mode. + #[arg(long, help_heading = "Filters")] + pub since: Option, + + /// Scope to a project (supports fuzzy matching) + #[arg(short = 'p', long, help_heading = "Filters")] + pub project: Option, + + /// Maximum results per section (1..=500, bounded for output safety) + #[arg( + short = 'n', + long = "limit", + default_value = "20", + value_parser = clap::value_parser!(u16).range(1..=500), + help_heading = "Output" + )] + pub limit: u16, +} +``` + +**Key design choice: `--path` flag for root files.** The positional `target` argument discriminates username vs. file path by checking for `/` — a heuristic that works for all nested paths but fails for root files like `README.md`, `LICENSE`, or `Makefile` (which have no `/`). 
Rather than adding fragile heuristics (e.g., checking for file extensions, which misses extensionless files like `Makefile` and `Dockerfile`), we add `--path` as an explicit escape hatch. This covers the uncommon case cleanly without complicating the common case. + +**Clap `conflicts_with` constraints.** Invalid combinations like `lore who --active --overlap path` are rejected at parse time with clap's built-in error messages, before application code ever runs. This removes the need for manual validation in `resolve_mode`. + +### `src/cli/mod.rs` — Add to `Commands` enum + +Insert `Who(WhoArgs)` in the Commands enum. Place it after `Timeline(TimelineArgs)` and before the hidden `List` command block: + +```rust + /// Show a chronological timeline of events matching a query + Timeline(TimelineArgs), + + /// People intelligence: experts, workload, active discussions, overlap + Who(WhoArgs), + + #[command(hide = true)] + List { +``` + +### `src/cli/commands/mod.rs` — Register module + exports + +Add after `pub mod timeline;`: + +```rust +pub mod who; +``` + +Add re-exports after the `timeline` pub use line: + +```rust +pub use who::{ + run_who, print_who_human, print_who_json, WhoRun, +}; +``` + +### `src/main.rs` — Import additions + +Add to the import block from `lore::cli::commands`: + +```rust + run_who, print_who_human, print_who_json, +``` + +Add to the import block from `lore::cli`: + +```rust + WhoArgs, +``` + +### `src/main.rs` — Dispatch arm + +Insert in the `match cli.command` block, after `Timeline`: + +```rust + Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode), +``` + +### `src/main.rs` — Handler function + +Add before `handle_list_compat`: + +```rust +fn handle_who( + config_override: Option<&str>, + args: WhoArgs, + robot_mode: bool, +) -> Result<(), Box> { + let start = std::time::Instant::now(); + let config = Config::load(config_override)?; + let run = run_who(&config, &args)?; + let elapsed_ms = 
start.elapsed().as_millis() as u64;
+
+    if robot_mode {
+        print_who_json(&run, &args, elapsed_ms);
+    } else {
+        print_who_human(&run.result, run.resolved_input.project_path.as_deref());
+    }
+    Ok(())
+}
+```
+
+**Note:** `print_who_json` takes `&run` (which contains both the result and resolved inputs) plus `&args` to echo raw CLI inputs.
+
+### `src/main.rs` — VALID_COMMANDS array
+
+Add `"who"` to the `VALID_COMMANDS` const array in `suggest_similar_command` (around line 471):
+
+```rust
+    const VALID_COMMANDS: &[&str] = &[
+        "issues",
+        "mrs",
+        // ... existing entries ...
+        "timeline",
+        "who",  // <-- add this
+    ];
+```
+
+### `src/main.rs` — robot-docs manifest
+
+Add a `"who"` entry to the `commands` JSON in `handle_robot_docs` (around line 2014, after "timeline"):
+
+```rust
+    "who": {
+        "description": "People intelligence: experts, workload, active discussions, overlap, review patterns",
+        "flags": ["<target>", "--path <path>", "--active", "--overlap <path>", "--reviews", "--since <window>", "-p/--project", "-n/--limit"],
+        "modes": {
+            "expert": "lore who <path> — Who knows about this area? (also: --path <path> for root files)",
+            "workload": "lore who <username> — What is someone working on?",
+            "reviews": "lore who <username> --reviews — Review pattern analysis",
+            "active": "lore who --active — Active unresolved discussions",
+            "overlap": "lore who --overlap <path> — Who else is touching these files?"
+ }, + "example": "lore --robot who src/features/auth/", + "response_schema": { + "ok": "bool", + "data": { + "mode": "string", + "input": {"target": "string|null", "path": "string|null", "project": "string|null", "since": "string|null", "limit": "int"}, + "resolved_input": {"mode": "string", "project_id": "int|null", "project_path": "string|null", "since_ms": "int", "since_iso": "string", "since_mode": "string (default|explicit|none)", "limit": "int"}, + "...": "mode-specific fields" + }, + "meta": {"elapsed_ms": "int"} + } + }, +``` + +Also add to the `workflows` JSON: + +```rust + "people_intelligence": [ + "lore --robot who src/path/to/feature/", + "lore --robot who @username", + "lore --robot who @username --reviews", + "lore --robot who --active --since 7d", + "lore --robot who --overlap src/path/", + "lore --robot who --path README.md" + ] +``` + +--- + +## Step 2: `src/cli/commands/who.rs` — Complete Implementation + +This is the main file. Full code follows. + +### File Header and Imports + +```rust +use console::style; +use rusqlite::Connection; +use serde::Serialize; +use std::collections::{HashMap, HashSet}; + +use crate::Config; +use crate::cli::robot::RobotMeta; +use crate::cli::WhoArgs; +use crate::core::db::create_connection; +use crate::core::error::{LoreError, Result}; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::core::time::{ms_to_iso, now_ms, parse_since}; +``` + +### Mode Discrimination + +```rust +/// Determines which query mode to run based on args. +/// Path variants own their strings because path normalization produces new `String`s. +/// Username variants borrow from args since no normalization is needed. 
+enum WhoMode<'a> {
+    /// lore who <path> OR lore who --path <path>
+    Expert { path: String },
+    /// lore who <username>
+    Workload { username: &'a str },
+    /// lore who <username> --reviews
+    Reviews { username: &'a str },
+    /// lore who --active
+    Active,
+    /// lore who --overlap <path>
+    Overlap { path: String },
+}
+
+fn resolve_mode<'a>(args: &'a WhoArgs) -> Result<WhoMode<'a>> {
+    // Explicit --path flag always wins (handles root files like README.md,
+    // LICENSE, Makefile — anything without a / that can't be auto-detected)
+    if let Some(p) = &args.path {
+        return Ok(WhoMode::Expert { path: normalize_repo_path(p) });
+    }
+    if args.active {
+        return Ok(WhoMode::Active);
+    }
+    if let Some(path) = &args.overlap {
+        return Ok(WhoMode::Overlap { path: normalize_repo_path(path) });
+    }
+    if let Some(target) = &args.target {
+        let clean = target.strip_prefix('@').unwrap_or(target);
+        if args.reviews {
+            return Ok(WhoMode::Reviews { username: clean });
+        }
+        // Disambiguation: if target contains '/', it's a file path.
+        // GitLab usernames never contain '/'.
+        // Root files (no '/') require --path.
+        if target.contains('/') {
+            return Ok(WhoMode::Expert { path: normalize_repo_path(target) });
+        }
+        return Ok(WhoMode::Workload { username: clean });
+    }
+    Err(LoreError::Other(
+        "Provide a username, file path, --active, or --overlap <path>.\n\n\
+         Examples:\n  \
+         lore who src/features/auth/\n  \
+         lore who @username\n  \
+         lore who --active\n  \
+         lore who --overlap src/features/\n  \
+         lore who --path README.md\n  \
+         lore who --path Makefile"
+            .to_string(),
+    ))
+}
+
+/// Normalize user-supplied repo paths to match stored DiffNote paths.
+/// - trims whitespace +/// - strips leading "./" and "/" (repo-relative paths) +/// - converts '\' to '/' when no '/' present (Windows paste) +/// - collapses repeated "//" +fn normalize_repo_path(input: &str) -> String { + let mut s = input.trim().to_string(); + // Windows backslash normalization (only when no forward slashes present) + if s.contains('\\') && !s.contains('/') { + s = s.replace('\\', "/"); + } + // Strip leading ./ + while s.starts_with("./") { + s = s[2..].to_string(); + } + // Strip leading / + s = s.trim_start_matches('/').to_string(); + // Collapse repeated // + while s.contains("//") { + s = s.replace("//", "/"); + } + s +} +``` + +### Result Types + +```rust +/// Top-level run result: carries resolved inputs + the mode-specific result. +pub struct WhoRun { + pub resolved_input: WhoResolvedInput, + pub result: WhoResult, +} + +/// Resolved query parameters — computed once, used for robot JSON reproducibility. +pub struct WhoResolvedInput { + pub mode: String, + pub project_id: Option, + pub project_path: Option, + pub since_ms: Option, + pub since_iso: Option, + /// "default" (mode default applied), "explicit" (user provided --since), "none" (no window) + pub since_mode: String, + pub limit: u16, +} + +/// Top-level result enum — one variant per mode. +pub enum WhoResult { + Expert(ExpertResult), + Workload(WorkloadResult), + Reviews(ReviewsResult), + Active(ActiveResult), + Overlap(OverlapResult), +} + +// --- Expert --- + +pub struct ExpertResult { + pub path_query: String, + /// "exact" or "prefix" — how the path was matched in SQL. + /// Helps agents and humans understand zero-result cases. 
+ pub path_match: String, + pub experts: Vec, + pub truncated: bool, +} + +pub struct Expert { + pub username: String, + pub score: i64, + pub review_mr_count: u32, + pub review_note_count: u32, + pub author_mr_count: u32, + pub last_seen_ms: i64, +} + +// --- Workload --- + +pub struct WorkloadResult { + pub username: String, + pub assigned_issues: Vec, + pub authored_mrs: Vec, + pub reviewing_mrs: Vec, + pub unresolved_discussions: Vec, + pub assigned_issues_truncated: bool, + pub authored_mrs_truncated: bool, + pub reviewing_mrs_truncated: bool, + pub unresolved_discussions_truncated: bool, +} + +pub struct WorkloadIssue { + pub iid: i64, + /// Canonical reference: `group/project#iid` + pub ref_: String, + pub title: String, + pub project_path: String, + pub updated_at: i64, +} + +pub struct WorkloadMr { + pub iid: i64, + /// Canonical reference: `group/project!iid` + pub ref_: String, + pub title: String, + pub draft: bool, + pub project_path: String, + pub author_username: Option, + pub updated_at: i64, +} + +pub struct WorkloadDiscussion { + pub entity_type: String, + pub entity_iid: i64, + /// Canonical reference: `group/project!iid` or `group/project#iid` + pub ref_: String, + pub entity_title: String, + pub project_path: String, + pub last_note_at: i64, +} + +// --- Reviews --- + +pub struct ReviewsResult { + pub username: String, + pub total_diffnotes: u32, + pub categorized_count: u32, + pub mrs_reviewed: u32, + pub categories: Vec, +} + +pub struct ReviewCategory { + pub name: String, + pub count: u32, + pub percentage: f64, +} + +// --- Active --- + +pub struct ActiveResult { + pub discussions: Vec, + /// Count of unresolved discussions *within the time window*, not total across all time. 
+ pub total_unresolved_in_window: u32, + pub truncated: bool, +} + +pub struct ActiveDiscussion { + pub discussion_id: i64, + pub entity_type: String, + pub entity_iid: i64, + pub entity_title: String, + pub project_path: String, + pub last_note_at: i64, + pub note_count: u32, + pub participants: Vec, + pub participants_total: u32, + pub participants_truncated: bool, +} + +// --- Overlap --- + +pub struct OverlapResult { + pub path_query: String, + /// "exact" or "prefix" — how the path was matched in SQL. + pub path_match: String, + pub users: Vec, + pub truncated: bool, +} + +pub struct OverlapUser { + pub username: String, + pub author_touch_count: u32, + pub review_touch_count: u32, + pub touch_count: u32, + pub last_seen_at: i64, + /// Stable MR references like "group/project!123" + pub mr_refs: Vec, + pub mr_refs_total: u32, + pub mr_refs_truncated: bool, +} +``` + +**Overlap role tracking:** `OverlapUser` tracks `author_touch_count` and `review_touch_count` separately instead of collapsing to a single `role: String`. This preserves valuable information — a user who is both author and reviewer on different MRs at the same path is a stronger signal than either role alone. The human output renders this as `A`, `R`, or `A+R`. + +**`mr_refs` instead of `mr_iids`:** In a multi-project database, bare `!123` is ambiguous — iid 123 could exist in multiple projects. `mr_refs` stores stable project-qualified references like `group/project!123` that are globally unique and directly actionable. + +**`discussion_id` in ActiveDiscussion:** Agents need stable IDs to dereference entities (e.g., follow up on a specific discussion). Without this, the combination of `entity_type + entity_iid + project_path` is the only way to identify a discussion, and that's fragile. 
+ +**`since_mode` in WhoResolvedInput:** Agents replaying queries need to distinguish "user explicitly asked for 6 months" (`"explicit"`) from "6 months was the mode default" (`"default"`) from "no time window at all" (`"none"`, Workload mode). The boolean `since_was_default` was ambiguous for Workload, which has no default window — `true` incorrectly implied a default was applied. The tri-state resolves this: an explicit `--since 6m` means "the user cares about this window"; `"default"` means "use whatever's reasonable"; `"none"` means "query is unbounded". + +### Entry Point + +```rust +/// Main entry point. Resolves mode + resolved inputs once, then dispatches. +pub fn run_who(config: &Config, args: &WhoArgs) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + let project_id = args + .project + .as_deref() + .map(|p| resolve_project(&conn, p)) + .transpose()?; + + let project_path = project_id + .map(|id| lookup_project_path(&conn, id)) + .transpose()?; + + let mode = resolve_mode(args)?; + + // since_mode semantics: + // - expert/reviews/active/overlap: default window applies if args.since is None -> "default" + // - workload: no default window; args.since None => "none" + let since_mode_for_defaulted = if args.since.is_some() { "explicit" } else { "default" }; + let since_mode_for_workload = if args.since.is_some() { "explicit" } else { "none" }; + + match mode { + WhoMode::Expert { path } => { + let since_ms = resolve_since(args.since.as_deref(), "6m")?; + let limit = usize::from(args.limit); + let result = query_expert(&conn, &path, project_id, since_ms, limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "expert".to_string(), + project_id, + project_path, + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), + since_mode: since_mode_for_defaulted.to_string(), + limit: args.limit, + }, + result: WhoResult::Expert(result), + }) + } + WhoMode::Workload { 
username } => { + let since_ms = args + .since + .as_deref() + .map(|s| resolve_since_required(s)) + .transpose()?; + let limit = usize::from(args.limit); + let result = query_workload(&conn, username, project_id, since_ms, limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "workload".to_string(), + project_id, + project_path, + since_ms, + since_iso: since_ms.map(ms_to_iso), + since_mode: since_mode_for_workload.to_string(), + limit: args.limit, + }, + result: WhoResult::Workload(result), + }) + } + WhoMode::Reviews { username } => { + let since_ms = resolve_since(args.since.as_deref(), "6m")?; + let result = query_reviews(&conn, username, project_id, since_ms)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "reviews".to_string(), + project_id, + project_path, + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), + since_mode: since_mode_for_defaulted.to_string(), + limit: args.limit, + }, + result: WhoResult::Reviews(result), + }) + } + WhoMode::Active => { + let since_ms = resolve_since(args.since.as_deref(), "7d")?; + let limit = usize::from(args.limit); + let result = query_active(&conn, project_id, since_ms, limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "active".to_string(), + project_id, + project_path, + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), + since_mode: since_mode_for_defaulted.to_string(), + limit: args.limit, + }, + result: WhoResult::Active(result), + }) + } + WhoMode::Overlap { path } => { + let since_ms = resolve_since(args.since.as_deref(), "30d")?; + let limit = usize::from(args.limit); + let result = query_overlap(&conn, &path, project_id, since_ms, limit)?; + Ok(WhoRun { + resolved_input: WhoResolvedInput { + mode: "overlap".to_string(), + project_id, + project_path, + since_ms: Some(since_ms), + since_iso: Some(ms_to_iso(since_ms)), + since_mode: since_mode_for_defaulted.to_string(), + limit: args.limit, + }, + result: WhoResult::Overlap(result), + }) + 
} + } +} + +/// Look up the project path for a resolved project ID. +fn lookup_project_path(conn: &Connection, project_id: i64) -> Result { + conn.query_row( + "SELECT path_with_namespace FROM projects WHERE id = ?1", + rusqlite::params![project_id], + |row| row.get(0), + ) + .map_err(|e| LoreError::Other(format!("Failed to look up project path: {e}"))) +} + +/// Parse --since with a default fallback. +fn resolve_since(input: Option<&str>, default: &str) -> Result { + let s = input.unwrap_or(default); + parse_since(s).ok_or_else(|| { + LoreError::Other(format!( + "Invalid --since value: '{s}'. Use a duration (7d, 2w, 6m) or date (2024-01-15)" + )) + }) +} + +/// Parse --since without a default (returns error if invalid). +fn resolve_since_required(input: &str) -> Result { + parse_since(input).ok_or_else(|| { + LoreError::Other(format!( + "Invalid --since value: '{input}'. Use a duration (7d, 2w, 6m) or date (2024-01-15)" + )) + }) +} +``` + +### Helper: Path Query Construction + +All path-based queries (Expert, Overlap) need to convert user input into a SQL match. This logic is centralized in one helper to avoid duplication and ensure consistent behavior. The helper produces a `PathQuery` struct that indicates whether to use `=` (exact file match) or `LIKE` (directory prefix match), enabling the caller to select the appropriate static SQL string. + +```rust +/// Describes how to match a user-supplied path in SQL. +struct PathQuery { + /// The parameter value to bind. + value: String, + /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. + is_prefix: bool, +} + +/// Build a path query from a user-supplied path, with project-scoped DB probes. 
+///
+/// Rules:
+/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE)
+/// - Otherwise, **two-way DB probes** (project-scoped) decide first: probe the
+///   exact path (scoped to project if `-p` was given), then probe the prefix
+///   (scoped). Only fall back to heuristics if both probes fail. This prevents
+///   cross-project misclassification: a path that exists as a file in project A
+///   shouldn't force exact-match when the user runs `-p project_B` where it's a
+///   directory (or absent). Each probe uses the existing
+///   `idx_notes_diffnote_path_created` partial index and costs at most one
+///   indexed existence query.
+/// - Heuristic fallback (both probes empty): root paths (no `/`) and paths whose
+///   last segment contains `.` are treated as exact files (=) — this makes
+///   `--path Makefile` and `--path LICENSE` work as intended; anything else is
+///   treated as a directory prefix -> `escaped_path/%` (LIKE)
+///
+/// LIKE metacharacters (`%`, `_`, `\`) in the path are escaped with `\`.
+/// All LIKE queries using this pattern MUST include `ESCAPE '\'`.
+///
+/// **IMPORTANT:** `escape_like()` is ONLY called for prefix (LIKE) matches.
+/// For exact matches (`=`), the raw trimmed path is used directly — LIKE
+/// metacharacters are not special in `=` comparisons, so escaping them
+/// would break the match (e.g., `README\_with\_underscore.md` != stored
+/// `README_with_underscore.md`).
+///
+/// Root file paths passed via `--path` (including dotless files like Makefile/LICENSE)
+/// default to exact matches when the probes are inconclusive, unless they end with `/`.
+fn build_path_query(conn: &Connection, path: &str, project_id: Option<i64>) -> Result<PathQuery> {
+    let trimmed = path.trim_end_matches('/');
+    let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
+    let is_root = !trimmed.contains('/');
+    let forced_dir = path.ends_with('/');
+    // Heuristic is now only a fallback; probes decide first when ambiguous.
+ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); + + // Probe 1: exact file exists (project-scoped via nullable binding) + // Runs for ALL non-forced-dir paths, not just heuristically ambiguous ones. + // This catches dotless files in subdirectories (src/Dockerfile, infra/Makefile) + // that heuristics would misclassify as directories. + let exact_exists = conn.query_row( + "SELECT 1 FROM notes + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND position_new_path = ?1 + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ).is_ok(); + + // Probe 2: directory prefix exists (project-scoped) + // Only probed when not forced-dir and exact didn't match — avoids + // redundant work when we already know the answer. + let prefix_exists = if !forced_dir && !exact_exists { + let escaped = escape_like(trimmed); + let pat = format!("{escaped}/%"); + conn.query_row( + "SELECT 1 FROM notes + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND position_new_path LIKE ?1 ESCAPE '\\' + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ).is_ok() + } else { + false + }; + + // Forced directory always wins; otherwise: exact > prefix > heuristic + let is_file = if forced_dir { + false + } else if exact_exists { + true + } else if prefix_exists { + false + } else { + looks_like_file + }; + + if is_file { + // IMPORTANT: do NOT escape for exact match (=). LIKE metacharacters + // are not special in `=`, so escaping would produce wrong values. + Ok(PathQuery { + value: trimmed.to_string(), + is_prefix: false, + }) + } else { + let escaped = escape_like(trimmed); + Ok(PathQuery { + value: format!("{escaped}/%"), + is_prefix: true, + }) + } +} + +/// Escape LIKE metacharacters. All queries using this must include `ESCAPE '\'`. 
+fn escape_like(input: &str) -> String { + input + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_") +} +``` + +**Path classification rationale:** The original plan used `path.contains('.')` to detect files, which misclassifies directories like `.github/workflows/` or `src/v1.2/auth/`. The improved approach checks only the *last* path segment for a dot, and respects trailing `/` as an explicit directory signal. Root files without `/` (like `README.md`) can't be passed as a positional arg (would be treated as a username); they require `--path`. + +**Root path handling (dotless files like LICENSE, Makefile):** Root paths — those with no `/` separator — passed via `--path` are treated as exact matches by default. This makes `--path Makefile` and `--path LICENSE` produce correct results instead of the previous behavior of generating a prefix `Makefile/%` that would return zero results. Users can append `/` (e.g., `--path Makefile/`) to force prefix matching if desired. + +**Dotless subdirectory file handling (src/Dockerfile, infra/Makefile):** Files like `src/Dockerfile` have no extension in their last segment, making them indistinguishable from directories via heuristics alone. Rather than maintaining a brittle allowlist of known extensionless filenames, `build_path_query` performs project-scoped two-way DB probes — first exact, then prefix — each a `SELECT 1 ... LIMIT 1` against the existing partial index. If the exact path exists in DiffNotes (within the scoped project, when `-p` is given), it's treated as a file (exact match). If only prefix matches exist, it's treated as a directory. If neither probe finds data, heuristics take over. This costs at most two cheap indexed queries per invocation and prevents cross-project misclassification: a path that exists as a dotless file in project A shouldn't force exact-match behavior when running `-p project_B` where it doesn't exist. 
+ +**Exact vs prefix SQL selection:** ChatGPT feedback-3 correctly identified that for exact file paths (e.g., `src/auth/login.rs`), using `LIKE ?1 ESCAPE '\'` where `?1` has no wildcard is logically correct but gives the query planner a weaker signal than `=`. The `PathQuery.is_prefix` flag lets callers select between two static SQL strings at prepare time — no dynamic SQL assembly, just `if pq.is_prefix { conn.prepare_cached(sql_prefix)? } else { conn.prepare_cached(sql_exact)? }`. This gives SQLite the best possible information for both cases. + +### Query: Expert Mode + +```rust +fn query_expert( + conn: &Connection, + path: &str, + project_id: Option, + since_ms: i64, + limit: usize, +) -> Result { + let pq = build_path_query(conn, path, project_id)?; + let limit_plus_one = (limit + 1) as i64; + + // Two static SQL strings: one for prefix (LIKE), one for exact (=). + // Both produce identical output columns; only the path predicate differs. + // Scoring is done entirely in SQL via CTE, so no Rust-side merge/sort needed. + // Reviewer branch: JOINs through discussions -> merge_requests to: + // 1. COUNT(DISTINCT m.id) — MR breadth, the primary scoring signal + // 2. COUNT(*) — note intensity, a secondary signal + // 3. Exclude self-reviews (n.author_username != m.author_username) + // + // Author branch: COUNT(DISTINCT m.id) for MR breadth. + // + // Scoring uses integer arithmetic: + // review_mr_count * 20 + author_mr_count * 12 + review_note_count * 1 + // This weights MR breadth heavily (prevents "comment storm" gaming) + // while still giving a small bonus for review intensity. 
+ let sql_prefix = " + WITH activity AS ( + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS mr_cnt, + COUNT(*) AS note_cnt, + MAX(n.created_at) AS last_seen_at + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + UNION ALL + + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS mr_cnt, + 0 AS note_cnt, + MAX(n.created_at) AS last_seen_at + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username IS NOT NULL + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username + ) + SELECT + username, + SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, + SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, + SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, + MAX(last_seen_at) AS last_seen_at, + ( + (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + + (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + + (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) + ) AS score + FROM activity + GROUP BY username + ORDER BY score DESC, last_seen_at DESC, username ASC + LIMIT ?4 + "; + + let sql_exact = " + WITH activity AS ( + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS mr_cnt, + COUNT(*) AS note_cnt, + MAX(n.created_at) AS last_seen_at + FROM 
notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.position_new_path = ?1 + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + UNION ALL + + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS mr_cnt, + 0 AS note_cnt, + MAX(n.created_at) AS last_seen_at + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username IS NOT NULL + AND n.position_new_path = ?1 + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username + ) + SELECT + username, + SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) AS review_mr_count, + SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) AS review_note_count, + SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) AS author_mr_count, + MAX(last_seen_at) AS last_seen_at, + ( + (SUM(CASE WHEN role = 'reviewer' THEN mr_cnt ELSE 0 END) * 20) + + (SUM(CASE WHEN role = 'author' THEN mr_cnt ELSE 0 END) * 12) + + (SUM(CASE WHEN role = 'reviewer' THEN note_cnt ELSE 0 END) * 1) + ) AS score + FROM activity + GROUP BY username + ORDER BY score DESC, last_seen_at DESC, username ASC + LIMIT ?4 + "; + + let mut stmt = if pq.is_prefix { + conn.prepare_cached(sql_prefix)? + } else { + conn.prepare_cached(sql_exact)? + }; + + let experts: Vec = stmt + .query_map( + rusqlite::params![pq.value, since_ms, project_id, limit_plus_one], + |row| { + Ok(Expert { + username: row.get(0)?, + review_mr_count: row.get(1)?, + review_note_count: row.get(2)?, + author_mr_count: row.get(3)?, + last_seen_ms: row.get(4)?, + score: row.get(5)?, + }) + }, + )? 
+        .collect::<Result<Vec<_>, _>>()?;
+
+    let truncated = experts.len() > limit;
+    let experts: Vec<Expert> = experts.into_iter().take(limit).collect();
+
+    Ok(ExpertResult {
+        path_query: path.to_string(),
+        path_match: if pq.is_prefix { "prefix" } else { "exact" }.to_string(),
+        experts,
+        truncated,
+    })
+}
+```
+
+**Key design decisions:**
+1. **`note_type = 'DiffNote'` filter** — Only DiffNotes have reliable `position_new_path`. Including other note types skews counts and scans more rows.
+2. **Two static SQL strings (prefix vs exact)** — `PathQuery.is_prefix` selects the right one at prepare time. For exact file paths, `=` gives the planner a stronger signal than `LIKE` without wildcards. Both strings are `&str` literals — no `format!()`.
+3. **`(?3 IS NULL OR n.project_id = ?3)`** — Static SQL with nullable binding, matching the `timeline_seed.rs` pattern. Eliminates dynamic SQL assembly and param vector juggling. **Critically, scoping is on `n.project_id` (not `m.project_id`)** so SQLite can use `idx_notes_diffnote_path_created` which has `project_id` as a trailing column.
+4. **`n.is_system = 0` on BOTH branches** — Author branch filters system notes too, matching reviewer branch for consistency. System-generated DiffNotes (bot activity, CI annotations) would skew "author touch" counts.
+5. **Author branch uses `n.created_at`** instead of `m.updated_at` — Anchors "last seen" to actual review activity at the path, not just MR update time (which could be a label change, rebase, etc.).
+5b. **MR state filtering on reviewer branches** — `m.state IN ('opened','merged')` excludes closed/unmerged noise from reviewer branches, matching the existing filter on Overlap author branches. Closed MRs that were never merged are low-signal; if archaeology across abandoned MRs is needed later, a separate flag can enable it.
+6. **SQL-level aggregation and scoring via CTE** — All aggregation (reviewer + author merge), scoring, sorting, and LIMIT happens in SQL.
No Rust-side HashMap merge/sort/truncate needed. This reduces materialized rows, makes LIMIT effective at the DB level, and is deterministic (tiebreaker: `last_seen_at DESC, username ASC`). +7. **`prepare_cached()`** — Static SQL enables SQLite prepared statement caching for free. +8. **Self-review exclusion** — Reviewer branch JOINs through `discussions -> merge_requests` and filters `n.author_username != m.author_username` (with `IS NULL` guard). MR authors replying to DiffNotes on their own diffs (clarifications, followups) are NOT reviewers. Without this, an author who leaves 30 clarification comments on their own MR would appear as the top "reviewer" for that path. Reviews mode already does this (`m.author_username != ?1`); now Expert and Overlap are consistent. +9. **MR-breadth scoring** — Reviewer branch uses `COUNT(DISTINCT m.id)` as the primary metric (how many MRs reviewed) instead of raw DiffNote count (how many comments). This prevents "comment storm" gaming — someone who writes 30 comments on one MR shouldn't outrank someone who reviewed 5 different MRs. Note count is kept as a secondary intensity signal (`review_note_count * 1` in the score formula). Author branch was already counting distinct MRs. +10. **Truncation** — Queries request `LIMIT + 1` rows. If more than `limit` rows are returned, `truncated: true` is set and results are trimmed. Both human and robot output surface this so consumers know to retry with a higher `--limit`. 
+ +### Query: Workload Mode + +```rust +fn query_workload( + conn: &Connection, + username: &str, + project_id: Option, + since_ms: Option, + limit: usize, +) -> Result { + let limit_plus_one = (limit + 1) as i64; + + // Query 1: Open issues assigned to user + // SQL computes canonical ref (group/project#iid) for direct copy-paste use + let issues_sql = + "SELECT i.iid, + (p.path_with_namespace || '#' || i.iid) AS ref, + i.title, p.path_with_namespace, i.updated_at + FROM issues i + JOIN issue_assignees ia ON ia.issue_id = i.id + JOIN projects p ON i.project_id = p.id + WHERE ia.username = ?1 + AND i.state = 'opened' + AND (?2 IS NULL OR i.project_id = ?2) + AND (?3 IS NULL OR i.updated_at >= ?3) + ORDER BY i.updated_at DESC + LIMIT ?4"; + + let mut stmt = conn.prepare_cached(issues_sql)?; + let assigned_issues: Vec = stmt + .query_map(rusqlite::params![username, project_id, since_ms, limit_plus_one], |row| { + Ok(WorkloadIssue { + iid: row.get(0)?, + ref_: row.get(1)?, + title: row.get(2)?, + project_path: row.get(3)?, + updated_at: row.get(4)?, + }) + })? + .collect::, _>>()?; + + // Query 2: Open MRs authored + // SQL computes canonical ref (group/project!iid) for direct copy-paste use + let authored_sql = + "SELECT m.iid, + (p.path_with_namespace || '!' || m.iid) AS ref, + m.title, m.draft, p.path_with_namespace, m.updated_at + FROM merge_requests m + JOIN projects p ON m.project_id = p.id + WHERE m.author_username = ?1 + AND m.state = 'opened' + AND (?2 IS NULL OR m.project_id = ?2) + AND (?3 IS NULL OR m.updated_at >= ?3) + ORDER BY m.updated_at DESC + LIMIT ?4"; + let mut stmt = conn.prepare_cached(authored_sql)?; + let authored_mrs: Vec = stmt + .query_map(rusqlite::params![username, project_id, since_ms, limit_plus_one], |row| { + Ok(WorkloadMr { + iid: row.get(0)?, + ref_: row.get(1)?, + title: row.get(2)?, + draft: row.get::<_, i32>(3)? != 0, + project_path: row.get(4)?, + author_username: None, + updated_at: row.get(5)?, + }) + })? 
+ .collect::, _>>()?; + + // Query 3: Open MRs where user is reviewer + let reviewing_sql = + "SELECT m.iid, + (p.path_with_namespace || '!' || m.iid) AS ref, + m.title, m.draft, p.path_with_namespace, + m.author_username, m.updated_at + FROM merge_requests m + JOIN mr_reviewers r ON r.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + WHERE r.username = ?1 + AND m.state = 'opened' + AND (?2 IS NULL OR m.project_id = ?2) + AND (?3 IS NULL OR m.updated_at >= ?3) + ORDER BY m.updated_at DESC + LIMIT ?4"; + let mut stmt = conn.prepare_cached(reviewing_sql)?; + let reviewing_mrs: Vec = stmt + .query_map(rusqlite::params![username, project_id, since_ms, limit_plus_one], |row| { + Ok(WorkloadMr { + iid: row.get(0)?, + ref_: row.get(1)?, + title: row.get(2)?, + draft: row.get::<_, i32>(3)? != 0, + project_path: row.get(4)?, + author_username: row.get(5)?, + updated_at: row.get(6)?, + }) + })? + .collect::, _>>()?; + + // Query 4: Unresolved discussions where user participated + // Note: --since is intentionally NOT applied by default to discussions. + // Unresolved threads remain relevant regardless of age. When --since is + // explicitly provided, it filters on d.last_note_at to show only threads + // with recent activity. + // Canonical ref uses CASE to pick '#' for issues, '!' for MRs + let disc_sql = + "SELECT d.noteable_type, + COALESCE(i.iid, m.iid) AS entity_iid, + (p.path_with_namespace || + CASE WHEN d.noteable_type = 'MergeRequest' THEN '!' 
ELSE '#' END || + COALESCE(i.iid, m.iid)) AS ref, + COALESCE(i.title, m.title) AS entity_title, + p.path_with_namespace, + d.last_note_at + FROM discussions d + JOIN projects p ON d.project_id = p.id + LEFT JOIN issues i ON d.issue_id = i.id + LEFT JOIN merge_requests m ON d.merge_request_id = m.id + WHERE d.resolvable = 1 AND d.resolved = 0 + AND EXISTS ( + SELECT 1 FROM notes n + WHERE n.discussion_id = d.id + AND n.author_username = ?1 + AND n.is_system = 0 + ) + AND (?2 IS NULL OR d.project_id = ?2) + AND (?3 IS NULL OR d.last_note_at >= ?3) + ORDER BY d.last_note_at DESC + LIMIT ?4"; + + let mut stmt = conn.prepare_cached(disc_sql)?; + let unresolved_discussions: Vec = stmt + .query_map(rusqlite::params![username, project_id, since_ms, limit_plus_one], |row| { + let noteable_type: String = row.get(0)?; + let entity_type = if noteable_type == "MergeRequest" { + "MR" + } else { + "Issue" + }; + Ok(WorkloadDiscussion { + entity_type: entity_type.to_string(), + entity_iid: row.get(1)?, + ref_: row.get(2)?, + entity_title: row.get(3)?, + project_path: row.get(4)?, + last_note_at: row.get(5)?, + }) + })? + .collect::, _>>()?; + + // Truncation detection: each section queried LIMIT+1 rows. + // Detect overflow, set truncated flags, trim to requested limit. 
+    let assigned_issues_truncated = assigned_issues.len() > limit;
+    let authored_mrs_truncated = authored_mrs.len() > limit;
+    let reviewing_mrs_truncated = reviewing_mrs.len() > limit;
+    let unresolved_discussions_truncated = unresolved_discussions.len() > limit;
+
+    let assigned_issues: Vec<WorkloadIssue> = assigned_issues.into_iter().take(limit).collect();
+    let authored_mrs: Vec<WorkloadMr> = authored_mrs.into_iter().take(limit).collect();
+    let reviewing_mrs: Vec<WorkloadMr> = reviewing_mrs.into_iter().take(limit).collect();
+    let unresolved_discussions: Vec<WorkloadDiscussion> = unresolved_discussions.into_iter().take(limit).collect();
+
+    Ok(WorkloadResult {
+        username: username.to_string(),
+        assigned_issues,
+        authored_mrs,
+        reviewing_mrs,
+        unresolved_discussions,
+        assigned_issues_truncated,
+        authored_mrs_truncated,
+        reviewing_mrs_truncated,
+        unresolved_discussions_truncated,
+    })
+}
+```
+
+**Key design decisions:**
+1. **Fully static SQL** — All four queries are `&str` literals with `LIMIT ?4` bound as a parameter. No `format!()` anywhere. This enables SQLite prepared statement caching and eliminates dynamic SQL creep.
+2. **`prepare_cached()`** — All four queries use `prepare_cached()` for statement caching.
+3. **Unified parameter binding** — All four queries use `rusqlite::params![username, project_id, since_ms, limit_plus_one]` with `(?N IS NULL OR ...)`. Eliminates the `disc_params` / `disc_param_refs` duplication.
+4. **Consistent `--since` behavior** — The `since_ms` param (which is `Option<i64>`) is bound in all four queries. When `None` (user didn't pass `--since`), the `(?3 IS NULL OR ...)` clause is a no-op. This means `--since` consistently filters *everything* when provided, but doesn't artificially restrict when omitted. Unresolved discussions still show regardless of age by default, but respect `--since` when it's explicitly set.
+5. **Canonical `ref_` field** — SQL computes project-qualified references (`group/project#iid` for issues, `group/project!iid` for MRs) directly in the SELECT.
This single copy-pasteable token replaces the need to display `iid` + `project_path` separately in human output, and reduces agent stitching work in robot JSON. Both `ref` and `project_path` are included in robot output for flexibility. + +### Query: Reviews Mode + +```rust +fn query_reviews( + conn: &Connection, + username: &str, + project_id: Option, + since_ms: i64, +) -> Result { + // Count total DiffNotes by this user on MRs they didn't author + let total_sql = + "SELECT COUNT(*) FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.author_username = ?1 + AND n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username != ?1 + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3)"; + + let total_diffnotes: u32 = + conn.query_row(total_sql, rusqlite::params![username, since_ms, project_id], |row| row.get(0))?; + + // Count distinct MRs reviewed + let mrs_sql = + "SELECT COUNT(DISTINCT m.id) FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + WHERE n.author_username = ?1 + AND n.note_type = 'DiffNote' + AND n.is_system = 0 + AND m.author_username != ?1 + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3)"; + + let mrs_reviewed: u32 = + conn.query_row(mrs_sql, rusqlite::params![username, since_ms, project_id], |row| row.get(0))?; + + // Extract prefixed categories: body starts with **prefix** + // ltrim(n.body) tolerates leading whitespace (e.g., " **suggestion**: ...") + // which is common in practice but would be missed by a strict LIKE '**%**%'. 
+    let cat_sql =
+        "SELECT
+            SUBSTR(ltrim(n.body), 3, INSTR(SUBSTR(ltrim(n.body), 3), '**') - 1) AS raw_prefix,
+            COUNT(*) AS cnt
+         FROM notes n
+         JOIN discussions d ON n.discussion_id = d.id
+         JOIN merge_requests m ON d.merge_request_id = m.id
+         WHERE n.author_username = ?1
+           AND n.note_type = 'DiffNote'
+           AND n.is_system = 0
+           AND m.author_username != ?1
+           AND ltrim(n.body) LIKE '**%**%'
+           AND n.created_at >= ?2
+           AND (?3 IS NULL OR n.project_id = ?3)
+         GROUP BY raw_prefix
+         ORDER BY cnt DESC";
+
+    let mut stmt = conn.prepare_cached(cat_sql)?;
+    let raw_categories: Vec<(String, u32)> = stmt
+        .query_map(rusqlite::params![username, since_ms, project_id], |row| {
+            Ok((row.get::<_, String>(0)?, row.get(1)?))
+        })?
+        .collect::<Result<Vec<_>, _>>()?;
+
+    // Normalize categories: lowercase, strip trailing colon/space,
+    // merge nit/nitpick variants, merge (non-blocking) variants
+    let mut merged: HashMap<String, u32> = HashMap::new();
+    for (raw, count) in &raw_categories {
+        let normalized = normalize_review_prefix(raw);
+        if !normalized.is_empty() {
+            *merged.entry(normalized).or_insert(0) += count;
+        }
+    }
+
+    let categorized_count: u32 = merged.values().sum();
+
+    let mut categories: Vec<ReviewCategory> = merged
+        .into_iter()
+        .map(|(name, count)| {
+            let percentage = if categorized_count > 0 {
+                f64::from(count) / f64::from(categorized_count) * 100.0
+            } else {
+                0.0
+            };
+            ReviewCategory {
+                name,
+                count,
+                percentage,
+            }
+        })
+        .collect();
+
+    categories.sort_by(|a, b| b.count.cmp(&a.count));
+
+    Ok(ReviewsResult {
+        username: username.to_string(),
+        total_diffnotes,
+        categorized_count,
+        mrs_reviewed,
+        categories,
+    })
+}
+
+/// Normalize a raw review prefix like "Suggestion (non-blocking):" into "suggestion".
+fn normalize_review_prefix(raw: &str) -> String { + let s = raw + .trim() + .trim_end_matches(':') + .trim() + .to_lowercase(); + + // Strip "(non-blocking)" and similar parentheticals + let s = if let Some(idx) = s.find('(') { + s[..idx].trim().to_string() + } else { + s + }; + + // Merge nit/nitpick variants + match s.as_str() { + "nitpick" | "nit" => "nit".to_string(), + other => other.to_string(), + } +} +``` + +### Query: Active Mode + +```rust +fn query_active( + conn: &Connection, + project_id: Option, + since_ms: i64, + limit: usize, +) -> Result { + let limit_plus_one = (limit + 1) as i64; + + // Total unresolved count — two static variants to avoid nullable-OR planner ambiguity. + // The (?N IS NULL OR ...) pattern prevents SQLite from knowing at prepare time whether + // the constraint is active, potentially choosing a suboptimal index. Separate statements + // ensure the intended index (global vs project-scoped) is used. + let total_sql_global = + "SELECT COUNT(*) FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1"; + let total_sql_scoped = + "SELECT COUNT(*) FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1 + AND d.project_id = ?2"; + + let total_unresolved_in_window: u32 = match project_id { + None => conn.query_row(total_sql_global, rusqlite::params![since_ms], |row| row.get(0))?, + Some(pid) => conn.query_row(total_sql_scoped, rusqlite::params![since_ms, pid], |row| row.get(0))?, + }; + + // Active discussions with context — two static SQL variants (global vs project-scoped). + // + // CTE-based approach: first select the limited set of discussions (using + // idx_discussions_unresolved_recent or idx_discussions_unresolved_recent_global), + // then join notes ONCE to aggregate counts and participants. This avoids two + // correlated subqueries per discussion row (note_count + participants), which can + // create spiky performance if the planner chooses poorly. 
+ // + // NOTE on GROUP_CONCAT: SQLite does not support + // GROUP_CONCAT(DISTINCT col, separator) + // with both DISTINCT and a custom separator. The participants CTE uses a + // subquery with SELECT DISTINCT first, then GROUP_CONCAT with the separator. + // + // Why two variants instead of (?2 IS NULL OR d.project_id = ?2): + // SQLite evaluates `IS NULL` at runtime, not prepare time. With the nullable-OR + // pattern, the planner can't assume project_id is constrained and may choose + // idx_discussions_unresolved_recent (which leads with project_id) even for the + // global case, forcing a full scan + sort. Separate statements let each use + // the optimal index: global uses idx_discussions_unresolved_recent_global, + // scoped uses idx_discussions_unresolved_recent. + let sql_global = " + WITH picked AS ( + SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, + d.project_id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1 + ORDER BY d.last_note_at DESC + LIMIT ?2 + ), + note_counts AS ( + SELECT + n.discussion_id, + COUNT(*) AS note_count + FROM notes n + JOIN picked p ON p.id = n.discussion_id + WHERE n.is_system = 0 + GROUP BY n.discussion_id + ), + participants AS ( + SELECT + x.discussion_id, + GROUP_CONCAT(x.author_username, X'1F') AS participants + FROM ( + SELECT DISTINCT n.discussion_id, n.author_username + FROM notes n + JOIN picked p ON p.id = n.discussion_id + WHERE n.is_system = 0 AND n.author_username IS NOT NULL + ) x + GROUP BY x.discussion_id + ) + SELECT + p.id AS discussion_id, + p.noteable_type, + COALESCE(i.iid, m.iid) AS entity_iid, + COALESCE(i.title, m.title) AS entity_title, + proj.path_with_namespace, + p.last_note_at, + COALESCE(nc.note_count, 0) AS note_count, + COALESCE(pa.participants, '') AS participants + FROM picked p + JOIN projects proj ON p.project_id = proj.id + LEFT JOIN issues i ON p.issue_id = i.id + LEFT JOIN merge_requests m ON p.merge_request_id = m.id 
+ LEFT JOIN note_counts nc ON nc.discussion_id = p.id + LEFT JOIN participants pa ON pa.discussion_id = p.id + ORDER BY p.last_note_at DESC + "; + + let sql_scoped = " + WITH picked AS ( + SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, + d.project_id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= ?1 + AND d.project_id = ?2 + ORDER BY d.last_note_at DESC + LIMIT ?3 + ), + note_counts AS ( + SELECT + n.discussion_id, + COUNT(*) AS note_count + FROM notes n + JOIN picked p ON p.id = n.discussion_id + WHERE n.is_system = 0 + GROUP BY n.discussion_id + ), + participants AS ( + SELECT + x.discussion_id, + GROUP_CONCAT(x.author_username, X'1F') AS participants + FROM ( + SELECT DISTINCT n.discussion_id, n.author_username + FROM notes n + JOIN picked p ON p.id = n.discussion_id + WHERE n.is_system = 0 AND n.author_username IS NOT NULL + ) x + GROUP BY x.discussion_id + ) + SELECT + p.id AS discussion_id, + p.noteable_type, + COALESCE(i.iid, m.iid) AS entity_iid, + COALESCE(i.title, m.title) AS entity_title, + proj.path_with_namespace, + p.last_note_at, + COALESCE(nc.note_count, 0) AS note_count, + COALESCE(pa.participants, '') AS participants + FROM picked p + JOIN projects proj ON p.project_id = proj.id + LEFT JOIN issues i ON p.issue_id = i.id + LEFT JOIN merge_requests m ON p.merge_request_id = m.id + LEFT JOIN note_counts nc ON nc.discussion_id = p.id + LEFT JOIN participants pa ON pa.discussion_id = p.id + ORDER BY p.last_note_at DESC + "; + + // Row-mapping closure shared between both variants + let map_row = |row: &rusqlite::Row| -> rusqlite::Result { + let noteable_type: String = row.get(1)?; + let entity_type = if noteable_type == "MergeRequest" { + "MR" + } else { + "Issue" + }; + let participants_csv: Option = row.get(7)?; + // Sort participants for deterministic output — GROUP_CONCAT order is undefined + let mut participants: Vec = participants_csv + .as_deref() + .filter(|s| 
!s.is_empty())
+            .map(|csv| csv.split('\x1F').map(String::from).collect())
+            .unwrap_or_default();
+        participants.sort();
+
+        const MAX_PARTICIPANTS: usize = 50;
+        let participants_total = participants.len() as u32;
+        let participants_truncated = participants.len() > MAX_PARTICIPANTS;
+        if participants_truncated {
+            participants.truncate(MAX_PARTICIPANTS);
+        }
+
+        Ok(ActiveDiscussion {
+            discussion_id: row.get(0)?,
+            entity_type: entity_type.to_string(),
+            entity_iid: row.get(2)?,
+            entity_title: row.get(3)?,
+            project_path: row.get(4)?,
+            last_note_at: row.get(5)?,
+            note_count: row.get(6)?,
+            participants,
+            participants_total,
+            participants_truncated,
+        })
+    };
+
+    // Select variant first, then prepare exactly one statement
+    let discussions: Vec<ActiveDiscussion> = match project_id {
+        None => {
+            let mut stmt = conn.prepare_cached(sql_global)?;
+            stmt.query_map(rusqlite::params![since_ms, limit_plus_one], &map_row)?
+                .collect::<Result<Vec<_>, _>>()?
+        }
+        Some(pid) => {
+            let mut stmt = conn.prepare_cached(sql_scoped)?;
+            stmt.query_map(rusqlite::params![since_ms, pid, limit_plus_one], &map_row)?
+                .collect::<Result<Vec<_>, _>>()?
+        }
+    };
+
+    let truncated = discussions.len() > limit;
+    let discussions: Vec<ActiveDiscussion> = discussions.into_iter().take(limit).collect();
+
+    Ok(ActiveResult {
+        discussions,
+        total_unresolved_in_window,
+        truncated,
+    })
+}
+```
+
+**CTE-based refactor (from feedback-3 Change 5, corrected in feedback-5 Change 2):** The original plan used two correlated subqueries per discussion row (one for `note_count`, one for `participants`). While not catastrophic with LIMIT 20, it creates "spiky" behavior and unnecessary work if the planner chooses poorly. The CTE approach:
+1. `picked` — selects the limited set of discussions first (hits `idx_discussions_unresolved_recent` or `idx_discussions_unresolved_recent_global`)
+2. `note_counts` — counts all non-system notes per picked discussion (actual note count, not participant count)
+3.
`participants` — distinct usernames per picked discussion, then `GROUP_CONCAT` +4. Final SELECT — joins everything together + +**Why `note_counts` and `participants` are separate CTEs:** The iteration-4 plan used a single `note_agg` CTE that first did `SELECT DISTINCT discussion_id, author_username` then `COUNT(*)`. This produced a *participant count*, not a *note count*. A discussion with 5 notes from 2 people would show `note_count: 2` instead of `note_count: 5`. Splitting into two CTEs fixes the semantic mismatch while keeping both scoped to only the picked discussions. + +This is structurally cleaner and more predictable than correlated subqueries. + +**Two static SQL variants (global vs scoped):** The `(?N IS NULL OR d.project_id = ?N)` nullable-OR pattern undermines index selection for Active mode because SQLite evaluates the `IS NULL` check at runtime, not prepare time. With both `idx_discussions_unresolved_recent` (leading with `project_id`) and `idx_discussions_unresolved_recent_global` (single-column `last_note_at`), the planner may choose a "good enough for both" plan that is optimal for neither. Using two static SQL strings — `sql_global` (no project predicate, uses global index) and `sql_scoped` (explicit `d.project_id = ?2`, uses scoped index) — selected at runtime via `match project_id` ensures the intended index is always used. Both are prepared via `prepare_cached()` and only one is prepared per invocation. + +**LIMIT is parameterized** — not assembled via `format!()`. This keeps the SQL fully static and enables prepared statement caching. In the global variant it's `?2`, in the scoped variant it's `?3`. + +### Query: Overlap Mode + +```rust +fn query_overlap( + conn: &Connection, + path: &str, + project_id: Option, + since_ms: i64, + limit: usize, +) -> Result { + let pq = build_path_query(conn, path, project_id)?; + + // Two static SQL strings: prefix (LIKE) vs exact (=). 
+ // Both produce project-qualified MR references (group/project!iid) + // via JOIN to projects table, eliminating multi-project ambiguity. + // Reviewer branch excludes self-reviews (n.author_username != m.author_username) + // to prevent MR authors from inflating reviewer counts. + let sql_prefix = "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( + -- Reviewers who left DiffNotes on files matching the path. + -- Self-reviews excluded: MR authors commenting on their own diffs + -- are clarifications, not reviews. + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + UNION ALL + + -- Authors who wrote MRs with reviews at this path + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' 
|| m.iid)) AS mr_refs + FROM merge_requests m + JOIN discussions d ON d.merge_request_id = m.id + JOIN notes n ON n.discussion_id = d.id + JOIN projects p ON m.project_id = p.id + WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\\' + AND n.is_system = 0 + AND m.state IN ('opened', 'merged') + AND m.author_username IS NOT NULL + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY m.author_username + )"; + + let sql_exact = "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( + -- Self-reviews excluded for reviewer counts + SELECT + n.author_username AS username, + 'reviewer' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + JOIN merge_requests m ON d.merge_request_id = m.id + JOIN projects p ON m.project_id = p.id + WHERE n.note_type = 'DiffNote' + AND n.position_new_path = ?1 + AND n.is_system = 0 + AND n.author_username IS NOT NULL + AND (m.author_username IS NULL OR n.author_username != m.author_username) + AND m.state IN ('opened','merged') + AND n.created_at >= ?2 + AND (?3 IS NULL OR n.project_id = ?3) + GROUP BY n.author_username + + UNION ALL + + SELECT + m.author_username AS username, + 'author' AS role, + COUNT(DISTINCT m.id) AS touch_count, + MAX(n.created_at) AS last_seen_at, + GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' 
|| m.iid)) AS mr_refs
+        FROM merge_requests m
+        JOIN discussions d ON d.merge_request_id = m.id
+        JOIN notes n ON n.discussion_id = d.id
+        JOIN projects p ON m.project_id = p.id
+        WHERE n.note_type = 'DiffNote'
+          AND n.position_new_path = ?1
+          AND n.is_system = 0
+          AND m.state IN ('opened', 'merged')
+          AND m.author_username IS NOT NULL
+          AND n.created_at >= ?2
+          AND (?3 IS NULL OR n.project_id = ?3)
+        GROUP BY m.author_username
+    )";
+
+    let mut stmt = if pq.is_prefix {
+        conn.prepare_cached(sql_prefix)?
+    } else {
+        conn.prepare_cached(sql_exact)?
+    };
+    let rows: Vec<(String, String, u32, i64, Option<String>)> = stmt
+        .query_map(rusqlite::params![pq.value, since_ms, project_id], |row| {
+            Ok((
+                row.get(0)?,
+                row.get(1)?,
+                row.get(2)?,
+                row.get(3)?,
+                row.get(4)?,
+            ))
+        })?
+        .collect::<Result<Vec<_>, _>>()?;
+
+    // Internal accumulator uses HashSet for MR refs from the start — avoids
+    // expensive drain-rebuild per row that the old Vec + HashSet pattern did.
+    struct OverlapAcc {
+        username: String,
+        author_touch_count: u32,
+        review_touch_count: u32,
+        touch_count: u32,
+        last_seen_at: i64,
+        mr_refs: HashSet<String>,
+    }
+
+    let mut user_map: HashMap<String, OverlapAcc> = HashMap::new();
+    for (username, role, count, last_seen, mr_refs_csv) in &rows {
+        let mr_refs: Vec<String> = mr_refs_csv
+            .as_deref()
+            .map(|csv| csv.split(',').map(|s| s.trim().to_string()).collect())
+            .unwrap_or_default();
+
+        let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapAcc {
+            username: username.clone(),
+            author_touch_count: 0,
+            review_touch_count: 0,
+            touch_count: 0,
+            last_seen_at: 0,
+            mr_refs: HashSet::new(),
+        });
+        entry.touch_count += count;
+        if role == "author" {
+            entry.author_touch_count += count;
+        } else {
+            entry.review_touch_count += count;
+        }
+        if *last_seen > entry.last_seen_at {
+            entry.last_seen_at = *last_seen;
+        }
+        for r in mr_refs {
+            entry.mr_refs.insert(r);
+        }
+    }
+
+    // Convert accumulators to output structs. Sort + bound mr_refs for deterministic,
Agents get total + truncated metadata per principle #11. + const MAX_MR_REFS_PER_USER: usize = 50; + let mut users: Vec<OverlapUser> = user_map + .into_values() + .map(|a| { + let mut mr_refs: Vec<String> = a.mr_refs.into_iter().collect(); + mr_refs.sort(); + let mr_refs_total = mr_refs.len() as u32; + let mr_refs_truncated = mr_refs.len() > MAX_MR_REFS_PER_USER; + if mr_refs_truncated { + mr_refs.truncate(MAX_MR_REFS_PER_USER); + } + OverlapUser { + username: a.username, + author_touch_count: a.author_touch_count, + review_touch_count: a.review_touch_count, + touch_count: a.touch_count, + last_seen_at: a.last_seen_at, + mr_refs, + mr_refs_total, + mr_refs_truncated, + } + }) + .collect(); + + // Stable sort with full tie-breakers for deterministic output + users.sort_by(|a, b| { + b.touch_count + .cmp(&a.touch_count) + .then_with(|| b.last_seen_at.cmp(&a.last_seen_at)) + .then_with(|| a.username.cmp(&b.username)) + }); + + let truncated = users.len() > limit; + users.truncate(limit); + + Ok(OverlapResult { + path_query: path.to_string(), + path_match: if pq.is_prefix { "prefix" } else { "exact" }.to_string(), + users, + truncated, + }) +} + +/// Format overlap role for display: "A", "R", or "A+R". +fn format_overlap_role(user: &OverlapUser) -> &'static str { + match (user.author_touch_count > 0, user.review_touch_count > 0) { + (true, true) => "A+R", + (true, false) => "A", + (false, true) => "R", + (false, false) => "-", + } +} +``` + +**Key design decisions:** +1. **Dual role tracking** — `OverlapUser` tracks `author_touch_count` and `review_touch_count` separately. A user who both authored and reviewed at the same path gets `A+R` — a stronger signal than either alone. +2. **Coherent `touch_count` units** — Both reviewer and author branches use `COUNT(DISTINCT m.id)` to count *MRs*, not DiffNotes. 
This makes `touch_count` a consistent, comparable metric across roles — summing author + reviewer MR counts is meaningful, whereas mixing DiffNote counts (reviewer) with MR counts (author) produces misleading totals. The human output header says "MRs" (not "Touches") to reflect this. +3. **`n.created_at` for author branch** — Anchors "last seen" to actual review note timestamps at the path, not MR-level `updated_at` which can change for unrelated reasons. +4. **`n.is_system = 0` on BOTH branches** — Consistent system note exclusion across all DiffNote queries. This matches Expert mode and prevents bot/CI annotation noise. +4b. **`m.state IN ('opened','merged')` on reviewer branches** — Consistent with author branches. Closed/unmerged MRs are noise. Matches Expert mode. +4c. **Bounded `mr_refs`** — `mr_refs` are capped at 50 per user with `mr_refs_total` + `mr_refs_truncated` metadata. Prevents unbounded JSON payloads per principle #11. +5. **`n.project_id` for scoping** — Same as Expert mode, scoping on the notes table to maximize index usage. +6. **Project-qualified MR refs (`group/project!iid`)** — The SQL joins the `projects` table and concatenates `p.path_with_namespace || '!' || m.iid` to produce globally unique references. In a multi-project database, bare `!123` is ambiguous — the same iid can exist in multiple projects. This makes overlap output directly actionable without needing to cross-reference project context. +7. **Accumulator pattern** — `OverlapAcc` uses `HashSet` for MR refs from the start, avoiding the expensive drain-rebuild per row that the old `Vec + HashSet` pattern did. Convert once to `Vec` at the end with deterministic sorting. +8. **Two static SQL strings (prefix vs exact)** — Same pattern as Expert mode. +9. **Self-review exclusion** — Same as Expert: reviewer branch filters `n.author_username != m.author_username` to prevent MR authors from inflating reviewer counts. +10. 
**Deterministic sorting** — Users are sorted by `touch_count DESC, last_seen_at DESC, username ASC`. All three tie-breaker fields ensure identical output across runs. MR refs within each user are also sorted alphabetically. +11. **Truncation** — Same `LIMIT + 1` pattern as Expert and Active modes. `truncated: bool` exposed in both human and robot output. + +### Human Output + +```rust +pub fn print_who_human(result: &WhoResult, project_path: Option<&str>) { + match result { + WhoResult::Expert(r) => print_expert_human(r, project_path), + WhoResult::Workload(r) => print_workload_human(r), + WhoResult::Reviews(r) => print_reviews_human(r), + WhoResult::Active(r) => print_active_human(r, project_path), + WhoResult::Overlap(r) => print_overlap_human(r, project_path), + } +} + +/// Print a dim hint when results aggregate across all projects. +/// Modes that could be ambiguous without project context (Expert, Active, Overlap) +/// call this to prevent misinterpretation in multi-project databases. 
+fn print_scope_hint(project_path: Option<&str>) { + if project_path.is_none() { + println!( + " {}", + style("(aggregated across all projects; use -p to scope)").dim() + ); + } +} + +fn print_expert_human(r: &ExpertResult, project_path: Option<&str>) { + println!(); + println!( + "{}", + style(format!("Experts for {}", r.path_query)).bold() + ); + println!("{}", "─".repeat(60)); + println!( + " {}", + style(format!("(matching {} {})", r.path_match, if r.path_match == "exact" { "file" } else { "directory prefix" })).dim() + ); + print_scope_hint(project_path); + println!(); + + if r.experts.is_empty() { + println!(" {}", style("No experts found for this path.").dim()); + println!(); + return; + } + + println!( + " {:<16} {:>6} {:>12} {:>6} {:>12} {}", + style("Username").bold(), + style("Score").bold(), + style("Reviewed(MRs)").bold(), + style("Notes").bold(), + style("Authored(MRs)").bold(), + style("Last Seen").bold(), + ); + + for expert in &r.experts { + let reviews = if expert.review_mr_count > 0 { + expert.review_mr_count.to_string() + } else { + "-".to_string() + }; + let notes = if expert.review_note_count > 0 { + expert.review_note_count.to_string() + } else { + "-".to_string() + }; + let authored = if expert.author_mr_count > 0 { + expert.author_mr_count.to_string() + } else { + "-".to_string() + }; + println!( + " {:<16} {:>6} {:>12} {:>6} {:>12} {}", + style(format!("@{}", expert.username)).cyan(), + expert.score, + reviews, + notes, + authored, + style(format_relative_time(expert.last_seen_ms)).dim(), + ); + } + if r.truncated { + println!(" {}", style("(showing first -n; rerun with a higher --limit)").dim()); + } + println!(); +} + +fn print_workload_human(r: &WorkloadResult) { + println!(); + println!( + "{}", + style(format!("@{} -- Workload Summary", r.username)).bold() + ); + println!("{}", "─".repeat(60)); + + if !r.assigned_issues.is_empty() { + println!(); + println!( + " {} ({})", + style("Assigned Issues").bold(), + r.assigned_issues.len() + 
); + for item in &r.assigned_issues { + println!( + " {} {} {}", + style(&item.ref_).cyan(), + truncate_str(&item.title, 40), + style(format_relative_time(item.updated_at)).dim(), + ); + } + if r.assigned_issues_truncated { + println!(" {}", style("(truncated; rerun with a higher --limit)").dim()); + } + } + + if !r.authored_mrs.is_empty() { + println!(); + println!( + " {} ({})", + style("Authored MRs").bold(), + r.authored_mrs.len() + ); + for mr in &r.authored_mrs { + let draft = if mr.draft { " [draft]" } else { "" }; + println!( + " {} {}{} {}", + style(&mr.ref_).cyan(), + truncate_str(&mr.title, 35), + style(draft).dim(), + style(format_relative_time(mr.updated_at)).dim(), + ); + } + if r.authored_mrs_truncated { + println!(" {}", style("(truncated; rerun with a higher --limit)").dim()); + } + } + + if !r.reviewing_mrs.is_empty() { + println!(); + println!( + " {} ({})", + style("Reviewing MRs").bold(), + r.reviewing_mrs.len() + ); + for mr in &r.reviewing_mrs { + let author = mr + .author_username + .as_deref() + .map(|a| format!(" by @{a}")) + .unwrap_or_default(); + println!( + " {} {}{} {}", + style(&mr.ref_).cyan(), + truncate_str(&mr.title, 30), + style(author).dim(), + style(format_relative_time(mr.updated_at)).dim(), + ); + } + if r.reviewing_mrs_truncated { + println!(" {}", style("(truncated; rerun with a higher --limit)").dim()); + } + } + + if !r.unresolved_discussions.is_empty() { + println!(); + println!( + " {} ({})", + style("Unresolved Discussions").bold(), + r.unresolved_discussions.len() + ); + for disc in &r.unresolved_discussions { + println!( + " {} {} {} {}", + style(&disc.entity_type).dim(), + style(&disc.ref_).cyan(), + truncate_str(&disc.entity_title, 35), + style(format_relative_time(disc.last_note_at)).dim(), + ); + } + if r.unresolved_discussions_truncated { + println!(" {}", style("(truncated; rerun with a higher --limit)").dim()); + } + } + + if r.assigned_issues.is_empty() + && r.authored_mrs.is_empty() + && 
r.reviewing_mrs.is_empty() + && r.unresolved_discussions.is_empty() + { + println!(); + println!( + " {}", + style("No open work items found for this user.").dim() + ); + } + + println!(); +} + +fn print_reviews_human(r: &ReviewsResult) { + println!(); + println!( + "{}", + style(format!("@{} -- Review Patterns", r.username)).bold() + ); + println!("{}", "─".repeat(60)); + println!(); + + if r.total_diffnotes == 0 { + println!( + " {}", + style("No review comments found for this user.").dim() + ); + println!(); + return; + } + + println!( + " {} DiffNotes across {} MRs ({} categorized)", + style(r.total_diffnotes).bold(), + style(r.mrs_reviewed).bold(), + style(r.categorized_count).bold(), + ); + println!(); + + if !r.categories.is_empty() { + println!( + " {:<16} {:>6} {:>6}", + style("Category").bold(), + style("Count").bold(), + style("%").bold(), + ); + + for cat in &r.categories { + println!( + " {:<16} {:>6} {:>5.1}%", + style(&cat.name).cyan(), + cat.count, + cat.percentage, + ); + } + } + + let uncategorized = r.total_diffnotes - r.categorized_count; + if uncategorized > 0 { + println!(); + println!( + " {} {} uncategorized (no **prefix** convention)", + style("Note:").dim(), + uncategorized, + ); + } + + println!(); +} + +fn print_active_human(r: &ActiveResult, project_path: Option<&str>) { + println!(); + println!( + "{}", + style(format!( + "Active Discussions ({} unresolved in window)", + r.total_unresolved_in_window + )) + .bold() + ); + println!("{}", "─".repeat(60)); + print_scope_hint(project_path); + println!(); + + if r.discussions.is_empty() { + println!( + " {}", + style("No active unresolved discussions in this time window.").dim() + ); + println!(); + return; + } + + for disc in &r.discussions { + let prefix = if disc.entity_type == "MR" { "!" 
} else { "#" }; + let participants_str = disc + .participants + .iter() + .map(|p| format!("@{p}")) + .collect::<Vec<_>>() + .join(", "); + + println!( + " {} {} {} {} notes {}", + style(format!("{}{}", prefix, disc.entity_iid)).cyan(), + truncate_str(&disc.entity_title, 40), + style(format_relative_time(disc.last_note_at)).dim(), + disc.note_count, + style(&disc.project_path).dim(), + ); + if !participants_str.is_empty() { + println!(" {}", style(participants_str).dim()); + } + } + if r.truncated { + println!(" {}", style("(showing first -n; rerun with a higher --limit)").dim()); + } + println!(); +} + +fn print_overlap_human(r: &OverlapResult, project_path: Option<&str>) { + println!(); + println!( + "{}", + style(format!("Overlap for {}", r.path_query)).bold() + ); + println!("{}", "─".repeat(60)); + println!( + " {}", + style(format!("(matching {} {})", r.path_match, if r.path_match == "exact" { "file" } else { "directory prefix" })).dim() + ); + print_scope_hint(project_path); + println!(); + + if r.users.is_empty() { + println!( + " {}", + style("No overlapping users found for this path.").dim() + ); + println!(); + return; + } + + println!( + " {:<16} {:<6} {:>7} {:<12} {}", + style("Username").bold(), + style("Role").bold(), + style("MRs").bold(), + style("Last Seen").bold(), + style("MR Refs").bold(), + ); + + for user in &r.users { + let mr_str = user + .mr_refs + .iter() + .take(5) + .cloned() + .collect::<Vec<_>>() + .join(", "); + let overflow = if user.mr_refs.len() > 5 { + format!(" +{}", user.mr_refs.len() - 5) + } else { + String::new() + }; + + println!( + " {:<16} {:<6} {:>7} {:<12} {}{}", + style(format!("@{}", user.username)).cyan(), + format_overlap_role(user), + user.touch_count, + format_relative_time(user.last_seen_at), + mr_str, + overflow, + ); + } + if r.truncated { + println!(" {}", style("(showing first -n; rerun with a higher --limit)").dim()); + } + println!(); +} +``` + +**Human output multi-project clarity (from feedback-3 Change 7, enhanced in 
feedback-5 Change 5):** When not project-scoped, `#42` and `!100` aren't unique. The workload output now uses canonical refs (`group/project#iid` for issues, `group/project!iid` for MRs) — the same token is copy-pasteable by humans and directly parseable by agents. This eliminates the need for a separate `project_path` column since the ref already contains the project context. Robot JSON includes both `ref` and `project_path` for flexibility. Overlap output uses the project-qualified `mr_refs` directly. + +### Robot JSON Output + +```rust +pub fn print_who_json(run: &WhoRun, args: &WhoArgs, elapsed_ms: u64) { + let (mode, data) = match &run.result { + WhoResult::Expert(r) => ("expert", expert_to_json(r)), + WhoResult::Workload(r) => ("workload", workload_to_json(r)), + WhoResult::Reviews(r) => ("reviews", reviews_to_json(r)), + WhoResult::Active(r) => ("active", active_to_json(r)), + WhoResult::Overlap(r) => ("overlap", overlap_to_json(r)), + }; + + // Raw CLI args — what the user typed + let input = serde_json::json!({ + "target": args.target, + "path": args.path, + "project": args.project, + "since": args.since, + "limit": args.limit, + }); + + // Resolved/computed values — what actually ran + let resolved_input = serde_json::json!({ + "mode": run.resolved_input.mode, + "project_id": run.resolved_input.project_id, + "project_path": run.resolved_input.project_path, + "since_ms": run.resolved_input.since_ms, + "since_iso": run.resolved_input.since_iso, + "since_mode": run.resolved_input.since_mode, + "limit": run.resolved_input.limit, + }); + + let output = WhoJsonEnvelope { + ok: true, + data: WhoJsonData { + mode: mode.to_string(), + input, + resolved_input, + result: data, + }, + meta: RobotMeta { elapsed_ms }, + }; + + println!("{}", serde_json::to_string(&output).unwrap()); +} + +#[derive(Serialize)] +struct WhoJsonEnvelope { + ok: bool, + data: WhoJsonData, + meta: RobotMeta, +} + +#[derive(Serialize)] +struct WhoJsonData { + mode: String, + input: 
serde_json::Value, + resolved_input: serde_json::Value, + #[serde(flatten)] + result: serde_json::Value, +} + +fn expert_to_json(r: &ExpertResult) -> serde_json::Value { + serde_json::json!({ + "path_query": r.path_query, + "path_match": r.path_match, + "truncated": r.truncated, + "experts": r.experts.iter().map(|e| serde_json::json!({ + "username": e.username, + "score": e.score, + "review_mr_count": e.review_mr_count, + "review_note_count": e.review_note_count, + "author_mr_count": e.author_mr_count, + "last_seen_at": ms_to_iso(e.last_seen_ms), + })).collect::<Vec<_>>(), + }) +} + +fn workload_to_json(r: &WorkloadResult) -> serde_json::Value { + serde_json::json!({ + "username": r.username, + "assigned_issues": r.assigned_issues.iter().map(|i| serde_json::json!({ + "iid": i.iid, + "ref": i.ref_, + "title": i.title, + "project_path": i.project_path, + "updated_at": ms_to_iso(i.updated_at), + })).collect::<Vec<_>>(), + "authored_mrs": r.authored_mrs.iter().map(|m| serde_json::json!({ + "iid": m.iid, + "ref": m.ref_, + "title": m.title, + "draft": m.draft, + "project_path": m.project_path, + "updated_at": ms_to_iso(m.updated_at), + })).collect::<Vec<_>>(), + "reviewing_mrs": r.reviewing_mrs.iter().map(|m| serde_json::json!({ + "iid": m.iid, + "ref": m.ref_, + "title": m.title, + "draft": m.draft, + "project_path": m.project_path, + "author_username": m.author_username, + "updated_at": ms_to_iso(m.updated_at), + })).collect::<Vec<_>>(), + "unresolved_discussions": r.unresolved_discussions.iter().map(|d| serde_json::json!({ + "entity_type": d.entity_type, + "entity_iid": d.entity_iid, + "ref": d.ref_, + "entity_title": d.entity_title, + "project_path": d.project_path, + "last_note_at": ms_to_iso(d.last_note_at), + })).collect::<Vec<_>>(), + "summary": { + "assigned_issue_count": r.assigned_issues.len(), + "authored_mr_count": r.authored_mrs.len(), + "reviewing_mr_count": r.reviewing_mrs.len(), + "unresolved_discussion_count": r.unresolved_discussions.len(), + }, + "truncation": { + 
"assigned_issues_truncated": r.assigned_issues_truncated, + "authored_mrs_truncated": r.authored_mrs_truncated, + "reviewing_mrs_truncated": r.reviewing_mrs_truncated, + "unresolved_discussions_truncated": r.unresolved_discussions_truncated, + } + }) +} + +fn reviews_to_json(r: &ReviewsResult) -> serde_json::Value { + serde_json::json!({ + "username": r.username, + "total_diffnotes": r.total_diffnotes, + "categorized_count": r.categorized_count, + "mrs_reviewed": r.mrs_reviewed, + "categories": r.categories.iter().map(|c| serde_json::json!({ + "name": c.name, + "count": c.count, + "percentage": (c.percentage * 10.0).round() / 10.0, + })).collect::<Vec<_>>(), + }) +} + +fn active_to_json(r: &ActiveResult) -> serde_json::Value { + serde_json::json!({ + "total_unresolved_in_window": r.total_unresolved_in_window, + "truncated": r.truncated, + "discussions": r.discussions.iter().map(|d| serde_json::json!({ + "discussion_id": d.discussion_id, + "entity_type": d.entity_type, + "entity_iid": d.entity_iid, + "entity_title": d.entity_title, + "project_path": d.project_path, + "last_note_at": ms_to_iso(d.last_note_at), + "note_count": d.note_count, + "participants": d.participants, + "participants_total": d.participants_total, + "participants_truncated": d.participants_truncated, + })).collect::<Vec<_>>(), + }) +} + +fn overlap_to_json(r: &OverlapResult) -> serde_json::Value { + serde_json::json!({ + "path_query": r.path_query, + "path_match": r.path_match, + "truncated": r.truncated, + "users": r.users.iter().map(|u| serde_json::json!({ + "username": u.username, + "role": format_overlap_role(u), + "author_touch_count": u.author_touch_count, + "review_touch_count": u.review_touch_count, + "touch_count": u.touch_count, + "last_seen_at": ms_to_iso(u.last_seen_at), + "mr_refs": u.mr_refs, + "mr_refs_total": u.mr_refs_total, + "mr_refs_truncated": u.mr_refs_truncated, + })).collect::<Vec<_>>(), + }) +} +``` + +**Robot JSON dual-envelope design:** The JSON output now includes BOTH `input` (raw CLI 
args) AND `resolved_input` (computed values). This serves two distinct purposes: +- `input` lets agents see what was typed (for error reporting, UI display) +- `resolved_input` lets agents reproduce the exact query (fuzzy project resolution -> concrete `project_id` + `project_path`, default since -> concrete `since_ms` + `since_iso`, `since_was_default` flag) + +This is a genuine improvement over the original plan which only echoed raw args. An agent comparing runs across time needs the resolved values — fuzzy project matching can resolve differently as projects are added/renamed. + +**`since_mode`:** Agents replaying intent need to know whether `--since 6m` was explicit (`"explicit"`), defaulted (`"default"`), or absent (`"none"` — Workload). The original boolean `since_was_default` was misleading for Workload mode, which has no default window: `true` incorrectly implied a default was applied. The tri-state eliminates this reproducibility gap. + +**`discussion_id` in active output:** Agents need stable entity IDs to follow up on specific discussions. Without this, the only way to identify a discussion is `entity_type + entity_iid + project_path`, which is fragile and requires multi-field matching. + +**`mr_refs` instead of `mr_iids` in overlap output:** Project-qualified references like `group/project!123` are globally unique and directly actionable, unlike bare iids which are ambiguous in a multi-project database. + +**Intentionally excluded from `resolved_input`: `db_path`.** ChatGPT proposed including the database file path. This leaks internal filesystem structure without adding value — agents already know their config and `db_path` is invariant within a session. 
+ +### Helper Functions (duplicated from list.rs) + +```rust +fn format_relative_time(ms_epoch: i64) -> String { + let now = now_ms(); + let diff = now - ms_epoch; + + if diff < 0 { + return "in the future".to_string(); + } + + match diff { + d if d < 60_000 => "just now".to_string(), + d if d < 3_600_000 => format!("{} min ago", d / 60_000), + d if d < 86_400_000 => { + let n = d / 3_600_000; + format!("{n} {} ago", if n == 1 { "hour" } else { "hours" }) + } + d if d < 604_800_000 => { + let n = d / 86_400_000; + format!("{n} {} ago", if n == 1 { "day" } else { "days" }) + } + d if d < 2_592_000_000 => { + let n = d / 604_800_000; + format!("{n} {} ago", if n == 1 { "week" } else { "weeks" }) + } + _ => { + let n = diff / 2_592_000_000; + format!("{n} {} ago", if n == 1 { "month" } else { "months" }) + } + } +} + +fn truncate_str(s: &str, max: usize) -> String { + if s.chars().count() <= max { + s.to_owned() + } else { + let truncated: String = s.chars().take(max.saturating_sub(3)).collect(); + format!("{truncated}...") + } +} +``` + +### Tests + +```rust +#[cfg(test)] +mod tests { + use super::*; + use crate::core::db::{create_connection, run_migrations}; + use std::path::Path; + + fn setup_test_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn + } + + fn insert_project(conn: &Connection, id: i64, path: &str) { + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params![id, id * 100, path, format!("https://git.example.com/{}", path)], + ) + .unwrap(); + } + + fn insert_mr(conn: &Connection, id: i64, project_id: i64, iid: i64, author: &str, state: &str) { + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, author_username, state, last_seen_at, updated_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + rusqlite::params![ + id, id * 10, project_id, iid, + format!("MR 
{iid}"), author, state, + now_ms(), now_ms() + ], + ).unwrap(); + } + + fn insert_issue(conn: &Connection, id: i64, project_id: i64, iid: i64, author: &str) { + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) + VALUES (?1, ?2, ?3, ?4, ?5, 'opened', ?6, ?7, ?8, ?9)", + rusqlite::params![ + id, id * 10, project_id, iid, + format!("Issue {iid}"), author, + now_ms(), now_ms(), now_ms() + ], + ).unwrap(); + } + + fn insert_discussion(conn: &Connection, id: i64, project_id: i64, mr_id: Option<i64>, issue_id: Option<i64>, resolvable: bool, resolved: bool) { + let noteable_type = if mr_id.is_some() { "MergeRequest" } else { "Issue" }; + conn.execute( + "INSERT INTO discussions (id, gitlab_discussion_id, project_id, merge_request_id, issue_id, noteable_type, resolvable, resolved, last_seen_at, last_note_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", + rusqlite::params![ + id, format!("disc-{id}"), project_id, mr_id, issue_id, + noteable_type, + i32::from(resolvable), i32::from(resolved), + now_ms(), now_ms() + ], + ).unwrap(); + } + + #[allow(clippy::too_many_arguments)] + fn insert_diffnote(conn: &Connection, id: i64, discussion_id: i64, project_id: i64, author: &str, file_path: &str, body: &str) { + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, note_type, is_system, author_username, body, created_at, updated_at, last_seen_at, position_new_path) + VALUES (?1, ?2, ?3, ?4, 'DiffNote', 0, ?5, ?6, ?7, ?8, ?9, ?10)", + rusqlite::params![ + id, id * 10, discussion_id, project_id, + author, body, + now_ms(), now_ms(), now_ms(), file_path + ], + ).unwrap(); + } + + fn insert_assignee(conn: &Connection, issue_id: i64, username: &str) { + conn.execute( + "INSERT INTO issue_assignees (issue_id, username) VALUES (?1, ?2)", + rusqlite::params![issue_id, username], + ) + .unwrap(); + } + + fn insert_reviewer(conn: &Connection, mr_id: i64, username: &str) { + 
conn.execute( + "INSERT INTO mr_reviewers (merge_request_id, username) VALUES (?1, ?2)", + rusqlite::params![mr_id, username], + ) + .unwrap(); + } + + #[test] + fn test_is_file_path_discrimination() { + // Contains '/' -> file path + assert!(matches!( + resolve_mode(&WhoArgs { + target: Some("src/auth/".to_string()), + path: None, active: false, overlap: None, reviews: false, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Expert { .. } + )); + + // No '/' -> username + assert!(matches!( + resolve_mode(&WhoArgs { + target: Some("asmith".to_string()), + path: None, active: false, overlap: None, reviews: false, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Workload { .. } + )); + + // With @ prefix -> username (stripped) + assert!(matches!( + resolve_mode(&WhoArgs { + target: Some("@asmith".to_string()), + path: None, active: false, overlap: None, reviews: false, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Workload { .. } + )); + + // --reviews flag -> reviews mode + assert!(matches!( + resolve_mode(&WhoArgs { + target: Some("asmith".to_string()), + path: None, active: false, overlap: None, reviews: true, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Reviews { .. } + )); + + // --path flag -> expert mode (handles root files) + assert!(matches!( + resolve_mode(&WhoArgs { + target: None, + path: Some("README.md".to_string()), + active: false, overlap: None, reviews: false, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Expert { .. } + )); + + // --path flag with dotless file -> expert mode + assert!(matches!( + resolve_mode(&WhoArgs { + target: None, + path: Some("Makefile".to_string()), + active: false, overlap: None, reviews: false, + since: None, project: None, limit: 20, + }).unwrap(), + WhoMode::Expert { .. 
} + )); + } + + #[test] + fn test_build_path_query() { + let conn = setup_test_db(); + + // Directory with trailing slash -> prefix + let pq = build_path_query(&conn, "src/auth/", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); + + // Directory without trailing slash (no dot in last segment) -> prefix + let pq = build_path_query(&conn, "src/auth", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); + + // File with extension -> exact + let pq = build_path_query(&conn, "src/auth/login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); + + // Root file -> exact + let pq = build_path_query(&conn, "README.md", None).unwrap(); + assert_eq!(pq.value, "README.md"); + assert!(!pq.is_prefix); + + // Directory with dots in non-leaf segment -> prefix + let pq = build_path_query(&conn, ".github/workflows/", None).unwrap(); + assert_eq!(pq.value, ".github/workflows/%"); + assert!(pq.is_prefix); + + // Versioned directory path -> prefix + let pq = build_path_query(&conn, "src/v1.2/auth/", None).unwrap(); + assert_eq!(pq.value, "src/v1.2/auth/%"); + assert!(pq.is_prefix); + + // Path with LIKE metacharacters -> prefix, escaped + let pq = build_path_query(&conn, "src/test_files/", None).unwrap(); + assert_eq!(pq.value, "src/test\\_files/%"); + assert!(pq.is_prefix); + + // Dotless root file -> exact match (root path without '/') + let pq = build_path_query(&conn, "Makefile", None).unwrap(); + assert_eq!(pq.value, "Makefile"); + assert!(!pq.is_prefix); + + let pq = build_path_query(&conn, "LICENSE", None).unwrap(); + assert_eq!(pq.value, "LICENSE"); + assert!(!pq.is_prefix); + + // Dotless root path with trailing '/' -> directory prefix (explicit override) + let pq = build_path_query(&conn, "Makefile/", None).unwrap(); + assert_eq!(pq.value, "Makefile/%"); + assert!(pq.is_prefix); + } + + #[test] + fn test_escape_like() { + assert_eq!(escape_like("normal/path"), "normal/path"); + 
assert_eq!(escape_like("has_underscore"), "has\\_underscore"); + assert_eq!(escape_like("has%percent"), "has\\%percent"); + assert_eq!(escape_like("has\\backslash"), "has\\\\backslash"); + } + + #[test] + fn test_build_path_query_exact_does_not_escape() { + let conn = setup_test_db(); + // '_' must NOT be escaped for exact match (=). + // escape_like would produce "README\_with\_underscore.md" which + // wouldn't match stored "README_with_underscore.md" via =. + let pq = build_path_query(&conn, "README_with_underscore.md", None).unwrap(); + assert_eq!(pq.value, "README_with_underscore.md"); + assert!(!pq.is_prefix); + } + + #[test] + fn test_path_flag_dotless_root_file_is_exact() { + let conn = setup_test_db(); + // --path Makefile must produce an exact match, not Makefile/% + let pq = build_path_query(&conn, "Makefile", None).unwrap(); + assert_eq!(pq.value, "Makefile"); + assert!(!pq.is_prefix); + + let pq = build_path_query(&conn, "Dockerfile", None).unwrap(); + assert_eq!(pq.value, "Dockerfile"); + assert!(!pq.is_prefix); + } + + #[test] + fn test_expert_query() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/auth/login.rs", "**suggestion**: use const"); + insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/auth/login.rs", "**question**: why?"); + insert_diffnote(&conn, 3, 1, 1, "reviewer_c", "src/auth/session.rs", "looks good"); + + let result = query_expert(&conn, "src/auth/", None, 0, 20).unwrap(); + assert_eq!(result.experts.len(), 3); // reviewer_b, reviewer_c, author_a + assert_eq!(result.experts[0].username, "reviewer_b"); // highest score + } + + #[test] + fn test_workload_query() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_issue(&conn, 1, 1, 42, "someone_else"); + insert_assignee(&conn, 1, "dev_a"); + insert_mr(&conn, 1, 
1, 100, "dev_a", "opened"); + + let result = query_workload(&conn, "dev_a", None, None, 20).unwrap(); + assert_eq!(result.assigned_issues.len(), 1); + assert_eq!(result.authored_mrs.len(), 1); + } + + #[test] + fn test_reviews_query() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "merged"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "**suggestion**: refactor"); + insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/bar.rs", "**question**: why?"); + insert_diffnote(&conn, 3, 1, 1, "reviewer_b", "src/baz.rs", "looks good"); + + let result = query_reviews(&conn, "reviewer_b", None, 0).unwrap(); + assert_eq!(result.total_diffnotes, 3); + assert_eq!(result.categorized_count, 2); + assert_eq!(result.categories.len(), 2); + } + + #[test] + fn test_active_query() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/foo.rs", "needs work"); + // Second note by same participant — note_count should be 2, participants still ["reviewer_b"] + insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/foo.rs", "follow-up"); + + let result = query_active(&conn, None, 0, 20).unwrap(); + assert_eq!(result.total_unresolved_in_window, 1); + assert_eq!(result.discussions.len(), 1); + assert_eq!(result.discussions[0].participants, vec!["reviewer_b"]); + // This was a regression in iteration 4: note_count was counting participants, not notes + assert_eq!(result.discussions[0].note_count, 2); + assert!(result.discussions[0].discussion_id > 0); + } + + #[test] + fn test_overlap_dual_roles() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + // User is both author of one MR and reviewer of another at same path + insert_mr(&conn, 1, 1, 
100, "dual_user", "opened"); + insert_mr(&conn, 2, 1, 200, "other_author", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_discussion(&conn, 2, 1, Some(2), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "someone", "src/auth/login.rs", "review of dual_user's MR"); + insert_diffnote(&conn, 2, 2, 1, "dual_user", "src/auth/login.rs", "dual_user reviewing other MR"); + + let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); + let dual = result.users.iter().find(|u| u.username == "dual_user").unwrap(); + assert!(dual.author_touch_count > 0); + assert!(dual.review_touch_count > 0); + assert_eq!(format_overlap_role(dual), "A+R"); + // MR refs should be project-qualified + assert!(dual.mr_refs.iter().any(|r| r.contains("team/backend!"))); + } + + #[test] + fn test_overlap_multi_project_mr_refs() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_project(&conn, 2, "team/frontend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_mr(&conn, 2, 2, 100, "author_a", "opened"); // Same iid, different project + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_discussion(&conn, 2, 2, Some(2), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_x", "src/auth/login.rs", "review"); + insert_diffnote(&conn, 2, 2, 2, "reviewer_x", "src/auth/login.rs", "review"); + + let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); + let reviewer = result.users.iter().find(|u| u.username == "reviewer_x").unwrap(); + // Should have two distinct refs despite same iid + assert!(reviewer.mr_refs.contains(&"team/backend!100".to_string())); + assert!(reviewer.mr_refs.contains(&"team/frontend!100".to_string())); + } + + #[test] + fn test_normalize_review_prefix() { + assert_eq!(normalize_review_prefix("suggestion"), "suggestion"); + assert_eq!(normalize_review_prefix("Suggestion:"), "suggestion"); + assert_eq!(normalize_review_prefix("suggestion 
(non-blocking):"), "suggestion"); + assert_eq!(normalize_review_prefix("Nitpick:"), "nit"); + assert_eq!(normalize_review_prefix("nit (non-blocking):"), "nit"); + assert_eq!(normalize_review_prefix("question"), "question"); + assert_eq!(normalize_review_prefix("TODO:"), "todo"); + } + + #[test] + fn test_normalize_repo_path() { + // Strips leading ./ + assert_eq!(normalize_repo_path("./src/foo/"), "src/foo/"); + // Strips leading / + assert_eq!(normalize_repo_path("/src/foo/"), "src/foo/"); + // Strips leading ./ recursively + assert_eq!(normalize_repo_path("././src/foo"), "src/foo"); + // Converts Windows backslashes when no forward slashes + assert_eq!(normalize_repo_path("src\\foo\\bar.rs"), "src/foo/bar.rs"); + // Does NOT convert backslashes when forward slashes present (escaped regex etc.) + assert_eq!(normalize_repo_path("src/foo\\bar"), "src/foo\\bar"); + // Collapses repeated // + assert_eq!(normalize_repo_path("src//foo//bar/"), "src/foo/bar/"); + // Trims whitespace + assert_eq!(normalize_repo_path(" src/foo/ "), "src/foo/"); + // Identity for clean paths + assert_eq!(normalize_repo_path("src/foo/bar.rs"), "src/foo/bar.rs"); + } + + #[test] + fn test_lookup_project_path() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + assert_eq!(lookup_project_path(&conn, 1).unwrap(), "team/backend"); + } + + #[test] + fn test_build_path_query_dotless_subdir_file_uses_db_probe() { + // Dotless file in subdirectory (src/Dockerfile) would normally be + // treated as a directory. The DB probe detects it's actually a file. 
+ let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "reviewer_b", "src/Dockerfile", "note"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); + + // Same path without DB data -> falls through to prefix + let conn2 = setup_test_db(); + let pq2 = build_path_query(&conn2, "src/Dockerfile", None).unwrap(); + assert_eq!(pq2.value, "src/Dockerfile/%"); + assert!(pq2.is_prefix); + } + + #[test] + fn test_build_path_query_probe_is_project_scoped() { + // Path exists as a dotless file in project 1; project 2 should not + // treat it as an exact file unless it exists there too. + let conn = setup_test_db(); + insert_project(&conn, 1, "team/a"); + insert_project(&conn, 2, "team/b"); + insert_mr(&conn, 1, 1, 10, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "rev", "infra/Makefile", "note"); + + // Unscoped: finds exact match in project 1 -> exact + let pq_unscoped = build_path_query(&conn, "infra/Makefile", None).unwrap(); + assert!(!pq_unscoped.is_prefix); + + // Scoped to project 2: no data -> falls back to prefix + let pq_scoped = build_path_query(&conn, "infra/Makefile", Some(2)).unwrap(); + assert!(pq_scoped.is_prefix); + + // Scoped to project 1: finds data -> exact + let pq_scoped1 = build_path_query(&conn, "infra/Makefile", Some(1)).unwrap(); + assert!(!pq_scoped1.is_prefix); + } + + #[test] + fn test_expert_excludes_self_review_notes() { + // MR author commenting on their own diff should not be counted as reviewer + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + // author_a comments on their own MR 
diff (clarification) + insert_diffnote(&conn, 1, 1, 1, "author_a", "src/auth/login.rs", "clarification"); + // reviewer_b also reviews + insert_diffnote(&conn, 2, 1, 1, "reviewer_b", "src/auth/login.rs", "looks good"); + + let result = query_expert(&conn, "src/auth/", None, 0, 20).unwrap(); + // author_a should appear as author only, not as reviewer + let author = result.experts.iter().find(|e| e.username == "author_a").unwrap(); + assert_eq!(author.review_mr_count, 0); + assert!(author.author_mr_count > 0); + + // reviewer_b should be a reviewer + let reviewer = result.experts.iter().find(|e| e.username == "reviewer_b").unwrap(); + assert!(reviewer.review_mr_count > 0); + } + + #[test] + fn test_overlap_excludes_self_review_notes() { + // MR author commenting on their own diff should not inflate reviewer counts + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + // author_a comments on their own MR diff (clarification) + insert_diffnote(&conn, 1, 1, 1, "author_a", "src/auth/login.rs", "clarification"); + + let result = query_overlap(&conn, "src/auth/", None, 0, 20).unwrap(); + let u = result.users.iter().find(|u| u.username == "author_a"); + // Should NOT be credited as reviewer touch + assert!(u.map(|x| x.review_touch_count).unwrap_or(0) == 0); + } + + #[test] + fn test_active_participants_sorted() { + // Participants should be sorted alphabetically for deterministic output + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + insert_mr(&conn, 1, 1, 100, "author_a", "opened"); + insert_discussion(&conn, 1, 1, Some(1), None, true, false); + insert_diffnote(&conn, 1, 1, 1, "zebra_user", "src/foo.rs", "note 1"); + insert_diffnote(&conn, 2, 1, 1, "alpha_user", "src/foo.rs", "note 2"); + + let result = query_active(&conn, None, 0, 20).unwrap(); + assert_eq!(result.discussions[0].participants, 
vec!["alpha_user", "zebra_user"]); + } + + #[test] + fn test_expert_truncation() { + let conn = setup_test_db(); + insert_project(&conn, 1, "team/backend"); + // Create 3 experts + for i in 1..=3 { + insert_mr(&conn, i, 1, 100 + i, &format!("author_{i}"), "opened"); + insert_discussion(&conn, i, 1, Some(i), None, true, false); + insert_diffnote(&conn, i, i, 1, &format!("reviewer_{i}"), "src/auth/login.rs", "note"); + } + + // limit = 2, should return truncated = true + let result = query_expert(&conn, "src/auth/", None, 0, 2).unwrap(); + assert!(result.truncated); + assert_eq!(result.experts.len(), 2); + + // limit = 10, should return truncated = false + let result = query_expert(&conn, "src/auth/", None, 0, 10).unwrap(); + assert!(!result.truncated); + } +} +``` + +--- + +## Implementation Order + +| Step | What | Files | +|---|---|---| +| 1 | Migration 017: composite indexes for who query paths (5 indexes including global active) | `core/db.rs` | +| 2 | CLI skeleton: `WhoArgs` + `Commands::Who` + dispatch + stub | `cli/mod.rs`, `commands/mod.rs`, `main.rs` | +| 3 | Mode resolution + `normalize_repo_path()` + path query helpers (project-scoped two-way DB probe, exact-match escaping fix, `path_match` output) + entry point (`run_who` with `since_mode` tri-state) | `commands/who.rs` | +| 4 | Workload mode (4 SELECT queries, uniform params, canonical `ref_` fields, LIMIT+1 truncation on all 4 sections) | `commands/who.rs` | +| 5 | Active discussions mode (CTE-based: picked + note_counts + participants, sorted participants, truncation, bounded participants, **two SQL variants** global/scoped for index utilization) | `commands/who.rs` | +| 6 | Expert mode (CTE + MR-breadth scoring, self-review exclusion, MR state filter, two SQL variants for prefix/exact, truncation) | `commands/who.rs` | +| 7 | Overlap mode (dual role tracking, self-review exclusion, MR state filter, accumulator pattern, deterministic sort with tie-breakers, project-qualified mr_refs, bounded mr_refs, 
truncation) | `commands/who.rs` | +| 8 | Reviews mode (prefix extraction with `ltrim()` + normalization) | `commands/who.rs` | +| 9 | Human output for all 5 modes (canonical refs, scope warning, project context, truncation hints) | `commands/who.rs` | +| 10 | Robot JSON output for all 5 modes (input + resolved_input + `ref` + `since_mode` + `path_match` + discussion_id + mr_refs + bounded metadata + workload truncation) | `commands/who.rs` | +| 11 | Tests (mode discrimination, path queries incl. project-scoped probe + exact-no-escape + root-exact, LIKE escaping, path normalization, queries, self-review exclusion, note_count correctness, sorted participants, overlap dual roles, multi-project mr_refs, truncation, project path lookup) | `commands/who.rs` | +| 12 | VALID_COMMANDS + robot-docs manifest | `main.rs` | +| 13 | cargo check + clippy + fmt | (verification) | +| 14 | EXPLAIN QUERY PLAN verification for each mode (including global AND scoped active index variants) | (verification) | + +## Verification + +```bash +# Compile + lint +cargo check --all-targets +cargo clippy --all-targets -- -D warnings +cargo fmt --check + +# Run tests +cargo test + +# Manual verification against real data +cargo run --release -- who src/features/global-search/ +cargo run --release -- who @asmith +cargo run --release -- who @asmith --reviews +cargo run --release -- who --active +cargo run --release -- who --active --since 30d +cargo run --release -- who --overlap libs/shared-frontend/src/features/global-search/ +cargo run --release -- who --path README.md +cargo run --release -- who --path Makefile +cargo run --release -- -J who src/features/global-search/ # robot mode +cargo run --release -- -J who @asmith # robot mode +cargo run --release -- -J who @asmith --reviews # robot mode +cargo run --release -- who src/features/global-search/ -p typescript # project scoped + +# Performance verification (required before merge): +# Confirm indexes are used as intended. 
One representative query per mode. +sqlite3 path/to/db.sqlite " + EXPLAIN QUERY PLAN + SELECT n.author_username, COUNT(*), MAX(n.created_at) + FROM notes n + WHERE n.note_type = 'DiffNote' + AND n.is_system = 0 + AND n.position_new_path LIKE 'src/features/global-search/%' ESCAPE '\' + AND n.created_at >= 0 + GROUP BY n.author_username; +" +# Expected: SEARCH notes USING INDEX idx_notes_diffnote_path_created + +sqlite3 path/to/db.sqlite " + EXPLAIN QUERY PLAN + SELECT d.id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.last_note_at >= 0 + ORDER BY d.last_note_at DESC + LIMIT 20; +" +# Expected: SEARCH discussions USING INDEX idx_discussions_unresolved_recent_global +# (uses the global index because project_id is unconstrained) + +sqlite3 path/to/db.sqlite " + EXPLAIN QUERY PLAN + SELECT d.id, d.last_note_at + FROM discussions d + WHERE d.resolvable = 1 AND d.resolved = 0 + AND d.project_id = 1 + AND d.last_note_at >= 0 + ORDER BY d.last_note_at DESC + LIMIT 20; +" +# Expected: SEARCH discussions USING INDEX idx_discussions_unresolved_recent +# (uses the project-scoped index because project_id is constrained) +# +# NOTE: The Active mode query now uses two static SQL variants (sql_global +# and sql_scoped) instead of the nullable-OR pattern. Both EXPLAIN QUERY PLAN +# checks above should confirm that each variant uses the intended index. +# If nullable-OR were used instead, the planner might choose a suboptimal +# "good enough for both" plan. 
+``` + +--- + +## Revision Changelog + +### Iteration 1 (2026-02-07): Initial plan incorporating ChatGPT feedback-1 + +Changes integrated from first external review: + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 1 | Fix `GROUP_CONCAT(DISTINCT x, sep)` -> subquery pattern | ChatGPT feedback-1 (Change 1) | **Bug fix**: prevents SQLite runtime error | +| 2 | Segment-aware path classification + `--path` flag | ChatGPT feedback-1 (Change 2) | **Bug fix**: `.github/`, `v1.2/`, root files now work | +| 3 | `(?N IS NULL OR ...)` nullable binding pattern | ChatGPT feedback-1 (Change 3) | **Simplification**: matches existing `timeline_seed.rs` pattern, eliminates dynamic param vectors | +| 4 | Filter path queries to `DiffNote` + LIKE escaping | ChatGPT feedback-1 (Change 4) | **Correctness + perf**: fewer rows scanned, no metachar edge cases | +| 5 | Use `n.created_at` for author branch timestamps | ChatGPT feedback-1 (Change 5) | **Accuracy**: anchors "last touch" to review activity, not MR metadata changes | +| 6 | Consistent `--since` via nullable binding on all workload queries | ChatGPT feedback-1 (Change 6), modified | **Consistency**: `--since` applies uniformly when provided, no-op when omitted | +| 7 | Dual role tracking in Overlap (`A`, `R`, `A+R`) | ChatGPT feedback-1 (Change 7) | **Information**: preserves reviewer+author signal instead of collapsing | +| 8 | Migration 017: composite indexes for hot paths | ChatGPT feedback-1 (Change 8) | **Performance**: makes queries usable on 280K notes | +| 9 | `input` object in robot JSON envelope | ChatGPT feedback-1 (Change 9) | **Reproducibility**: agents can trace exactly what query ran | +| 10 | Clap `conflicts_with` / `requires` constraints | ChatGPT feedback-1 (Change 10) | **UX**: invalid combos caught at parse time with good error messages | +| 11 | `escape_like()` + `build_path_pattern()` helpers | Original + ChatGPT synthesis | **Correctness**: centralized, tested path handling | +| 12 | 
Additional tests: `build_path_pattern`, `escape_like`, overlap dual roles | New | **Coverage**: validates new path logic and role tracking | + +### Iteration 1.1 (2026-02-07): Incorporating ChatGPT feedback-2 + +Changes integrated from second external review: + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 13 | `resolved_input` in robot JSON (mode, project_id, project_path, since_ms, since_iso) | ChatGPT feedback-2 (Change 1) | **Reproducibility**: agents get computed values, not just raw args; fuzzy project resolution becomes stable | +| 14 | `WhoRun` wrapper struct carrying resolved inputs + result | ChatGPT feedback-2 (Change 1) | **Architecture**: resolved values computed once in `run_who`, propagated cleanly to JSON output | +| 15 | `lookup_project_path()` helper for resolved project display | ChatGPT feedback-2 (Change 1) | **Completeness**: project_path in resolved_input needs a DB lookup | +| 16 | Parameterize LIMIT as `?N` — eliminate ALL `format!()` in SQL | ChatGPT feedback-2 (Change 2) | **Consistency**: aligns with "fully static SQL" principle; enables prepared statement caching | +| 17 | `is_system = 0` on Expert/Overlap author branches | ChatGPT feedback-2 (Change 4) | **Consistency**: system note exclusion now uniform across all DiffNote queries | +| 18 | Migration 017 index reordering: `position_new_path` leads (not `note_type`) | ChatGPT feedback-2 (Change 5a) | **Performance**: partial index predicate already handles `note_type`; leading column should be most selective variable predicate | +| 19 | New `idx_notes_discussion_author` composite index | ChatGPT feedback-2 (Change 5b) | **Performance**: covers EXISTS subquery in Workload and participants subquery in Active | +| 20 | Partial index on DiffNote index includes `is_system = 0` | ChatGPT feedback-2 (Change 5a), synthesized | **Performance**: index covers the full predicate, not just note_type | +| 21 | `--path` help text updated to mention dotless files (Makefile, 
LICENSE) | ChatGPT feedback-2 (Change 3), adapted | **UX**: clear documentation of the limitation and escape hatch | +| 22 | Test for `lookup_project_path` + dotless file `build_path_pattern` | New | **Coverage**: validates new helper and documents known behavior | + +### Iteration 2 (2026-02-07): Incorporating ChatGPT feedback-3 + +This iteration integrates the strongest improvements from the third external review, focused on query plan quality, multi-project correctness, reproducibility, and performance. + +**Honest assessment of what feedback-3 got right:** + +The third review identified several real deficiencies in the plan. The project scoping bug (`m.project_id` vs `n.project_id`) was a genuine correctness issue that would have degraded index usage. The stable MR refs proposal addressed a real ambiguity in multi-project databases. The CTE-based Active query was structurally cleaner. And `prepare_cached()` was a free win we should have had from the start. + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 23 | Fix project scoping to `n.project_id` in Expert/Overlap author branches | ChatGPT feedback-3 (Change 1) | **Correctness + perf**: scoping on `n.project_id` lets SQLite use `idx_notes_diffnote_path_created` (which has `project_id` as a trailing column). `m.project_id` forces a broader join before filtering. | +| 24 | `PathQuery` struct with `is_prefix` flag for exact vs prefix matching | ChatGPT feedback-3 (Change 2), adapted | **Performance**: exact file paths use `=` (stronger planner signal) instead of `LIKE` without wildcards. Two static SQL strings selected at prepare time — no dynamic SQL. | +| 25 | SQL-level Expert aggregation via CTE | ChatGPT feedback-3 (Change 3), adapted | **Performance + simplicity**: scoring, sorting, and LIMIT all happen in SQL. Eliminates Rust-side HashMap merge/sort/truncate. Makes SQL LIMIT effective (previously meaningless due to post-query aggregation). 
Deterministic tiebreaker (`last_seen_at DESC, username ASC`). | +| 26 | Project-qualified MR refs (`group/project!iid`) in Overlap | ChatGPT feedback-3 (Change 4) | **Multi-project correctness**: bare `!123` is ambiguous. JOIN to `projects` table produces globally unique, actionable references. | +| 27 | HashSet dedup for Overlap MR refs | ChatGPT feedback-3 (Change 4) | **Performance**: replaces O(n^2) `Vec.contains()` with O(1) `HashSet.insert()`. | +| 28 | CTE-based Active query (eliminate correlated subqueries) | ChatGPT feedback-3 (Change 5) | **Performance**: `picked` CTE selects limited discussions first, `note_agg` CTE joins notes once. Avoids per-row correlated subqueries for note_count and participants. | +| 29 | `prepare_cached()` on all query functions | ChatGPT feedback-3 (Change 6) | **Performance**: free win since all SQL is static. Statement cache avoids re-parsing. | +| 30 | Project path shown in human output (Workload + Overlap) | ChatGPT feedback-3 (Change 7) | **Multi-project UX**: `#42` and `!100` aren't unique without project context. Human output now shows project_path for issues, MRs, and overlap. | +| 31 | `since_was_default` in resolved_input | ChatGPT feedback-3 (Change 8) | **Reproducibility**: agents can distinguish explicit `--since 6m` from defaulted. Important for intent replay. | +| 32 | `discussion_id` in ActiveDiscussion | ChatGPT feedback-3 (Change 8) | **Agent ergonomics**: stable entity ID for follow-up. Without this, discussions require fragile multi-field matching. | +| 33 | EXPLAIN QUERY PLAN verification step | ChatGPT feedback-3 (Change 9) | **Operational**: confirms indexes are actually used. One representative query per mode. | +| 34 | Multi-project overlap test (`test_overlap_multi_project_mr_refs`) | New | **Coverage**: validates project-qualified MR refs with same iid in different projects. | + +### Decisions NOT adopted from feedback-3 + +| Proposal | Why not | +|----------|---------| +| `?4 = 1 AND ... 
OR ?4 = 0 AND ...` conditional path matching in single SQL | Adds a runtime conditional branch inside SQL that the planner can't optimize statically. Two separate SQL strings selected at prepare time is cleaner — the planner sees `=` or `LIKE` unambiguously, and both strings are independently cacheable. | +| Fully SQL-aggregated Overlap mode | Overlap still needs Rust-side merge because the UNION ALL returns rows per-role per-user, and we need to combine author + reviewer counts + deduplicate MR refs. The CTE approach works for Expert (simple numeric aggregation), but Overlap's `GROUP_CONCAT` of refs across branches requires set operations that are awkward in pure SQL. | + +### Decisions NOT adopted from feedback-2 + +| Proposal | Why not | +|----------|---------| +| `PathMatch` struct with dual exact+prefix SQL matching | Adds branching complexity (`?4 = 1 AND ... OR ...`) to every path query for narrow edge case (dotless root files). `--path` handles this cleanly with zero query complexity. Revisit if real usage shows confusion. | +| `db_path` in `resolved_input` | Leaks internal filesystem details. Agents already know their config. No debugging value over `project_path`. | + +### Feedback-2 items truncated (file cut off at line 302) + +ChatGPT feedback-2 was truncated mid-sentence during index redesign discussion (Change 5c about discussions index column ordering). The surviving feedback was fully evaluated and integrated where appropriate. The truncated portion appeared to be discussing `(project_id, last_note_at)` ordering for the discussions index, which was already adopted in the migration redesign above. + +### Iteration 5 (2026-02-07): Incorporating ChatGPT feedback-5 + +**Honest assessment of what feedback-5 got right:** + +Feedback-5 identified two genuine **correctness bugs** that would have shipped broken behavior. 
First, `build_path_query` applied `escape_like()` even for exact matches — LIKE metacharacters (`_`, `%`, `\`) are not special in `=` comparisons, so the escaped value wouldn't match the stored path. Second, the Active mode `note_agg` CTE used `SELECT DISTINCT discussion_id, author_username` then `COUNT(*)`, producing a participant count instead of a note count. Both would have been caught in testing, but catching them in the plan is cheaper. + +Beyond bugs, feedback-5 correctly identified: the global Active index gap (existing `(project_id, last_note_at)` can't serve unscoped ordering), incoherent Overlap units (reviewer counting DiffNotes vs author counting MRs), and the value of canonical refs in Workload output. The `ltrim()` suggestion for Reviews was a practical robustness fix. The scope-warning suggestion was pure presentation but prevents misinterpretation in multi-project databases. + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 35 | Fix `build_path_query`: don't `escape_like()` for exact match (`=`) | ChatGPT feedback-5 (Change 1) | **Bug fix**: LIKE metacharacters in filenames (`_`, `%`) would cause exact-match misses. Only escape for LIKE, use raw path for `=`. | +| 36 | Root paths (no `/`) via `--path` treated as exact match | ChatGPT feedback-5 (Change 1) | **Bug fix**: `--path Makefile` previously produced `Makefile/%` (prefix), returning zero results. Now produces `Makefile` (exact). | +| 37 | Active mode: split `note_agg` into `note_counts` + `participants` CTEs | ChatGPT feedback-5 (Change 2) | **Bug fix**: `note_count` was counting distinct participants, not notes. A discussion with 5 notes from 2 users showed `note_count: 2`. Now correctly shows `note_count: 5`. | +| 38 | Add `idx_discussions_unresolved_recent_global` index (single-column `last_note_at`) | ChatGPT feedback-5 (Change 3) | **Performance**: `(project_id, last_note_at)` can't satisfy `ORDER BY last_note_at DESC` when `project_id` is unconstrained. 
The global partial index handles the common `lore who --active` case. | +| 39 | Overlap reviewer branch: `COUNT(*)` -> `COUNT(DISTINCT m.id)` | ChatGPT feedback-5 (Change 4) | **Correctness**: reviewer counted DiffNotes, author counted MRs. Summing into `touch_count` mixed units. Both now count distinct MRs. Human output header changed from "Touches" to "MRs". | +| 40 | Canonical `ref_` field on `WorkloadIssue` and `WorkloadMr` | ChatGPT feedback-5 (Change 5) | **Actionability**: `group/project#iid` / `group/project!iid` in both human and robot output. Single copy-pasteable token. Replaces separate `iid` + `project_path` display in human output. | +| 41 | `ltrim(n.body)` in Reviews category extraction SQL | ChatGPT feedback-5 (Change 6) | **Robustness**: tolerates leading whitespace before `**prefix**` (e.g., ` **suggestion**: ...`), which is common in practice. | +| 42 | Scope warning in human output for Expert/Active/Overlap | ChatGPT feedback-5 (stretch) | **UX**: dim hint "(aggregated across all projects; use -p to scope)" when `project_id` is None. Prevents misinterpretation in multi-project databases. | +| 43 | `test_build_path_query_exact_does_not_escape` test | ChatGPT feedback-5 (Change 7) | **Coverage**: catches the exact-match escaping bug. | +| 44 | `test_path_flag_dotless_root_file_is_exact` test | ChatGPT feedback-5 (Change 7) | **Coverage**: ensures `--path Makefile` produces exact match, not prefix. | +| 45 | Active `test_active_query` extended to verify `note_count` correctness | ChatGPT feedback-5 (Change 7) | **Coverage**: second note by same participant must produce `note_count: 2`, catching the iteration-4 regression. | +| 46 | Updated `test_build_path_query` expectations for root files | Consequential | **Consistency**: Makefile/LICENSE now assert exact match, not prefix. Added trailing `/` override test. 
|
+
+### Decisions NOT adopted from feedback-5
+
+| Proposal | Why not |
+|----------|---------|
+| (none rejected) | All feedback-5 changes were adopted. The feedback was tightly scoped, correctness-focused, and every change addressed a real deficiency. |
+
+### Iteration 6 (2026-02-07): Incorporating ChatGPT feedback-6
+
+**Honest assessment of what feedback-6 got right:**
+
+Feedback-6 identified six distinct improvements, all with genuine merit. The self-review exclusion (#2) was a data quality issue that already existed in Reviews mode but was inconsistently applied to Expert and Overlap — a real "write the same fix everywhere" oversight. The MR-breadth scoring (#3) addresses a signal quality problem: raw DiffNote counts reward comment volume over breadth of engagement, meaning someone who writes 30 comments on one MR would outrank someone who reviewed 5 different MRs. The DB probe for dotless subdir files (#1) closes the last heuristic gap without adding any user-facing complexity. The deterministic ordering (#4) and truncation signaling (#5) are robot-mode correctness issues that would cause real flakes in automated pipelines. The accumulator pattern (#6) is a clean O(n) improvement over the previous drain-rebuild-per-row approach.
+
+The one area where feedback-6 was *mostly* right but deserved extra scrutiny was the scoring weights. The proposed `review_mr_count * 20 + author_mr_count * 12 + review_note_count * 1` is directionally correct (MR breadth dominates, note count is secondary), but the specific coefficients are inherently arbitrary. Rather than redesign them, we adopted the formula as-is since it produces reasonable rankings and can be tuned later with real data.
+ +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 47 | DB probe for dotless subdir files (`src/Dockerfile`, `infra/Makefile`) in `build_path_query` | ChatGPT feedback-6 (Change 1) | **Correctness**: dotless files in subdirectories were misclassified as directories, producing `path/%` (zero or wrong hits). DB probe uses existing partial index. | +| 48 | `build_path_query` signature: `(path) -> PathQuery` -> `(conn, path) -> Result` | ChatGPT feedback-6 (Change 1), consequential | **Architecture**: DB probe requires connection. All callers updated. | +| 49 | Self-review exclusion in Expert reviewer branch | ChatGPT feedback-6 (Change 2) | **Data quality**: MR authors commenting on their own diffs were counted as reviewers, inflating scores. Now filtered `n.author_username != m.author_username`. | +| 50 | Self-review exclusion in Overlap reviewer branch | ChatGPT feedback-6 (Change 2) | **Data quality**: same fix for Overlap mode. Consistent with Reviews mode which already filtered `m.author_username != ?1`. | +| 51 | Expert reviewer branch: JOINs through `discussions -> merge_requests` | ChatGPT feedback-6 (Changes 2+3), consequential | **Architecture**: needed for both self-review exclusion and MR-breadth counting. Reviewer branch now matches the JOIN pattern of the author branch. | +| 52 | Expert scoring: MR-breadth primary (`COUNT(DISTINCT m.id)`) with note intensity secondary | ChatGPT feedback-6 (Change 3) | **Signal quality**: prevents "comment storm" gaming. Score formula: `review_mr * 20 + author_mr * 12 + review_notes * 1`. | +| 53 | `Expert` struct: `review_count -> review_mr_count + review_note_count`, `author_count -> author_mr_count`, `score: f64 -> i64` | ChatGPT feedback-6 (Change 3), adapted | **API clarity**: distinct field names reflect distinct metrics. Integer score avoids floating-point display issues. 
| +| 54 | Deterministic `participants` ordering: sort after `GROUP_CONCAT` parse | ChatGPT feedback-6 (Change 4) | **Robot reproducibility**: `GROUP_CONCAT` order is undefined in SQLite. Sorting eliminates run-to-run flakes. | +| 55 | Deterministic `mr_refs` ordering: sort `HashSet`-derived vectors | ChatGPT feedback-6 (Change 4) | **Robot reproducibility**: `HashSet` iteration order is nondeterministic. Sorting produces stable output. | +| 56 | Overlap sort: full tie-breakers (`touch_count DESC, last_seen_at DESC, username ASC`) | ChatGPT feedback-6 (Change 4) | **Robot reproducibility**: users with equal `touch_count` now have a stable order instead of arbitrary HashMap iteration order. | +| 57 | Truncation signaling: `truncated: bool` on `ExpertResult`, `ActiveResult`, `OverlapResult` | ChatGPT feedback-6 (Change 5) | **Observability**: agents and humans know when results were cut off and can retry with higher `--limit`. | +| 58 | `LIMIT + 1` query pattern for truncation detection | ChatGPT feedback-6 (Change 5) | **Implementation**: query one extra row, detect overflow, trim to requested limit. Applied to Expert, Active, Overlap. | +| 59 | Truncation hints in human output | ChatGPT feedback-6 (Change 5) | **UX**: dim hint "(showing first -n; rerun with a higher --limit)" when results are truncated. | +| 60 | `truncated` field in robot JSON for Expert, Active, Overlap | ChatGPT feedback-6 (Change 5) | **Robot ergonomics**: agents can programmatically detect truncation and adjust limit. | +| 61 | Overlap accumulator pattern: `OverlapAcc` with `HashSet` for `mr_refs` | ChatGPT feedback-6 (Change 6) | **Performance**: replaces expensive `drain -> collect -> insert -> collect` per row with `HashSet.insert()` from the start. Convert to sorted `Vec` once at end. 
| +| 62 | Test: `test_build_path_query_dotless_subdir_file_uses_db_probe` | ChatGPT feedback-6 (Change 7) | **Coverage**: validates DB probe detects dotless subdir files, and falls through to prefix when no DB data. | +| 63 | Test: `test_expert_excludes_self_review_notes` | ChatGPT feedback-6 (Change 7) | **Coverage**: validates MR author DiffNotes on own MR don't inflate reviewer counts. | +| 64 | Test: `test_overlap_excludes_self_review_notes` | ChatGPT feedback-6 (Change 7) | **Coverage**: validates self-review exclusion in Overlap mode. | +| 65 | Test: `test_active_participants_sorted` | ChatGPT feedback-6 (Change 7) | **Coverage**: validates participants are alphabetically sorted for deterministic output. | +| 66 | Test: `test_expert_truncation` | ChatGPT feedback-6 (Change 7) | **Coverage**: validates truncation flag is set correctly when results exceed limit. | +| 67 | Updated human output headers: `Reviews -> Reviewed(MRs)`, added `Notes` column | Consequential | **Clarity**: column headers now reflect the underlying metrics (MR count vs note count). | + +### Decisions NOT adopted from feedback-6 + +| Proposal | Why not | +|----------|---------| +| (none rejected) | All feedback-6 changes were adopted. Every proposal addressed a real deficiency — correctness (self-review exclusion, dotless files), signal quality (MR-breadth scoring), reproducibility (deterministic ordering, truncation), or performance (accumulator pattern). The feedback was exceptionally well-targeted. | + +### Iteration 7 (2026-02-07): Incorporating ChatGPT feedback-7 + +**Honest assessment of what feedback-7 got right:** + +Feedback-7 identified six improvements, all with genuine merit and no rejected proposals. The strongest was the project-scoped path probe (#1): the original `build_path_query` probed the entire DB, meaning a path that exists as a dotless file in project A would force exact-match behavior even when running `-p project_B` where it doesn't exist. 
This is a real cross-project misclassification bug. The two-way probe (exact then prefix, both scoped) resolves it cleanly within the existing static-SQL constraint. The bounded payload proposals (#2, #3) address a real operational hazard — unbounded `participants` and `mr_refs` arrays in robot JSON can produce enormous payloads that break downstream pipelines. The "Last Seen" rename (#4) is a semantic accuracy fix — for author rows the timestamp comes from review notes on their MR, not their own action. The MR state filter on reviewer branches (#5) was an obvious consistency gap — author branches already filtered to `opened|merged`, but reviewer branches didn't. The design principle (#6) codifies the bounded payload contract. + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 68 | Project-scoped two-way probe in `build_path_query(conn, path, project_id)` | ChatGPT feedback-7 (Change 1) | **Correctness**: prevents cross-project misclassification. Exact probe and prefix probe both scoped via `(?2 IS NULL OR project_id = ?2)`. Costs at most 2 indexed queries. | +| 69 | Bounded `participants` in `ActiveDiscussion`: `participants_total` + `participants_truncated`, cap at 50 | ChatGPT feedback-7 (Change 2) | **Robot safety**: prevents unbounded JSON arrays. Agents get metadata to detect truncation. | +| 70 | Bounded `mr_refs` in `OverlapUser`: `mr_refs_total` + `mr_refs_truncated`, cap at 50 | ChatGPT feedback-7 (Change 2) | **Robot safety**: same bounded payload pattern for MR refs. | +| 71 | Workload truncation metadata: `*_truncated` booleans on all 4 sections, LIMIT+1 pattern | ChatGPT feedback-7 (Change 3) | **Consistency**: Workload now has the same truncation contract as Expert/Active/Overlap. Workload was the only mode without it; its truncation was silent. 
| +| 72 | Rename `last_active_ms` -> `last_seen_ms` (Expert), `last_touch_at` -> `last_seen_at` (Overlap) | ChatGPT feedback-7 (Change 4) | **Semantic accuracy**: for author rows, the timestamp is derived from review activity on their MR, not their direct action. "Last Seen" is accurate across both reviewer and author branches. | +| 73 | `m.state IN ('opened','merged')` on Expert/Overlap reviewer branches | ChatGPT feedback-7 (Change 5) | **Signal quality**: closed/unmerged MRs are noise in reviewer branches. Matches existing author branch filter. Consistent across all DiffNote queries. | +| 74 | Design principle #11: Bounded payloads | ChatGPT feedback-7 (Change 6) | **Contract**: codifies that robot JSON never emits unbounded arrays, with `*_total` + `*_truncated` metadata. | +| 75 | Truncation hints in Workload human output (per-section) | Consequential | **UX**: each Workload section now shows dim truncation hint when applicable. | +| 76 | Test: `test_build_path_query_probe_is_project_scoped` | ChatGPT feedback-7 (Change 1) | **Coverage**: validates cross-project probe isolation — data in project 1 doesn't force exact-match in project 2. | +| 77 | Updated SQL aliases: `last_seen_at` everywhere (was `last_active_at` / `last_touch_at`) | Consequential | **Consistency**: unified timestamp naming across Expert and Overlap internal SQL and output. | +| 78 | Robot JSON: `truncation` object in Workload output with per-section booleans | Consequential | **Robot ergonomics**: agents can detect which Workload sections were truncated. | + +### Decisions NOT adopted from feedback-7 + +| Proposal | Why not | +|----------|---------| +| (none rejected) | All feedback-7 changes were adopted. Every proposal addressed a real deficiency — correctness (project-scoped probes), operational safety (bounded payloads), consistency (workload truncation, MR state filtering), and semantic accuracy (timestamp naming). The feedback was well-scoped and practical. 
| +### Iteration 8 (2026-02-07): Incorporating ChatGPT feedback-8 + +**Honest assessment of what feedback-8 got right:** + +Feedback-8 identified seven improvements, all adopted. The strongest was the Active mode query variant split (#2): the `(?N IS NULL OR d.project_id = ?N)` nullable-OR pattern prevents SQLite from knowing at prepare time whether `project_id` is constrained, potentially choosing a suboptimal index. With two carefully crafted indexes (`idx_discussions_unresolved_recent` for scoped, `idx_discussions_unresolved_recent_global` for global), using nullable-OR undermines the very indexes we built. Splitting into two static SQL variants — selected at runtime via `match project_id` — ensures the planner sees unambiguous predicates and picks the intended index. This was the highest-impact performance fix. + +The `since_mode` tri-state (#1) fixed a genuine semantic bug: `since_was_default = true` was ambiguous for Workload mode (Workload has no time window at all, so the flag could not distinguish "no window" from "default applied"). The path normalization (#3) prevents the most common "silent zero results" footgun (users pasting `./src/foo/` or `/src/foo/`). The `path_match` observability (#4) is a low-cost debugging aid that helps both agents and humans understand exact-vs-prefix classification. The `--limit` hard bound (#5) is a safety valve against misconfigured agents producing enormous payloads. The `total_unresolved_in_window` rename (#6) eliminates an ambiguous field name that could be misread as "total unresolved globally." The statement cache discipline note (#7) codifies an implicit best practice. + +| # | Change | Origin | Impact | +|---|--------|--------|--------| +| 79 | `since_mode` tri-state replaces `since_was_default` boolean | ChatGPT feedback-8 (Change 1) | **Semantic correctness**: Workload's "no window" is distinct from "default applied". Tri-state (`"default"` / `"explicit"` / `"none"`) eliminates robot-mode ambiguity for intent replay. 
| +| 80 | Active mode: split nullable-OR into two static SQL variants (global + scoped) | ChatGPT feedback-8 (Change 2) | **Index utilization**: ensures `idx_discussions_unresolved_recent_global` is used for global queries and `idx_discussions_unresolved_recent` for scoped. Nullable-OR prevented the planner from committing to either index at prepare time. Applied to both total count and main CTE query. | +| 81 | `normalize_repo_path()` input normalization for paths | ChatGPT feedback-8 (Change 3) | **Defensive UX**: strips `./`, leading `/`, collapses `//`, converts `\` -> `/` (Windows paste). Prevents silent zero-result footguns from common path copy-paste patterns. | +| 82 | `WhoMode` path variants own `String` (not `&'a str`) | ChatGPT feedback-8 (Change 3), consequential | **Architecture**: path normalization produces new strings; borrowing from args is no longer possible for path variants. Username variants still borrow. | +| 83 | `path_match` field in `ExpertResult` / `OverlapResult` + robot JSON | ChatGPT feedback-8 (Change 4) | **Debuggability**: `"exact"` or `"prefix"` in both human (dim hint line) and robot output. Helps diagnose zero-result cases caused by path classification. Minimal runtime cost. | +| 84 | Hard upper bound on `--limit` via `value_parser!(u16).range(1..=500)` | ChatGPT feedback-8 (Change 5) | **Output safety**: prevents `--limit 50000` from producing enormous payloads, slow queries, or memory spikes. Clamped at the CLI boundary before execution. Type changed to `u16`. | +| 85 | Rename `total_unresolved` -> `total_unresolved_in_window` in `ActiveResult` | ChatGPT feedback-8 (Change 6) | **Semantic clarity**: the count is scoped to the time window, not a global total. Prevents misinterpretation in human header and robot JSON. 
| +| 86 | Design principle #1 amended: nullable-OR caveat + statement cache discipline | ChatGPT feedback-8 (Changes 2, 7) | **Plan hygiene**: codifies when to split SQL variants vs use nullable-OR, and mandates preparing exactly one statement per invocation when multiple variants exist. | +| 87 | Test: `test_normalize_repo_path` | ChatGPT feedback-8 (Change 3) | **Coverage**: validates `./`, `/`, `\\`, `//`, whitespace normalization and identity for clean paths. | + +### Decisions NOT adopted from feedback-8 + +| Proposal | Why not | +|----------|---------| +| `Box::leak(norm.into_boxed_str())` for path ownership in `WhoMode` | Memory leak (even if small and short-lived in a CLI). Changed `WhoMode` path variants to own `String` directly instead, which is clean and zero-cost for a CLI that exits after one invocation. |