Below are the highest-leverage revisions I’d make to this plan. I’m focusing on correctness pitfalls, SQLite gotchas, query performance on 280K notes, and reducing “dynamic SQL + param juggling” complexity—without turning this into a new ingestion project. Change 1 — Fix a hard SQLite bug in --active (GROUP_CONCAT DISTINCT + separator) Why SQLite does not allow GROUP_CONCAT(DISTINCT x, sep). With DISTINCT, SQLite only permits a single argument (GROUP_CONCAT(DISTINCT x)). Your current query will error at runtime in many SQLite versions. Revision Use a subquery that selects distinct participants, then GROUP_CONCAT with your separator. diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_active(...) - (SELECT GROUP_CONCAT(DISTINCT n.author_username, X'1F') - FROM notes n - WHERE n.discussion_id = d.id - AND n.is_system = 0 - AND n.author_username IS NOT NULL) AS participants + (SELECT GROUP_CONCAT(username, X'1F') FROM ( + SELECT DISTINCT n.author_username AS username + FROM notes n + WHERE n.discussion_id = d.id + AND n.is_system = 0 + AND n.author_username IS NOT NULL + ORDER BY username + )) AS participants Change 2 — Replace “contains('.') => exact file match” with segment-aware path classification Why path.contains('.') misclassifies directories like: .github/workflows/ src/v1.2/auth/ It also fails the “root file” case (README.md) because your mode discriminator only treats paths as paths if they contain /. Revision Add explicit --path to force Expert mode (covers root files cleanly). Classify file-vs-dir by checking last path segment for a dot, and whether the input ends with /. diff Copy code diff --git a/Plan.md b/Plan.md @@ pub struct WhoArgs { - /// Username or file path (path if contains /) - pub target: Option, + /// Username or file path shorthand (ambiguous for root files like README.md) + pub target: Option, + + /// Force expert mode for a file/directory path (supports root files like README.md) + #[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])] + pub path: Option, @@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { - if let Some(target) = &args.target { + if let Some(p) = &args.path { + return Ok(WhoMode::Expert { path: p }); + } + if let Some(target) = &args.target { let clean = target.strip_prefix('@').unwrap_or(target); if args.reviews { return Ok(WhoMode::Reviews { username: clean }); } - // Disambiguation: if target contains '/', it's a file path. - // GitLab usernames never contain '/'. - if target.contains('/') { + // Disambiguation: + // - treat as path if it contains '/' + // - otherwise treat as username (root files require --path) + if target.contains('/') { return Ok(WhoMode::Expert { path: target }); } return Ok(WhoMode::Workload { username: clean }); } And update the path pattern logic used by Expert/Overlap: diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_expert(...) - // Normalize path for LIKE matching: add trailing % if no extension - let path_pattern = if path.contains('.') { - path.to_string() // Exact file match - } else { - let trimmed = path.trim_end_matches('/'); - format!("{trimmed}/%") - }; + // Normalize: + // - if ends_with('/') => directory prefix + // - else if last segment contains '.' => file exact match + // - else => directory prefix + let trimmed = path.trim_end_matches('/'); + let last = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_file = !path.ends_with('/') && last.contains('.'); + let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; Change 3 — Stop building dynamic SQL strings for optional filters; always bind params Why Right now you’re mixing: dynamic project_clause string fragments ad-hoc param vectors placeholder renumbering by branch That’s brittle and easy to regress (especially when you add more conditions later). SQLite/rusqlite can bind Option to NULL, which enables a simple pattern: sql Copy code AND (?3 IS NULL OR n.project_id = ?3) Revision (representative; apply to all queries) diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_expert(...) - let project_clause = if project_id.is_some() { - "AND n.project_id = ?3" - } else { - "" - }; - - let sql = format!( + let sql = format!( "SELECT username, role, activity_count, last_active_at FROM ( @@ FROM notes n WHERE n.position_new_path LIKE ?1 AND n.is_system = 0 AND n.author_username IS NOT NULL AND n.created_at >= ?2 - {project_clause} + AND (?3 IS NULL OR n.project_id = ?3) @@ WHERE n.position_new_path LIKE ?1 AND m.author_username IS NOT NULL AND m.updated_at >= ?2 - {project_clause} + AND (?3 IS NULL OR n.project_id = ?3) GROUP BY m.author_username - )" + ) t" ); - - let mut params: Vec> = Vec::new(); - params.push(Box::new(path_pattern.clone())); - params.push(Box::new(since_ms)); - if let Some(pid) = project_id { - params.push(Box::new(pid)); - } - let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + let param_refs = rusqlite::params![path_pattern, since_ms, project_id]; Notes: Adds required derived-table alias t (some SQLite configurations are stricter). Eliminates the dynamic param vector and placeholder gymnastics. Change 4 — Filter “path touch” queries to DiffNotes and escape LIKE properly Why Only DiffNotes reliably have position_new_path; including other note types can skew counts and harm performance. LIKE treats % and _ as wildcards—rare in file paths, but not impossible (generated files, templates). Escaping is a low-cost robustness win. Revision Add note_type='DiffNote' and LIKE ... ESCAPE '\' plus a tiny escape helper. diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_expert(...) - FROM notes n - WHERE n.position_new_path LIKE ?1 + FROM notes n + WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\' AND n.is_system = 0 @@ diff --git a/Plan.md b/Plan.md @@ Helper Functions +fn escape_like(input: &str) -> String { + input.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_") +} And when building patterns: diff Copy code - let path_pattern = if is_file { trimmed.to_string() } else { format!("{trimmed}/%") }; + let base = escape_like(trimmed); + let path_pattern = if is_file { base } else { format!("{base}/%") }; Apply the same changes to query_overlap and any other position_new_path LIKE .... Change 5 — Use note timestamps for “touch since” semantics (Expert/Overlap author branch) Why In Expert/Overlap “author” branches you filter by m.updated_at >= since. That answers “MR updated recently” rather than “MR touched at this path recently”, which can surface stale ownership. Revision Filter by the note creation time (and use it for “last touch” where relevant). You can still compute author activity, but anchor it to note activity. diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_overlap(...) - WHERE n.position_new_path LIKE ?1 + WHERE n.note_type = 'DiffNote' + AND n.position_new_path LIKE ?1 ESCAPE '\' AND m.state IN ('opened', 'merged') AND m.author_username IS NOT NULL - AND m.updated_at >= ?2 + AND n.created_at >= ?2 AND (?3 IS NULL OR m.project_id = ?3) Same idea in Expert mode’s “MR authors” branch. Change 6 — Workload mode: apply --since consistently to unresolved discussions Why Workload’s unresolved discussions ignore since_ms. That makes --since partially misleading and can dump very old threads. Revision Filter on d.last_note_at when since_ms is set. diff Copy code diff --git a/Plan.md b/Plan.md @@ fn query_workload(...) - let disc_sql = format!( + let disc_since = if since_ms.is_some() { + "AND d.last_note_at >= ?2" + } else { "" }; + let disc_sql = format!( "SELECT d.noteable_type, @@ WHERE d.resolvable = 1 AND d.resolved = 0 AND EXISTS ( @@ ) {disc_project_filter} + {disc_since} ORDER BY d.last_note_at DESC LIMIT {limit}" ); @@ - // Rebuild params for discussion query (only username + optional project_id) - let mut disc_params: Vec> = Vec::new(); - disc_params.push(Box::new(username.to_string())); - if let Some(pid) = project_id { - disc_params.push(Box::new(pid)); - } + // Params: username, since_ms, project_id (NULLs ok) + let disc_param_refs = rusqlite::params![username, since_ms, project_id]; (If you adopt Change 3 fully, this becomes very clean.) Change 7 — Make Overlap results represent “both roles” instead of collapsing to one Why Collapsing to a single role loses valuable info (“they authored and reviewed”). Also your current “prefer author” rule is arbitrary for the “who else is touching this” question. Revision Track role counts separately and render as A, R, or A+R. diff Copy code diff --git a/Plan.md b/Plan.md @@ pub struct OverlapUser { pub username: String, - pub role: String, - pub touch_count: u32, + pub author_touch_count: u32, + pub review_touch_count: u32, + pub touch_count: u32, pub last_touch_at: i64, pub mr_iids: Vec, } @@ fn query_overlap(...) - let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { + let entry = user_map.entry(username.clone()).or_insert_with(|| OverlapUser { username: username.clone(), - role: role.clone(), + author_touch_count: 0, + review_touch_count: 0, touch_count: 0, last_touch_at: 0, mr_iids: Vec::new(), }); entry.touch_count += count; + if role == "author" { entry.author_touch_count += count; } + if role == "reviewer" { entry.review_touch_count += count; } @@ human output - println!( - " {:<16} {:<8} {:>7} {:<12} {}", + println!( + " {:<16} {:<6} {:>7} {:<12} {}", ... ); @@ - user.role, + format_roles(user.author_touch_count, user.review_touch_count), Change 8 — Add an “Index Audit + optional migration” step (big perf win, low blast radius) Why With 280K notes, the path/timestamp queries will degrade quickly without indexes. This isn’t “scope creep”; it’s making the feature usable. Revision (plan-level) Add a non-breaking migration that only creates indexes if missing. Optionally add a runtime check: if EXPLAIN QUERY PLAN indicates full table scan on notes, print a dim warning in human mode. diff Copy code diff --git a/Plan.md b/Plan.md @@ Implementation Order -| Step | What | Files | +| Step | What | Files | | 1 | CLI skeleton: `WhoArgs` + `Commands::Who` + dispatch + stub | `cli/mod.rs`, `commands/mod.rs`, `main.rs` | +| 1.5 | Index audit + add `CREATE INDEX IF NOT EXISTS` migration for who hot paths | `migrations/0xx_who_indexes.sql` | @@ Suggested indexes (tune names to your conventions): notes(note_type, position_new_path, created_at) notes(discussion_id, is_system, author_username) discussions(resolvable, resolved, last_note_at, project_id) merge_requests(project_id, state, updated_at, author_username) issue_assignees(username, issue_id) Even if SQLite can’t perfectly index LIKE, these still help with join and timestamp filters. Change 9 — Make robot JSON reproducible by echoing the effective query inputs Why Agent workflows benefit from a stable “query record”: what mode ran, what path/user, resolved project, effective since, limit. Revision Include an input object in JSON output. diff Copy code diff --git a/Plan.md b/Plan.md @@ struct WhoJsonData { mode: String, + input: serde_json::Value, #[serde(flatten)] result: serde_json::Value, } @@ pub fn print_who_json(...) - let output = WhoJsonEnvelope { + let input = serde_json::json!({ + "project": /* resolved or raw args.project */, + "since": /* resolved since ISO */, + "limit": /* args.limit */, + }); + let output = WhoJsonEnvelope { ok: true, data: WhoJsonData { mode: mode.to_string(), + input, result: data, }, meta: RobotMeta { elapsed_ms }, }; Change 10 — Tighten clap constraints so invalid combinations never reach resolve_mode Why Right now conflicts are enforced manually (or not at all). Clamp the invalid combos at the CLI layer: --active should conflict with target, --overlap, --reviews, --path --reviews should require a username (and should conflict with Expert path modes) diff Copy code diff --git a/Plan.md b/Plan.md @@ pub struct WhoArgs { - pub active: bool, + #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])] + pub active: bool, @@ - pub overlap: Option, + #[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])] + pub overlap: Option, @@ - pub reviews: bool, + #[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])] + pub reviews: bool, Summary of what I’d definitely change If you do nothing else, do these first: Fix GROUP_CONCAT(DISTINCT ..., sep) in Active mode (runtime error). Path classification: add --path, and stop using contains('.') globally. Remove dynamic SQL + param vectors: always bind project_id as nullable and use (? IS NULL OR ...). Filter to DiffNotes + LIKE escaping for correctness and fewer rows scanned. Optional index migration: otherwise this will feel slow/non-deterministically slow depending on local DB state. If you want, I can also provide a consolidated “v2 plan” as a single unified patch (one diff) rather than per-change snippets.