docs: add lore who command design plan with 8 iterations of review feedback
Design document for `lore who` — a people intelligence query layer over existing GitLab data (280K notes, 210K discussions, 33K DiffNotes, 53 participants). Answers five collaboration questions: expert lookup by file/path, workload summary, review pattern analysis, active discussion tracking, and file overlap detection. Key design decisions refined across 8 feedback iterations: - All SQL is fully static (no format!()) with prepare_cached() throughout - Exact vs prefix path matching via PathQuery struct (two static SQL variants) - Self-review exclusion (author != reviewer) on all DiffNote branches - Deterministic output: sorted GROUP_CONCAT results, stable tie-breakers - Bounded payloads with *_total/*_truncated metadata for robot consumers - Truncation transparency via LIMIT+1 overflow detection pattern - Robot JSON includes resolved_input for reproducibility (since_mode tri-state) - Multi-project correctness with project-qualified entity references - Composite migration indexes designed for query selectivity on hot paths Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
353
docs/who-command-design.feedback-7.md
Normal file
353
docs/who-command-design.feedback-7.md
Normal file
@@ -0,0 +1,353 @@
|
||||
Below are the highest-leverage revisions I’d make to iteration 6 to improve correctness (multi-project edge cases), robot-mode reliability (bounded payloads + truncation), and signal quality—without changing the fundamental scope (still pure SQL over existing tables).
|
||||
|
||||
1) Make build_path_query project-aware and two-way probe (exact and prefix)
|
||||
Why
|
||||
|
||||
Your DB probe currently answers: “does this exact file exist anywhere in DiffNotes?” That can misclassify in a project-scoped run:
|
||||
|
||||
Path exists as a dotless file in Project A → probe returns true
|
||||
|
||||
User runs -p Project B where the path is a directory (or different shape) → you switch to exact, return empty, and miss valid prefix hits.
|
||||
|
||||
Also, you still have a minor heuristic fragility for dot directories when the user omits trailing / (e.g., .github/workflows): last segment has a dot → you treat as file unless forced dir.
|
||||
|
||||
Revision
|
||||
|
||||
Thread project_id into build_path_query(conn, path, project_id)
|
||||
|
||||
Probe exact first (scoped), then probe prefix (scoped)
|
||||
|
||||
Only fall back to heuristics if both probes fail
|
||||
|
||||
This keeps “static SQL, no dynamic assembly,” and costs at most 2 indexed existence queries per invocation.
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
- fn build_path_query(conn: &Connection, path: &str) -> Result<PathQuery> {
|
||||
+ fn build_path_query(conn: &Connection, path: &str, project_id: Option<i64>) -> Result<PathQuery> {
|
||||
let trimmed = path.trim_end_matches('/');
|
||||
let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
|
||||
let is_root = !trimmed.contains('/');
|
||||
let forced_dir = path.ends_with('/');
|
||||
- let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
|
||||
+ // Heuristic is now only a fallback; probes decide first.
|
||||
+ let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
|
||||
|
||||
- let exact_exists = if !looks_like_file && !forced_dir {
|
||||
- conn.query_row(
|
||||
- "SELECT 1 FROM notes
|
||||
- WHERE note_type = 'DiffNote'
|
||||
- AND is_system = 0
|
||||
- AND position_new_path = ?1
|
||||
- LIMIT 1",
|
||||
- rusqlite::params![trimmed],
|
||||
- |_| Ok(()),
|
||||
- )
|
||||
- .is_ok()
|
||||
- } else {
|
||||
- false
|
||||
- };
|
||||
+ // Probe 1: exact file exists (scoped)
|
||||
+ let exact_exists = conn.query_row(
|
||||
+ "SELECT 1 FROM notes
|
||||
+ WHERE note_type = 'DiffNote'
|
||||
+ AND is_system = 0
|
||||
+ AND position_new_path = ?1
|
||||
+ AND (?2 IS NULL OR project_id = ?2)
|
||||
+ LIMIT 1",
|
||||
+ rusqlite::params![trimmed, project_id],
|
||||
+ |_| Ok(()),
|
||||
+ ).is_ok();
|
||||
+
|
||||
+ // Probe 2: directory prefix exists (scoped)
|
||||
+ let prefix_exists = if !forced_dir {
|
||||
+ let escaped = escape_like(trimmed);
|
||||
+ let pat = format!("{escaped}/%");
|
||||
+ conn.query_row(
|
||||
+ "SELECT 1 FROM notes
|
||||
+ WHERE note_type = 'DiffNote'
|
||||
+ AND is_system = 0
|
||||
+ AND position_new_path LIKE ?1 ESCAPE '\\'
|
||||
+ AND (?2 IS NULL OR project_id = ?2)
|
||||
+ LIMIT 1",
|
||||
+ rusqlite::params![pat, project_id],
|
||||
+ |_| Ok(()),
|
||||
+ ).is_ok()
|
||||
+ } else { false };
|
||||
|
||||
- let is_file = looks_like_file || exact_exists;
|
||||
+ // Forced directory always wins; otherwise: exact > prefix > heuristic
|
||||
+ let is_file = if forced_dir { false }
|
||||
+ else if exact_exists { true }
|
||||
+ else if prefix_exists { false }
|
||||
+ else { looks_like_file };
|
||||
|
||||
if is_file {
|
||||
Ok(PathQuery { value: trimmed.to_string(), is_prefix: false })
|
||||
} else {
|
||||
let escaped = escape_like(trimmed);
|
||||
Ok(PathQuery { value: format!("{escaped}/%"), is_prefix: true })
|
||||
}
|
||||
}
|
||||
@@
|
||||
- let pq = build_path_query(conn, path)?;
|
||||
+ let pq = build_path_query(conn, path, project_id)?;
|
||||
|
||||
|
||||
Add test coverage for the multi-project misclassification case:
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
#[test]
|
||||
fn test_build_path_query_dotless_subdir_file_uses_db_probe() {
|
||||
@@
|
||||
- let pq = build_path_query(&conn, "src/Dockerfile").unwrap();
|
||||
+ let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
|
||||
@@
|
||||
- let pq2 = build_path_query(&conn2, "src/Dockerfile").unwrap();
|
||||
+ let pq2 = build_path_query(&conn2, "src/Dockerfile", None).unwrap();
|
||||
}
|
||||
+
|
||||
+ #[test]
|
||||
+ fn test_build_path_query_probe_is_project_scoped() {
|
||||
+ // Path exists as a dotless file in project 1; project 2 should not
|
||||
+ // treat it as an exact file unless it exists there too.
|
||||
+ let conn = setup_test_db();
|
||||
+ insert_project(&conn, 1, "team/a");
|
||||
+ insert_project(&conn, 2, "team/b");
|
||||
+ insert_mr(&conn, 1, 1, 10, "author_a", "opened");
|
||||
+ insert_discussion(&conn, 1, 1, Some(1), None, true, false);
|
||||
+ insert_diffnote(&conn, 1, 1, 1, "rev", "infra/Makefile", "note");
|
||||
+
|
||||
+ let pq_scoped = build_path_query(&conn, "infra/Makefile", Some(2)).unwrap();
|
||||
+ assert!(pq_scoped.is_prefix); // should fall back to prefix in project 2
|
||||
+ }
|
||||
|
||||
2) Bound robot payload sizes for participants and mr_refs (with totals + truncation)
|
||||
Why
|
||||
|
||||
mr_refs and participants can become unbounded arrays in robot mode, which is a real operational hazard:
|
||||
|
||||
huge JSON → slow, noisy diffs, brittle downstream pipelines
|
||||
|
||||
potential SQLite group_concat truncation becomes invisible (and you can’t distinguish “no refs” vs “refs truncated”)
|
||||
|
||||
Revision
|
||||
|
||||
Introduce hard caps and explicit metadata:
|
||||
|
||||
participants_total, participants_truncated
|
||||
|
||||
mr_refs_total, mr_refs_truncated
|
||||
|
||||
This is not scope creep—it’s defensive output hygiene.
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
pub struct ActiveDiscussion {
|
||||
@@
|
||||
pub participants: Vec<String>,
|
||||
+ pub participants_total: u32,
|
||||
+ pub participants_truncated: bool,
|
||||
}
|
||||
@@
|
||||
pub struct OverlapUser {
|
||||
@@
|
||||
pub mr_refs: Vec<String>,
|
||||
+ pub mr_refs_total: u32,
|
||||
+ pub mr_refs_truncated: bool,
|
||||
}
|
||||
|
||||
|
||||
Implementation sketch (Rust-side, deterministic):
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
fn query_active(...) -> Result<ActiveResult> {
|
||||
+ const MAX_PARTICIPANTS: usize = 50;
|
||||
@@
|
||||
- participants.sort();
|
||||
+ participants.sort();
|
||||
+ let participants_total = participants.len() as u32;
|
||||
+ let participants_truncated = participants.len() > MAX_PARTICIPANTS;
|
||||
+ if participants_truncated {
|
||||
+ participants.truncate(MAX_PARTICIPANTS);
|
||||
+ }
|
||||
@@
|
||||
Ok(ActiveDiscussion {
|
||||
@@
|
||||
participants,
|
||||
+ participants_total,
|
||||
+ participants_truncated,
|
||||
})
|
||||
@@
|
||||
fn query_overlap(...) -> Result<OverlapResult> {
|
||||
+ const MAX_MR_REFS_PER_USER: usize = 50;
|
||||
@@
|
||||
.map(|a| {
|
||||
let mut mr_refs: Vec<String> = a.mr_refs.into_iter().collect();
|
||||
mr_refs.sort();
|
||||
+ let mr_refs_total = mr_refs.len() as u32;
|
||||
+ let mr_refs_truncated = mr_refs.len() > MAX_MR_REFS_PER_USER;
|
||||
+ if mr_refs_truncated {
|
||||
+ mr_refs.truncate(MAX_MR_REFS_PER_USER);
|
||||
+ }
|
||||
OverlapUser {
|
||||
@@
|
||||
mr_refs,
|
||||
+ mr_refs_total,
|
||||
+ mr_refs_truncated,
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
Update robot JSON accordingly:
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
fn active_to_json(r: &ActiveResult) -> serde_json::Value {
|
||||
@@
|
||||
"participants": d.participants,
|
||||
+ "participants_total": d.participants_total,
|
||||
+ "participants_truncated": d.participants_truncated,
|
||||
}))
|
||||
@@
|
||||
fn overlap_to_json(r: &OverlapResult) -> serde_json::Value {
|
||||
@@
|
||||
"mr_refs": u.mr_refs,
|
||||
+ "mr_refs_total": u.mr_refs_total,
|
||||
+ "mr_refs_truncated": u.mr_refs_truncated,
|
||||
}))
|
||||
|
||||
|
||||
Also update robot-docs manifest schema snippet for who.active.discussions[] and who.overlap.users[].
|
||||
|
||||
3) Add truncation metadata to Workload sections (same LIMIT+1 pattern)
|
||||
Why
|
||||
|
||||
Workload is the mode most likely to be consumed by agents, and right now it has silent truncation (each section is LIMIT N with no signal). Your plan already treats truncation as a first-class contract elsewhere; Workload should match.
|
||||
|
||||
Revision
|
||||
|
||||
For each workload query:
|
||||
|
||||
request LIMIT + 1
|
||||
|
||||
set *_truncated booleans
|
||||
|
||||
trim to requested limit
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
pub struct WorkloadResult {
|
||||
pub username: String,
|
||||
pub assigned_issues: Vec<WorkloadIssue>,
|
||||
pub authored_mrs: Vec<WorkloadMr>,
|
||||
pub reviewing_mrs: Vec<WorkloadMr>,
|
||||
pub unresolved_discussions: Vec<WorkloadDiscussion>,
|
||||
+ pub assigned_issues_truncated: bool,
|
||||
+ pub authored_mrs_truncated: bool,
|
||||
+ pub reviewing_mrs_truncated: bool,
|
||||
+ pub unresolved_discussions_truncated: bool,
|
||||
}
|
||||
|
||||
|
||||
And in JSON include the booleans (plus you already have summary.counts).
|
||||
|
||||
This is mechanically repetitive but extremely valuable for automation.
|
||||
|
||||
4) Rename “Last Active” → “Last Seen” for Expert/Overlap
|
||||
Why
|
||||
|
||||
For “author” rows, the timestamp is derived from review activity on their MR (via MAX(n.created_at)), not necessarily that person’s direct action. Calling that “active” is semantically misleading. “Last seen” is accurate across both reviewer+author branches.
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
pub struct Expert {
|
||||
@@
|
||||
- pub last_active_ms: i64,
|
||||
+ pub last_seen_ms: i64,
|
||||
}
|
||||
@@
|
||||
pub struct OverlapUser {
|
||||
@@
|
||||
- pub last_touch_at: i64,
|
||||
+ pub last_seen_at: i64,
|
||||
@@
|
||||
fn print_expert_human(...) {
|
||||
@@
|
||||
- style("Last Active").bold(),
|
||||
+ style("Last Seen").bold(),
|
||||
@@
|
||||
- style(format_relative_time(expert.last_active_ms)).dim(),
|
||||
+ style(format_relative_time(expert.last_seen_ms)).dim(),
|
||||
|
||||
|
||||
(Keep internal SQL aliases consistent: last_seen_at everywhere.)
|
||||
|
||||
5) Make MR state filtering consistent in Expert/Overlap reviewer branches
|
||||
Why
|
||||
|
||||
You already restrict Overlap author branch to opened|merged, but reviewer branches can include closed/unmerged noise. Consistency improves signal quality and can reduce scan churn.
|
||||
|
||||
Low-risk revision: apply the same state filter to reviewer branches (Expert + Overlap). You can keep “closed” excluded by default without adding new flags.
|
||||
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
WHERE n.note_type = 'DiffNote'
|
||||
AND n.is_system = 0
|
||||
@@
|
||||
- AND n.created_at >= ?2
|
||||
+ AND m.state IN ('opened','merged')
|
||||
+ AND n.created_at >= ?2
|
||||
|
||||
|
||||
This is a semantic choice; if you later want archaeology across closed/unmerged, that belongs in a separate mode/flag, but I would not add it now.
|
||||
|
||||
6) Add a design principle for bounded outputs (aligns with robot-first reproducibility)
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
10. **Truncation transparency.** Result types carry a `truncated: bool` flag...
|
||||
+11. **Bounded payloads.** Robot JSON must never emit unbounded arrays (participants, refs).
|
||||
+ Large list fields are capped with `*_total` + `*_truncated` so agents can page/retry.
|
||||
|
||||
Consolidated plan metadata bump (Iteration 7)
|
||||
diff
|
||||
Copy code
|
||||
diff --git a/who-command-design.md b/who-command-design.md
|
||||
@@
|
||||
-iteration: 6
|
||||
+iteration: 7
|
||||
updated: 2026-02-07
|
||||
|
||||
Net effect (what you get)
|
||||
|
||||
Correct path classification under -p scoping (no cross-project probe leakage)
|
||||
|
||||
Deterministic + bounded robot payloads (no giant JSON surprises)
|
||||
|
||||
Uniform truncation contract across all modes (Workload no longer silently truncates)
|
||||
|
||||
Clearer semantics (“Last Seen” avoids misinterpretation)
|
||||
|
||||
Cleaner signals (reviewer branches ignore closed/unmerged by default)
|
||||
|
||||
If you want, I can also produce a second diff that updates the robot-docs schema block and the Verification EXPLAIN expectations to reflect the new probe queries and the state filter.
|
||||
Reference in New Issue
Block a user