feat(who): expand expert + overlap queries with mr_file_changes and mr_reviewers

Chain: bd-jec (config flag) -> bd-2yo (fetch MR diffs) -> bd-3qn6 (rewrite who queries)

- Add fetch_mr_file_changes config option and --no-file-changes CLI flag
- Add GitLab MR diffs API fetch pipeline with watermark-based sync
- Create migration 020 for diffs_synced_for_updated_at watermark column
- Rewrite query_expert() and query_overlap() to use 4-signal UNION ALL:
  DiffNote reviewers, DiffNote MR authors, file-change authors, file-change reviewers
- Deduplicate across signal types via COUNT(DISTINCT CASE WHEN ... THEN mr_id END)
- Add insert_file_change test helper, 8 new who tests, all 397 tests pass
- Also includes: list performance migration 019, autocorrect module, README updates

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-08 13:35:14 -05:00
parent 435a208c93
commit 95b7183add
19 changed files with 2139 additions and 291 deletions

802
src/cli/autocorrect.rs Normal file
View File

@@ -0,0 +1,802 @@
use serde::Serialize;
use strsim::jaro_winkler;
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/// A single correction applied to one argument.
///
/// Produced by `try_correct` and collected by `correct_args`.
#[derive(Debug, Clone, Serialize)]
pub struct Correction {
/// The argument exactly as it appeared in the raw args.
pub original: String,
/// The rewritten argument that replaces `original` in the corrected args.
pub corrected: String,
/// The rule that produced this correction.
pub rule: CorrectionRule,
/// Score attached by the rule: a fixed value for exact and case-only
/// matches, the similarity score (possibly discounted) for fuzzy matches.
pub confidence: f64,
}
/// Which rule triggered the correction.
///
/// Serialized in snake_case (see the `rename_all` attribute below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CorrectionRule {
/// A long flag written with a single dash, e.g. `-robot` -> `--robot`.
SingleDashLongFlag,
/// A flag that differs from a valid one only by letter case,
/// e.g. `--Robot` -> `--robot`.
CaseNormalization,
/// A misspelled flag matched by string similarity,
/// e.g. `--staate` -> `--state`.
FuzzyFlag,
}
/// Result of the correction pass over raw args.
#[derive(Debug, Clone)]
pub struct CorrectionResult {
/// All args in their original order, with each corrected arg substituted
/// in place of the original.
pub args: Vec<String>,
/// One entry per arg that was rewritten, in the order the args were seen.
pub corrections: Vec<Correction>,
}
// ---------------------------------------------------------------------------
// Flag registry
// ---------------------------------------------------------------------------
/// Global flags accepted by every command (from `Cli` struct).
///
/// These are always part of the valid-flag set built by `valid_flags_for`,
/// whichever subcommand is detected. The `registry_covers_global_flags` test
/// fails when a clap global long flag is missing from this list.
const GLOBAL_FLAGS: &[&str] = &[
"--config",
"--robot",
"--json",
"--color",
"--quiet",
"--no-quiet",
"--verbose",
"--no-verbose",
"--log-format",
];
/// Per-subcommand flags. Each entry is `(command_name, &[flags])`.
/// Hidden `--no-*` variants are included so they can be fuzzy-matched too.
///
/// Lookup is by exact `command_name`; `valid_flags_for` uses the first entry
/// whose name matches. The `registry_covers_command_flags` test fails when a
/// clap subcommand has a long flag that is missing from its entry here.
const COMMAND_FLAGS: &[(&str, &[&str])] = &[
(
"issues",
&[
"--limit",
"--fields",
"--state",
"--project",
"--author",
"--assignee",
"--label",
"--milestone",
"--since",
"--due-before",
"--has-due",
"--no-has-due",
"--sort",
"--asc",
"--no-asc",
"--open",
"--no-open",
],
),
(
"mrs",
&[
"--limit",
"--fields",
"--state",
"--project",
"--author",
"--assignee",
"--reviewer",
"--label",
"--since",
"--draft",
"--no-draft",
"--target",
"--source",
"--sort",
"--asc",
"--no-asc",
"--open",
"--no-open",
],
),
(
"ingest",
&[
"--project",
"--force",
"--no-force",
"--full",
"--no-full",
"--dry-run",
"--no-dry-run",
],
),
(
"sync",
&[
"--full",
"--no-full",
"--force",
"--no-force",
"--no-embed",
"--no-docs",
"--no-events",
"--no-file-changes",
"--dry-run",
"--no-dry-run",
],
),
(
"search",
&[
"--mode",
"--type",
"--author",
"--project",
"--label",
"--path",
"--after",
"--updated-after",
"--limit",
"--explain",
"--no-explain",
"--fts-mode",
],
),
(
"embed",
&["--full", "--no-full", "--retry-failed", "--no-retry-failed"],
),
(
"stats",
&[
"--check",
"--no-check",
"--repair",
"--dry-run",
"--no-dry-run",
],
),
("count", &["--for"]),
(
"timeline",
&[
"--project",
"--since",
"--depth",
"--expand-mentions",
"--limit",
"--max-seeds",
"--max-entities",
"--max-evidence",
],
),
(
"who",
&[
"--path",
"--active",
"--overlap",
"--reviews",
"--since",
"--project",
"--limit",
],
),
(
"init",
&[
"--force",
"--non-interactive",
"--gitlab-url",
"--token-env-var",
"--projects",
],
),
("generate-docs", &["--full", "--project"]),
// Registered with no flags of its own, so only the global flags are valid.
("completions", &[]),
(
"list",
&[
"--limit",
"--project",
"--state",
"--author",
"--assignee",
"--label",
"--milestone",
"--since",
"--due-before",
"--has-due-date",
"--sort",
"--order",
"--open",
"--draft",
"--no-draft",
"--reviewer",
"--target-branch",
"--source-branch",
],
),
("show", &["--project"]),
("reset", &["--yes"]),
];
/// Valid values for enum-like flags, used for post-clap error enhancement.
///
/// Each entry is `(flag, &[accepted values])`. The table is keyed by flag name
/// only, not by subcommand; `valid_values_for_flag` looks entries up
/// case-insensitively.
pub const ENUM_VALUES: &[(&str, &[&str])] = &[
("--state", &["opened", "closed", "merged", "locked", "all"]),
("--mode", &["lexical", "hybrid", "semantic"]),
("--sort", &["updated", "created", "iid"]),
("--type", &["issue", "mr", "discussion"]),
("--fts-mode", &["safe", "raw"]),
("--color", &["auto", "always", "never"]),
("--log-format", &["text", "json"]),
("--for", &["issue", "mr"]),
];
// ---------------------------------------------------------------------------
// Correction thresholds
// ---------------------------------------------------------------------------
/// Minimum Jaro-Winkler similarity a candidate flag must reach before
/// `try_correct` rewrites an arg to it. The post-clap suggestion helper
/// `suggest_similar_flag` uses its own, lower cut-off.
const FUZZY_FLAG_THRESHOLD: f64 = 0.8;
// ---------------------------------------------------------------------------
// Core logic
// ---------------------------------------------------------------------------
/// Find the subcommand being invoked: the first arg after the binary name
/// that is neither a flag nor the value consumed by a value-taking global flag.
///
/// Returns `None` when every remaining arg is a flag or a flag value.
fn detect_subcommand(args: &[String]) -> Option<&str> {
    // args[0] is the binary name, so scanning starts at args[1].
    let mut remaining = args.iter().skip(1).map(String::as_str);
    while let Some(token) = remaining.next() {
        if !token.starts_with('-') {
            // First positional arg is the subcommand.
            return Some(token);
        }
        // Only these global flags are known to take a separate value. The
        // inline `--flag=value` form never equals one of these literals, so it
        // consumes nothing extra.
        if matches!(token, "--config" | "-c" | "--color" | "--log-format") {
            // Swallow the value so it is not mistaken for the subcommand.
            remaining.next();
        }
    }
    None
}
/// Collect the long flags that are valid for `subcommand`.
///
/// The global flags always come first. With a known subcommand, that command's
/// registry entry is appended (nothing is appended when the name has no
/// entry). With no subcommand, every registered flag is appended once, in
/// registry order.
fn valid_flags_for(subcommand: Option<&str>) -> Vec<&'static str> {
    let mut known: Vec<&'static str> = Vec::from(GLOBAL_FLAGS);
    match subcommand {
        Some(cmd) => {
            // Only the first registry entry with a matching name is used.
            if let Some((_, own_flags)) = COMMAND_FLAGS.iter().find(|(name, _)| *name == cmd) {
                known.extend(own_flags.iter().copied());
            }
        }
        None => {
            // No subcommand detected — include all flags for maximum matching,
            // skipping any that are already present.
            let every_flag = COMMAND_FLAGS
                .iter()
                .flat_map(|(_, list)| list.iter().copied());
            for flag in every_flag {
                if !known.contains(&flag) {
                    known.push(flag);
                }
            }
        }
    }
    known
}
/// Run the pre-clap correction pass on raw args.
///
/// The subcommand is detected from the raw args and determines which flags
/// count as valid. Every arg is then offered to `try_correct`; args that are
/// not rewritten are passed through unchanged, so the output always has the
/// same length and order as the input.
///
/// Correction stops at the first bare `--`: that terminator and everything
/// after it are positional data by convention and are passed through verbatim.
///
/// Returns the (possibly modified) args and any corrections applied.
pub fn correct_args(raw: Vec<String>) -> CorrectionResult {
    let subcommand = detect_subcommand(&raw);
    let valid = valid_flags_for(subcommand);
    let mut corrected = Vec::with_capacity(raw.len());
    let mut corrections = Vec::new();
    let mut past_terminator = false;
    for arg in raw {
        // After `--` nothing is a flag any more; rewriting e.g. a search term
        // that merely looks like a misspelled flag would corrupt user input.
        if past_terminator || arg == "--" {
            past_terminator = true;
            corrected.push(arg);
            continue;
        }
        match try_correct(&arg, &valid) {
            Some(fixed) => {
                // The corrected text is needed twice: in the arg list and in
                // the correction record.
                corrected.push(fixed.corrected.clone());
                corrections.push(fixed);
            }
            None => corrected.push(arg),
        }
    }
    CorrectionResult {
        args: corrected,
        corrections,
    }
}
/// Try to correct a single arg. Returns `None` if no correction needed.
fn try_correct(arg: &str, valid_flags: &[&str]) -> Option<Correction> {
// Only attempt correction on flag-like args (starts with `-`)
if !arg.starts_with('-') {
return None;
}
// Skip short flags — they're unambiguous single chars (-p, -n, -v, -J)
// Also skip stacked short flags (-vvv)
if !arg.starts_with("--") {
// Rule 1: Single-dash long flag — e.g. `-robot` (len > 2, not a valid short flag)
// A short flag is `-` + single char, optionally stacked (-vvv).
// If it's `-` + multiple chars and NOT all the same char, it's likely a single-dash long flag.
let after_dash = &arg[1..];
// Check if it's a stacked short flag like -vvv (all same char)
let all_same_char = after_dash.len() > 1
&& after_dash
.chars()
.all(|c| c == after_dash.chars().next().unwrap_or('\0'));
if all_same_char {
return None;
}
// Single char = valid short flag, don't touch
if after_dash.len() == 1 {
return None;
}
// It looks like a single-dash long flag (e.g. `-robot`, `-state`)
let candidate = format!("--{after_dash}");
// Check exact match first (case-sensitive)
if valid_flags.contains(&candidate.as_str()) {
return Some(Correction {
original: arg.to_string(),
corrected: candidate,
rule: CorrectionRule::SingleDashLongFlag,
confidence: 0.95,
});
}
// Check case-insensitive exact match
let lower = candidate.to_lowercase();
if let Some(&flag) = valid_flags.iter().find(|f| f.to_lowercase() == lower) {
return Some(Correction {
original: arg.to_string(),
corrected: flag.to_string(),
rule: CorrectionRule::SingleDashLongFlag,
confidence: 0.95,
});
}
// Try fuzzy on the single-dash candidate
if let Some((best_flag, score)) = best_fuzzy_match(&lower, valid_flags)
&& score >= FUZZY_FLAG_THRESHOLD
{
return Some(Correction {
original: arg.to_string(),
corrected: best_flag.to_string(),
rule: CorrectionRule::SingleDashLongFlag,
confidence: score * 0.95, // discount slightly for compound correction
});
}
return None;
}
// For `--flag` or `--flag=value` forms: only correct the flag name
let (flag_part, value_suffix) = if let Some(eq_pos) = arg.find('=') {
(&arg[..eq_pos], Some(&arg[eq_pos..]))
} else {
(arg, None)
};
// Already valid? No correction needed.
if valid_flags.contains(&flag_part) {
return None;
}
// Rule 2: Case normalization — `--Robot` -> `--robot`
let lower = flag_part.to_lowercase();
if lower != flag_part
&& let Some(&flag) = valid_flags.iter().find(|f| f.to_lowercase() == lower)
{
let corrected = match value_suffix {
Some(suffix) => format!("{flag}{suffix}"),
None => flag.to_string(),
};
return Some(Correction {
original: arg.to_string(),
corrected,
rule: CorrectionRule::CaseNormalization,
confidence: 0.9,
});
}
// Rule 3: Fuzzy flag match — `--staate` -> `--state`
if let Some((best_flag, score)) = best_fuzzy_match(&lower, valid_flags)
&& score >= FUZZY_FLAG_THRESHOLD
{
let corrected = match value_suffix {
Some(suffix) => format!("{best_flag}{suffix}"),
None => best_flag.to_string(),
};
return Some(Correction {
original: arg.to_string(),
corrected,
rule: CorrectionRule::FuzzyFlag,
confidence: score,
});
}
None
}
/// Score `input` (expected to be lowercased already) against every valid flag
/// with Jaro-Winkler and return the highest-scoring flag with its score.
///
/// Returns `None` only when `valid_flags` is empty. When several flags share
/// the top score, the one latest in `valid_flags` is returned.
fn best_fuzzy_match<'a>(input: &str, valid_flags: &[&'a str]) -> Option<(&'a str, f64)> {
    let mut best: Option<(&'a str, f64)> = None;
    for &flag in valid_flags {
        let score = jaro_winkler(input, flag);
        // The current best survives only if it is strictly greater; ties and
        // incomparable scores go to the later flag.
        let keep_current = matches!(best, Some((_, top)) if top > score);
        if !keep_current {
            best = Some((flag, score));
        }
    }
    best
}
// ---------------------------------------------------------------------------
// Post-clap suggestion helpers
// ---------------------------------------------------------------------------
/// Suggest the valid flag most similar to `invalid_flag` (an unrecognized flag
/// reported by clap), considering only flags valid for the subcommand detected
/// in `raw_args`.
///
/// Returns `None` when the closest flag scores below 0.6.
pub fn suggest_similar_flag(invalid_flag: &str, raw_args: &[String]) -> Option<String> {
    let candidates = valid_flags_for(detect_subcommand(raw_args));
    let needle = invalid_flag.to_lowercase();
    best_fuzzy_match(&needle, &candidates)
        .filter(|&(_, similarity)| similarity >= 0.6)
        .map(|(flag, _)| flag.to_string())
}
/// Look up the accepted values for an enum-like flag.
///
/// The flag name is compared case-insensitively against `ENUM_VALUES`; the
/// first matching entry wins. Returns `None` for flags not in the table.
pub fn valid_values_for_flag(flag: &str) -> Option<&'static [&'static str]> {
    let wanted = flag.to_lowercase();
    for (name, values) in ENUM_VALUES {
        if name.to_lowercase() == wanted {
            return Some(*values);
        }
    }
    None
}
/// Format a human/robot teaching note for a correction.
pub fn format_teaching_note(correction: &Correction) -> String {
match correction.rule {
CorrectionRule::SingleDashLongFlag => {
format!(
"Use double-dash for long flags: {} (not {})",
correction.corrected, correction.original
)
}
CorrectionRule::CaseNormalization => {
format!(
"Flags are lowercase: {} (not {})",
correction.corrected, correction.original
)
}
CorrectionRule::FuzzyFlag => {
format!(
"Correct spelling: {} (not {})",
correction.corrected, correction.original
)
}
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for the correction pass, the post-clap helpers, subcommand
// detection, and drift between the static flag registry and the clap CLI.
#[cfg(test)]
mod tests {
use super::*;
// Split a command line on whitespace into owned args. There is no quoting
// support, so test inputs must not contain args with embedded spaces.
fn args(s: &str) -> Vec<String> {
s.split_whitespace().map(String::from).collect()
}
// ---- Single-dash long flag ----
#[test]
fn single_dash_robot() {
let result = correct_args(args("lore -robot issues -n 5"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].original, "-robot");
assert_eq!(result.corrections[0].corrected, "--robot");
assert_eq!(
result.corrections[0].rule,
CorrectionRule::SingleDashLongFlag
);
// The short flag `-n` and its value must come through untouched.
assert_eq!(result.args, args("lore --robot issues -n 5"));
}
#[test]
fn single_dash_state() {
let result = correct_args(args("lore --robot issues -state opened"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].corrected, "--state");
}
// ---- Case normalization ----
#[test]
fn case_robot() {
let result = correct_args(args("lore --Robot issues"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].corrected, "--robot");
assert_eq!(
result.corrections[0].rule,
CorrectionRule::CaseNormalization
);
}
#[test]
fn case_state_upper() {
let result = correct_args(args("lore --robot issues --State opened"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].corrected, "--state");
assert_eq!(
result.corrections[0].rule,
CorrectionRule::CaseNormalization
);
}
#[test]
fn case_all_upper() {
let result = correct_args(args("lore --ROBOT issues --STATE opened"));
assert_eq!(result.corrections.len(), 2);
// Corrections are reported in the order the args appear.
assert_eq!(result.corrections[0].corrected, "--robot");
assert_eq!(result.corrections[1].corrected, "--state");
}
// ---- Fuzzy flag match ----
#[test]
fn fuzzy_staate() {
let result = correct_args(args("lore --robot issues --staate opened"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].corrected, "--state");
assert_eq!(result.corrections[0].rule, CorrectionRule::FuzzyFlag);
}
#[test]
fn fuzzy_projct() {
let result = correct_args(args("lore --robot issues --projct group/repo"));
assert_eq!(result.corrections.len(), 1);
assert_eq!(result.corrections[0].corrected, "--project");
assert_eq!(result.corrections[0].rule, CorrectionRule::FuzzyFlag);
}
// ---- No corrections ----
#[test]
fn already_correct() {
let original = args("lore --robot issues --state opened -n 10");
let result = correct_args(original.clone());
assert!(result.corrections.is_empty());
assert_eq!(result.args, original);
}
#[test]
fn short_flags_untouched() {
let original = args("lore -J issues -n 10 -s opened -p group/repo");
let result = correct_args(original.clone());
assert!(result.corrections.is_empty());
}
#[test]
fn stacked_short_flags_untouched() {
let original = args("lore -vvv issues");
let result = correct_args(original.clone());
assert!(result.corrections.is_empty());
}
#[test]
fn positional_args_untouched() {
let result = correct_args(args("lore --robot search authentication"));
assert!(result.corrections.is_empty());
}
#[test]
fn wildly_wrong_flag_not_corrected() {
// `--xyzzy` shouldn't match anything above 0.8
let result = correct_args(args("lore --robot issues --xyzzy foo"));
assert!(result.corrections.is_empty());
}
// ---- Flag with = value ----
#[test]
fn flag_eq_value_case_correction() {
let result = correct_args(args("lore --robot issues --State=opened"));
assert_eq!(result.corrections.len(), 1);
// Only the flag name is rewritten; the `=opened` suffix is preserved.
assert_eq!(result.corrections[0].corrected, "--state=opened");
}
// ---- Multiple corrections in one invocation ----
#[test]
fn multiple_corrections() {
let result = correct_args(args(
"lore -robot issues --State opened --projct group/repo",
));
assert_eq!(result.corrections.len(), 3);
// Indices refer to positions in the full arg list (index 0 is `lore`).
assert_eq!(result.args[1], "--robot");
assert_eq!(result.args[3], "--state");
assert_eq!(result.args[5], "--project");
}
// ---- Teaching notes ----
#[test]
fn teaching_note_single_dash() {
let c = Correction {
original: "-robot".to_string(),
corrected: "--robot".to_string(),
rule: CorrectionRule::SingleDashLongFlag,
confidence: 0.95,
};
let note = format_teaching_note(&c);
assert!(note.contains("double-dash"));
assert!(note.contains("--robot"));
}
#[test]
fn teaching_note_case() {
let c = Correction {
original: "--State".to_string(),
corrected: "--state".to_string(),
rule: CorrectionRule::CaseNormalization,
confidence: 0.9,
};
let note = format_teaching_note(&c);
assert!(note.contains("lowercase"));
}
#[test]
fn teaching_note_fuzzy() {
let c = Correction {
original: "--staate".to_string(),
corrected: "--state".to_string(),
rule: CorrectionRule::FuzzyFlag,
confidence: 0.85,
};
let note = format_teaching_note(&c);
assert!(note.contains("spelling"));
}
// ---- Post-clap suggestion helpers ----
#[test]
fn suggest_similar_flag_works() {
let raw = args("lore --robot issues --xstat opened");
let suggestion = suggest_similar_flag("--xstat", &raw);
// Should suggest --state (close enough with lower threshold 0.6)
// Note: only the presence of a suggestion is asserted, not which flag.
assert!(suggestion.is_some());
}
#[test]
fn valid_values_for_state() {
let vals = valid_values_for_flag("--state");
assert!(vals.is_some());
let vals = vals.unwrap();
assert!(vals.contains(&"opened"));
assert!(vals.contains(&"closed"));
}
#[test]
fn valid_values_unknown_flag() {
assert!(valid_values_for_flag("--xyzzy").is_none());
}
// ---- Subcommand detection ----
#[test]
fn detect_subcommand_basic() {
assert_eq!(
detect_subcommand(&args("lore issues -n 10")),
Some("issues")
);
}
#[test]
fn detect_subcommand_with_globals() {
// `/tmp/c.json` is the value of `--config` and must not be taken for the
// subcommand.
assert_eq!(
detect_subcommand(&args("lore --robot --config /tmp/c.json mrs")),
Some("mrs")
);
}
#[test]
fn detect_subcommand_with_color() {
assert_eq!(
detect_subcommand(&args("lore --color never issues")),
Some("issues")
);
}
#[test]
fn detect_subcommand_none() {
assert_eq!(detect_subcommand(&args("lore --robot")), None);
}
// ---- Registry drift test ----
// This test uses clap introspection to verify our static registry covers
// all long flags defined in the Cli struct.
#[test]
fn registry_covers_global_flags() {
use clap::CommandFactory;
let cmd = crate::cli::Cli::command();
// Every top-level argument that has a long name, rendered as `--name`.
let clap_globals: Vec<String> = cmd
.get_arguments()
.filter_map(|a| a.get_long().map(|l| format!("--{l}")))
.collect();
for flag in &clap_globals {
// Skip help/version — clap adds these automatically
if flag == "--help" || flag == "--version" {
continue;
}
assert!(
GLOBAL_FLAGS.contains(&flag.as_str()),
"Clap global flag {flag} is missing from GLOBAL_FLAGS registry. \
Add it to GLOBAL_FLAGS in autocorrect.rs."
);
}
}
// Same drift check per subcommand. It is one-directional: it catches clap
// flags missing from the registry, not registry flags unknown to clap.
#[test]
fn registry_covers_command_flags() {
use clap::CommandFactory;
let cmd = crate::cli::Cli::command();
for sub in cmd.get_subcommands() {
let sub_name = sub.get_name().to_string();
// Find our registry entry
let registry_entry = COMMAND_FLAGS.iter().find(|(name, _)| *name == sub_name);
// Not all subcommands need entries (e.g., version, auth, status
// with no subcommand-specific flags)
let clap_flags: Vec<String> = sub
.get_arguments()
.filter_map(|a| a.get_long().map(|l| format!("--{l}")))
.filter(|f| !GLOBAL_FLAGS.contains(&f.as_str()))
.filter(|f| f != "--help" && f != "--version")
.collect();
if clap_flags.is_empty() {
continue;
}
// From here on the subcommand has flags of its own, so a registry entry
// is mandatory.
let registry_flags = registry_entry.map(|(_, flags)| *flags);
let registry_flags = registry_flags.unwrap_or_else(|| {
panic!(
"Subcommand '{sub_name}' has clap flags {clap_flags:?} but no COMMAND_FLAGS \
registry entry. Add it to COMMAND_FLAGS in autocorrect.rs."
)
});
for flag in &clap_flags {
assert!(
registry_flags.contains(&flag.as_str()),
"Clap flag {flag} on subcommand '{sub_name}' is missing from \
COMMAND_FLAGS registry. Add it to the '{sub_name}' entry in autocorrect.rs."
);
}
}
}
}