feat(autocorrect): add fuzzy subcommand matching and flag-as-subcommand detection

Extend the CLI autocorrection pipeline with two new correction rules that
help agents recover from common typos and misunderstandings:

1. SubcommandFuzzy (threshold 0.85): Fuzzy-matches typo'd subcommands
   against the canonical list. Examples:
   - "issuess" → "issues"
   - "timline" → "timeline"
   - "serach" → "search"
   
   Guards prevent false positives:
   - Words that look like misplaced global flags are skipped
   - Valid command prefixes are left to clap's infer_subcommands

2. FlagAsSubcommand: Detects when agents type subcommands as flags.
   Some agents (especially Codex) assume `--robot-docs` is a flag rather
   than a subcommand. This rule converts:
   - "--robot-docs" → "robot-docs"
   - "--generate-docs" → "generate-docs"

Also improves error messages in main.rs:
- MissingRequiredArgument: Contextual example based on detected subcommand
- MissingSubcommand: Lists common commands
- TooFewValues/TooManyValues: Command-specific help hints

Added CANONICAL_SUBCOMMANDS constant enumerating all valid subcommands
(including hidden ones) for fuzzy matching. This ensures agents that know
about hidden commands still get typo correction.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-03-06 11:15:15 -05:00
parent ffbd1e2dce
commit 1dfcfd3f83
2 changed files with 461 additions and 22 deletions

View File

@@ -22,6 +22,10 @@ pub enum CorrectionRule {
CaseNormalization,
FuzzyFlag,
SubcommandAlias,
/// Fuzzy subcommand match: "issuess" → "issues"
SubcommandFuzzy,
/// Flag-style subcommand: "--robot-docs" → "robot-docs"
FlagAsSubcommand,
ValueNormalization,
ValueFuzzy,
FlagPrefix,
@@ -294,6 +298,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
"--issues",
"--mrs",
"--activity",
"--mentions",
"--since",
"--project",
"--all",
@@ -351,6 +356,51 @@ const FUZZY_FLAG_THRESHOLD: f64 = 0.8;
/// avoid misleading agents. Still catches obvious typos like `--projct`.
const FUZZY_FLAG_THRESHOLD_STRICT: f64 = 0.9;
/// Minimum Jaro-Winkler similarity required to accept a fuzzy subcommand match.
///
/// Set higher than the default flag threshold (`FUZZY_FLAG_THRESHOLD`, 0.8)
/// because subcommand names are shorter words where Jaro-Winkler scores
/// inflate more easily; it is still below `FUZZY_FLAG_THRESHOLD_STRICT` (0.9).
/// The comparison is inclusive: a score equal to this value is accepted.
const FUZZY_SUBCMD_THRESHOLD: f64 = 0.85;
/// All canonical subcommand names, used for fuzzy matching and
/// flag-as-subcommand detection.
///
/// Hidden commands are included so agents that know about them can still
/// benefit from typo correction.
///
/// Entries must be lowercase: the lookups against this list lowercase the
/// user's input first and then compare with `contains`.
///
/// Keep this list in sync with the clap definition; the
/// `canonical_subcommands_covers_clap` test fails when a clap subcommand is
/// missing from it.
const CANONICAL_SUBCOMMANDS: &[&str] = &[
"issues",
"mrs",
"notes",
"ingest",
"count",
"status",
"auth",
"doctor",
"version",
"init",
"search",
"stats",
"generate-docs",
"embed",
"sync",
"migrate",
"health",
"robot-docs",
"completions",
"timeline",
"who",
"me",
"file-history",
"trace",
"drift",
"related",
"cron",
"token",
// Hidden but still valid
"backup",
"reset",
"list",
"show",
"auth-test",
"sync-status",
];
// ---------------------------------------------------------------------------
// Core logic
// ---------------------------------------------------------------------------
@@ -474,13 +524,15 @@ pub fn correct_args(raw: Vec<String>, strict: bool) -> CorrectionResult {
}
}
/// Phase A: Replace subcommand aliases with their canonical names.
/// Phase A: Replace subcommand aliases with their canonical names, fuzzy-match
/// typo'd subcommands, and detect flag-style subcommands (`--robot-docs`).
///
/// Handles forms that can't be expressed as clap `alias`/`visible_alias`
/// (underscores, no-separator forms). Case-insensitive matching.
/// Three-step pipeline:
/// - A1: Exact alias match (underscore/no-separator forms)
/// - A2: Fuzzy subcommand match ("issuess" → "issues")
/// - A3: Flag-as-subcommand ("--robot-docs" → "robot-docs")
fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>) -> Vec<String> {
// Find the subcommand position index, then check the alias map.
// Can't use iterators easily because we need to mutate args[i].
// Find the subcommand position index.
let mut skip_next = false;
let mut subcmd_idx = None;
for (i, arg) in args.iter().enumerate().skip(1) {
@@ -500,19 +552,106 @@ fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>)
subcmd_idx = Some(i);
break;
}
if let Some(i) = subcmd_idx
&& let Some((_, canonical)) = SUBCOMMAND_ALIASES
if let Some(i) = subcmd_idx {
// A1: Exact alias match (existing logic)
if let Some((_, canonical)) = SUBCOMMAND_ALIASES
.iter()
.find(|(alias, _)| alias.eq_ignore_ascii_case(&args[i]))
{
corrections.push(Correction {
original: args[i].clone(),
corrected: (*canonical).to_string(),
rule: CorrectionRule::SubcommandAlias,
confidence: 1.0,
});
args[i] = (*canonical).to_string();
{
corrections.push(Correction {
original: args[i].clone(),
corrected: (*canonical).to_string(),
rule: CorrectionRule::SubcommandAlias,
confidence: 1.0,
});
args[i] = (*canonical).to_string();
}
// A2: Fuzzy subcommand match — only if not already a canonical name
else {
let lower = args[i].to_lowercase();
if !CANONICAL_SUBCOMMANDS.contains(&lower.as_str()) {
// Guard: don't fuzzy-match words that look like misplaced global flags
// (e.g., "robot" should not match "robot-docs")
let as_flag = format!("--{lower}");
let is_flag_word = GLOBAL_FLAGS
.iter()
.any(|f| f.eq_ignore_ascii_case(&as_flag));
// Guard: don't fuzzy-match if it's a valid prefix of a canonical command
// (clap's infer_subcommands handles prefix resolution)
let is_prefix = CANONICAL_SUBCOMMANDS
.iter()
.any(|cmd| cmd.starts_with(&*lower) && *cmd != lower);
if !is_flag_word && !is_prefix {
let best = CANONICAL_SUBCOMMANDS
.iter()
.map(|cmd| (*cmd, jaro_winkler(&lower, cmd)))
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
if let Some((cmd, score)) = best
&& score >= FUZZY_SUBCMD_THRESHOLD
{
corrections.push(Correction {
original: args[i].clone(),
corrected: cmd.to_string(),
rule: CorrectionRule::SubcommandFuzzy,
confidence: score,
});
args[i] = cmd.to_string();
}
}
}
}
} else {
// A3: No subcommand detected — check for flag-style subcommands.
// Agents sometimes type `--robot-docs` or `--generate-docs` as flags.
let mut flag_as_subcmd: Option<(usize, String)> = None;
let mut flag_skip = false;
for (i, arg) in args.iter().enumerate().skip(1) {
if flag_skip {
flag_skip = false;
continue;
}
if !arg.starts_with("--") || arg.contains('=') {
continue;
}
let arg_lower = arg.to_lowercase();
// Skip clap built-in flags (--help, --version)
if CLAP_BUILTINS
.iter()
.any(|b| b.eq_ignore_ascii_case(&arg_lower))
{
continue;
}
// Skip known global flags
if GLOBAL_FLAGS.iter().any(|f| f.to_lowercase() == arg_lower) {
if matches!(arg_lower.as_str(), "--config" | "--color" | "--log-format") {
flag_skip = true;
}
continue;
}
let stripped = arg_lower[2..].to_string();
if CANONICAL_SUBCOMMANDS.contains(&stripped.as_str()) {
flag_as_subcmd = Some((i, stripped));
break;
}
}
if let Some((i, subcmd)) = flag_as_subcmd {
corrections.push(Correction {
original: args[i].clone(),
corrected: subcmd.clone(),
rule: CorrectionRule::FlagAsSubcommand,
confidence: 1.0,
});
args[i] = subcmd;
}
}
args
}
@@ -888,6 +1027,18 @@ pub fn format_teaching_note(correction: &Correction) -> String {
correction.corrected, correction.original
)
}
CorrectionRule::SubcommandFuzzy => {
format!(
"Correct command spelling: lore {} (not lore {})",
correction.corrected, correction.original
)
}
CorrectionRule::FlagAsSubcommand => {
format!(
"Commands are positional, not flags: lore {} (not lore --{})",
correction.corrected, correction.corrected
)
}
CorrectionRule::ValueNormalization => {
format!(
"Values are lowercase: {} (not {})",
@@ -1451,6 +1602,198 @@ mod tests {
assert_eq!(detect_subcommand(&args("lore --robot")), None);
}
// ---- Fuzzy subcommand matching (A2) ----
/// A doubled trailing letter ("issuess") resolves to the canonical `issues`
/// via the fuzzy subcommand rule, and the rewritten argv carries the fix.
#[test]
fn fuzzy_subcommand_issuess() {
    let outcome = correct_args(args("lore --robot issuess -n 10"), false);

    let fuzzy_hit = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::SubcommandFuzzy && fix.corrected == "issues");
    assert!(fuzzy_hit, "expected 'issuess' to fuzzy-match 'issues'");

    // The argument vector itself must contain the corrected name.
    let canonical = "issues".to_string();
    assert!(outcome.args.contains(&canonical));
}
/// A dropped letter ("timline") is corrected to `timeline`.
#[test]
fn fuzzy_subcommand_timline() {
    let result = correct_args(args("lore timline \"auth\""), false);
    assert!(
        result.corrections.iter().any(|c| c.corrected == "timeline"),
        "expected 'timline' to fuzzy-match 'timeline'"
    );
    // A logged correction alone is not enough: the rewritten argv must carry
    // the canonical name, otherwise clap would still be handed the typo.
    assert!(
        result.args.contains(&"timeline".to_string()),
        "expected corrected args to contain 'timeline'"
    );
}
/// Transposed letters ("serach") are corrected to `search`.
#[test]
fn fuzzy_subcommand_serach() {
    let result = correct_args(args("lore --robot serach \"auth bug\""), false);
    assert!(
        result.corrections.iter().any(|c| c.corrected == "search"),
        "expected 'serach' to fuzzy-match 'search'"
    );
    // A logged correction alone is not enough: the rewritten argv must carry
    // the canonical name, otherwise clap would still be handed the typo.
    assert!(
        result.args.contains(&"search".to_string()),
        "expected corrected args to contain 'search'"
    );
}
/// A subcommand that is already canonical must not produce a fuzzy correction.
#[test]
fn fuzzy_subcommand_already_valid_untouched() {
    let outcome = correct_args(args("lore issues -n 10"), false);
    let fuzzy_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::SubcommandFuzzy);
    assert!(!fuzzy_fired);
}
/// A bare "robot" is treated as a misplaced `--robot` flag rather than a typo
/// of `robot-docs`, so the fuzzy rule must stay silent.
#[test]
fn fuzzy_subcommand_robot_not_matched_to_robot_docs() {
    let outcome = correct_args(args("lore robot issues"), false);
    let fuzzy_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::SubcommandFuzzy);
    assert!(
        !fuzzy_fired,
        "expected 'robot' NOT to fuzzy-match 'robot-docs' (it's a misplaced flag)"
    );
}
/// "iss" is a strict prefix of `issues`; prefix resolution is left to clap's
/// `infer_subcommands`, so no fuzzy correction should be recorded.
#[test]
fn fuzzy_subcommand_prefix_deferred_to_clap() {
    let outcome = correct_args(args("lore iss -n 10"), false);
    let fuzzy_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::SubcommandFuzzy);
    assert!(
        !fuzzy_fired,
        "expected prefix 'iss' NOT to be fuzzy-matched (clap handles it)"
    );
}
/// Input that resembles no command at all must not be fuzzy-corrected.
#[test]
fn fuzzy_subcommand_wildly_wrong_not_matched() {
    let outcome = correct_args(args("lore xyzzyplugh"), false);
    let fuzzy_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::SubcommandFuzzy);
    assert!(
        !fuzzy_fired,
        "expected gibberish NOT to fuzzy-match any command"
    );
}
// ---- Flag-as-subcommand (A3) ----
/// `--robot-docs` typed as a flag is rewritten to the positional `robot-docs`.
#[test]
fn flag_as_subcommand_robot_docs() {
    let outcome = correct_args(args("lore --robot-docs"), false);

    let converted = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::FlagAsSubcommand && fix.corrected == "robot-docs");
    assert!(
        converted,
        "expected '--robot-docs' to be corrected to 'robot-docs'"
    );

    // The argument vector itself must contain the positional form.
    let positional = "robot-docs".to_string();
    assert!(outcome.args.contains(&positional));
}
/// `--generate-docs` typed as a flag is rewritten to the positional
/// `generate-docs`.
#[test]
fn flag_as_subcommand_generate_docs() {
    let result = correct_args(args("lore --generate-docs"), false);
    assert!(
        result
            .corrections
            .iter()
            .any(|c| c.corrected == "generate-docs"),
        "expected '--generate-docs' to be corrected to 'generate-docs'"
    );
    // The correction must be applied to argv, not merely reported: the
    // positional form has to be present and the flag form has to be gone.
    assert!(
        result.args.contains(&"generate-docs".to_string()),
        "expected corrected args to contain 'generate-docs'"
    );
    assert!(
        !result.args.contains(&"--generate-docs".to_string()),
        "expected '--generate-docs' to be removed from corrected args"
    );
}
/// With `lore --robot --robot-docs`, the genuine global flag `--robot` is kept
/// while the flag-styled subcommand `--robot-docs` becomes positional.
#[test]
fn flag_as_subcommand_with_robot_flag() {
    let outcome = correct_args(args("lore --robot --robot-docs"), false);
    let rewritten = outcome
        .corrections
        .iter()
        .any(|fix| fix.corrected == "robot-docs");
    assert!(rewritten);
    assert_eq!(outcome.args, args("lore --robot robot-docs"));
}
/// `--robot` is a real global flag and must never be rewritten to a
/// positional "robot".
#[test]
fn flag_as_subcommand_does_not_touch_real_flags() {
    let outcome = correct_args(args("lore --robot issues"), false);
    let flag_rule_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::FlagAsSubcommand);
    assert!(!flag_rule_fired);
}
/// When a subcommand is already detected, the flag-as-subcommand step (A3)
/// must not activate, even if a later flag matches a subcommand name.
#[test]
fn flag_as_subcommand_not_triggered_when_subcommand_present() {
    let outcome = correct_args(args("lore issues --robot-docs"), false);
    let flag_rule_fired = outcome
        .corrections
        .iter()
        .any(|fix| fix.rule == CorrectionRule::FlagAsSubcommand);
    assert!(
        !flag_rule_fired,
        "expected A3 not to trigger when subcommand is already present"
    );
}
// ---- Teaching notes for new rules ----
/// The teaching note for a fuzzy subcommand correction names the corrected
/// command and the original typo, in that order.
#[test]
fn teaching_note_subcommand_fuzzy() {
    let c = Correction {
        original: "issuess".to_string(),
        corrected: "issues".to_string(),
        rule: CorrectionRule::SubcommandFuzzy,
        confidence: 0.92,
    };
    let note = format_teaching_note(&c);
    assert!(note.contains("spelling"));
    // "issues" is a substring of the typo "issuess", so a bare
    // `contains("issues")` would pass even if only the original were echoed.
    // Pin the whole phrase so both values and their order are verified.
    assert!(
        note.contains("lore issues (not lore issuess)"),
        "unexpected teaching note: {note}"
    );
}
/// The teaching note for a flag-as-subcommand correction shows the positional
/// form as correct and the `--`-prefixed form as the mistake.
#[test]
fn teaching_note_flag_as_subcommand() {
    let c = Correction {
        original: "--robot-docs".to_string(),
        corrected: "robot-docs".to_string(),
        rule: CorrectionRule::FlagAsSubcommand,
        confidence: 1.0,
    };
    let note = format_teaching_note(&c);
    assert!(note.contains("positional"));
    // "robot-docs" is a substring of "--robot-docs", so a bare
    // `contains("robot-docs")` cannot tell the two forms apart. Pin the whole
    // phrase so both forms and their order are verified.
    assert!(
        note.contains("lore robot-docs (not lore --robot-docs)"),
        "unexpected teaching note: {note}"
    );
}
// ---- Canonical subcommands registry drift test ----
/// Drift guard: every subcommand clap reports for `Cli` must also be listed
/// in `CANONICAL_SUBCOMMANDS`.
#[test]
fn canonical_subcommands_covers_clap() {
    use clap::CommandFactory;

    let root = crate::cli::Cli::command();
    root.get_subcommands().for_each(|registered| {
        let name = registered.get_name();
        assert!(
            CANONICAL_SUBCOMMANDS.contains(&name),
            "Clap subcommand '{name}' is missing from CANONICAL_SUBCOMMANDS. \
             Add it to autocorrect.rs."
        );
    });
}
// ---- Registry drift test ----
// This test uses clap introspection to verify our static registry covers
// all long flags defined in the Cli struct.