feat(autocorrect): add fuzzy subcommand matching and flag-as-subcommand detection
Extend the CLI autocorrection pipeline with two new correction rules that
help agents recover from common typos and misunderstandings:

1. SubcommandFuzzy (threshold 0.85): Fuzzy-matches typo'd subcommands
   against the canonical list. Examples:
   - "issuess" → "issues"
   - "timline" → "timeline"
   - "serach" → "search"

   Guards prevent false positives:
   - Words that look like misplaced global flags are skipped
   - Valid command prefixes are left to clap's infer_subcommands

2. FlagAsSubcommand: Detects when agents type subcommands as flags.
   Some agents (especially Codex) assume `--robot-docs` is a flag rather
   than a subcommand. This rule converts:
   - "--robot-docs" → "robot-docs"
   - "--generate-docs" → "generate-docs"

Also improves error messages in main.rs:
- MissingRequiredArgument: Contextual example based on detected subcommand
- MissingSubcommand: Lists common commands
- TooFewValues/TooManyValues: Command-specific help hints

Added CANONICAL_SUBCOMMANDS constant enumerating all valid subcommands
(including hidden ones) for fuzzy matching. This ensures agents that know
about hidden commands still get typo correction.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,10 @@ pub enum CorrectionRule {
|
||||
CaseNormalization,
|
||||
FuzzyFlag,
|
||||
SubcommandAlias,
|
||||
/// Fuzzy subcommand match: "issuess" → "issues"
|
||||
SubcommandFuzzy,
|
||||
/// Flag-style subcommand: "--robot-docs" → "robot-docs"
|
||||
FlagAsSubcommand,
|
||||
ValueNormalization,
|
||||
ValueFuzzy,
|
||||
FlagPrefix,
|
||||
@@ -294,6 +298,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
|
||||
"--issues",
|
||||
"--mrs",
|
||||
"--activity",
|
||||
"--mentions",
|
||||
"--since",
|
||||
"--project",
|
||||
"--all",
|
||||
@@ -351,6 +356,51 @@ const FUZZY_FLAG_THRESHOLD: f64 = 0.8;
|
||||
/// avoid misleading agents. Still catches obvious typos like `--projct`.
|
||||
const FUZZY_FLAG_THRESHOLD_STRICT: f64 = 0.9;
|
||||
|
||||
/// Minimum Jaro-Winkler score required to accept a fuzzy subcommand correction.
///
/// Set above the default flag threshold (`FUZZY_FLAG_THRESHOLD`, 0.8) because
/// subcommand names are shorter words where JW scores inflate more easily. It
/// is still below the strict flag threshold (`FUZZY_FLAG_THRESHOLD_STRICT`, 0.9).
const FUZZY_SUBCMD_THRESHOLD: f64 = 0.85;
|
||||
|
||||
/// Every canonical subcommand name, used both as the candidate list for fuzzy
/// subcommand matching and as the lookup table for flag-as-subcommand
/// detection (`--robot-docs` → `robot-docs`).
///
/// Hidden commands are listed too, so agents that know about them still get
/// typo correction. Entries are lowercase; callers lowercase their input
/// before comparing against this list.
const CANONICAL_SUBCOMMANDS: &[&str] = &[
    "issues",
    "mrs",
    "notes",
    "ingest",
    "count",
    "status",
    "auth",
    "doctor",
    "version",
    "init",
    "search",
    "stats",
    "generate-docs",
    "embed",
    "sync",
    "migrate",
    "health",
    "robot-docs",
    "completions",
    "timeline",
    "who",
    "me",
    "file-history",
    "trace",
    "drift",
    "related",
    "cron",
    "token",
    // Hidden but still valid
    "backup",
    "reset",
    "list",
    "show",
    "auth-test",
    "sync-status",
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Core logic
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -474,13 +524,15 @@ pub fn correct_args(raw: Vec<String>, strict: bool) -> CorrectionResult {
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase A: Replace subcommand aliases with their canonical names.
|
||||
/// Phase A: Replace subcommand aliases with their canonical names, fuzzy-match
|
||||
/// typo'd subcommands, and detect flag-style subcommands (`--robot-docs`).
|
||||
///
|
||||
/// Handles forms that can't be expressed as clap `alias`/`visible_alias`
|
||||
/// (underscores, no-separator forms). Case-insensitive matching.
|
||||
/// Three-step pipeline:
|
||||
/// - A1: Exact alias match (underscore/no-separator forms)
|
||||
/// - A2: Fuzzy subcommand match ("issuess" → "issues")
|
||||
/// - A3: Flag-as-subcommand ("--robot-docs" → "robot-docs")
|
||||
fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>) -> Vec<String> {
|
||||
// Find the subcommand position index, then check the alias map.
|
||||
// Can't use iterators easily because we need to mutate args[i].
|
||||
// Find the subcommand position index.
|
||||
let mut skip_next = false;
|
||||
let mut subcmd_idx = None;
|
||||
for (i, arg) in args.iter().enumerate().skip(1) {
|
||||
@@ -500,19 +552,106 @@ fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>)
|
||||
subcmd_idx = Some(i);
|
||||
break;
|
||||
}
|
||||
if let Some(i) = subcmd_idx
|
||||
&& let Some((_, canonical)) = SUBCOMMAND_ALIASES
|
||||
|
||||
if let Some(i) = subcmd_idx {
|
||||
// A1: Exact alias match (existing logic)
|
||||
if let Some((_, canonical)) = SUBCOMMAND_ALIASES
|
||||
.iter()
|
||||
.find(|(alias, _)| alias.eq_ignore_ascii_case(&args[i]))
|
||||
{
|
||||
corrections.push(Correction {
|
||||
original: args[i].clone(),
|
||||
corrected: (*canonical).to_string(),
|
||||
rule: CorrectionRule::SubcommandAlias,
|
||||
confidence: 1.0,
|
||||
});
|
||||
args[i] = (*canonical).to_string();
|
||||
{
|
||||
corrections.push(Correction {
|
||||
original: args[i].clone(),
|
||||
corrected: (*canonical).to_string(),
|
||||
rule: CorrectionRule::SubcommandAlias,
|
||||
confidence: 1.0,
|
||||
});
|
||||
args[i] = (*canonical).to_string();
|
||||
}
|
||||
// A2: Fuzzy subcommand match — only if not already a canonical name
|
||||
else {
|
||||
let lower = args[i].to_lowercase();
|
||||
if !CANONICAL_SUBCOMMANDS.contains(&lower.as_str()) {
|
||||
// Guard: don't fuzzy-match words that look like misplaced global flags
|
||||
// (e.g., "robot" should not match "robot-docs")
|
||||
let as_flag = format!("--{lower}");
|
||||
let is_flag_word = GLOBAL_FLAGS
|
||||
.iter()
|
||||
.any(|f| f.eq_ignore_ascii_case(&as_flag));
|
||||
|
||||
// Guard: don't fuzzy-match if it's a valid prefix of a canonical command
|
||||
// (clap's infer_subcommands handles prefix resolution)
|
||||
let is_prefix = CANONICAL_SUBCOMMANDS
|
||||
.iter()
|
||||
.any(|cmd| cmd.starts_with(&*lower) && *cmd != lower);
|
||||
|
||||
if !is_flag_word && !is_prefix {
|
||||
let best = CANONICAL_SUBCOMMANDS
|
||||
.iter()
|
||||
.map(|cmd| (*cmd, jaro_winkler(&lower, cmd)))
|
||||
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
if let Some((cmd, score)) = best
|
||||
&& score >= FUZZY_SUBCMD_THRESHOLD
|
||||
{
|
||||
corrections.push(Correction {
|
||||
original: args[i].clone(),
|
||||
corrected: cmd.to_string(),
|
||||
rule: CorrectionRule::SubcommandFuzzy,
|
||||
confidence: score,
|
||||
});
|
||||
args[i] = cmd.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// A3: No subcommand detected — check for flag-style subcommands.
|
||||
// Agents sometimes type `--robot-docs` or `--generate-docs` as flags.
|
||||
let mut flag_as_subcmd: Option<(usize, String)> = None;
|
||||
let mut flag_skip = false;
|
||||
for (i, arg) in args.iter().enumerate().skip(1) {
|
||||
if flag_skip {
|
||||
flag_skip = false;
|
||||
continue;
|
||||
}
|
||||
if !arg.starts_with("--") || arg.contains('=') {
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg_lower = arg.to_lowercase();
|
||||
// Skip clap built-in flags (--help, --version)
|
||||
if CLAP_BUILTINS
|
||||
.iter()
|
||||
.any(|b| b.eq_ignore_ascii_case(&arg_lower))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Skip known global flags
|
||||
if GLOBAL_FLAGS.iter().any(|f| f.to_lowercase() == arg_lower) {
|
||||
if matches!(arg_lower.as_str(), "--config" | "--color" | "--log-format") {
|
||||
flag_skip = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let stripped = arg_lower[2..].to_string();
|
||||
if CANONICAL_SUBCOMMANDS.contains(&stripped.as_str()) {
|
||||
flag_as_subcmd = Some((i, stripped));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((i, subcmd)) = flag_as_subcmd {
|
||||
corrections.push(Correction {
|
||||
original: args[i].clone(),
|
||||
corrected: subcmd.clone(),
|
||||
rule: CorrectionRule::FlagAsSubcommand,
|
||||
confidence: 1.0,
|
||||
});
|
||||
args[i] = subcmd;
|
||||
}
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
|
||||
@@ -888,6 +1027,18 @@ pub fn format_teaching_note(correction: &Correction) -> String {
|
||||
correction.corrected, correction.original
|
||||
)
|
||||
}
|
||||
CorrectionRule::SubcommandFuzzy => {
|
||||
format!(
|
||||
"Correct command spelling: lore {} (not lore {})",
|
||||
correction.corrected, correction.original
|
||||
)
|
||||
}
|
||||
CorrectionRule::FlagAsSubcommand => {
|
||||
format!(
|
||||
"Commands are positional, not flags: lore {} (not lore --{})",
|
||||
correction.corrected, correction.corrected
|
||||
)
|
||||
}
|
||||
CorrectionRule::ValueNormalization => {
|
||||
format!(
|
||||
"Values are lowercase: {} (not {})",
|
||||
@@ -1451,6 +1602,198 @@ mod tests {
|
||||
assert_eq!(detect_subcommand(&args("lore --robot")), None);
|
||||
}
|
||||
|
||||
// ---- Fuzzy subcommand matching (A2) ----
|
||||
|
||||
/// A trailing-letter typo ("issuess") is rewritten to "issues" by the fuzzy
/// subcommand rule, and the corrected word ends up in the argument list.
#[test]
fn fuzzy_subcommand_issuess() {
    let outcome = correct_args(args("lore --robot issuess -n 10"), false);
    let fuzzy_fix = outcome
        .corrections
        .iter()
        .find(|c| c.rule == CorrectionRule::SubcommandFuzzy && c.corrected == "issues");
    assert!(
        fuzzy_fix.is_some(),
        "expected 'issuess' to fuzzy-match 'issues'"
    );
    assert!(outcome.args.iter().any(|a| a == "issues"));
}
|
||||
|
||||
/// A dropped-letter typo ("timline") is rewritten to "timeline".
///
/// Checks the rule as well as the corrected text, so a correction produced by
/// some other rule cannot satisfy the test by accident.
#[test]
fn fuzzy_subcommand_timline() {
    let result = correct_args(args("lore timline \"auth\""), false);
    assert!(
        result
            .corrections
            .iter()
            .any(|c| c.rule == CorrectionRule::SubcommandFuzzy && c.corrected == "timeline"),
        "expected 'timline' to fuzzy-match 'timeline'"
    );
    assert!(
        result.args.contains(&"timeline".to_string()),
        "expected corrected args to contain 'timeline'"
    );
}
|
||||
|
||||
/// A transposed-letter typo ("serach") is rewritten to "search".
///
/// Checks the rule as well as the corrected text, so a correction produced by
/// some other rule cannot satisfy the test by accident.
#[test]
fn fuzzy_subcommand_serach() {
    let result = correct_args(args("lore --robot serach \"auth bug\""), false);
    assert!(
        result
            .corrections
            .iter()
            .any(|c| c.rule == CorrectionRule::SubcommandFuzzy && c.corrected == "search"),
        "expected 'serach' to fuzzy-match 'search'"
    );
    assert!(
        result.args.contains(&"search".to_string()),
        "expected corrected args to contain 'search'"
    );
}
|
||||
|
||||
/// A subcommand that is already canonical must not trigger the fuzzy rule.
#[test]
fn fuzzy_subcommand_already_valid_untouched() {
    let result = correct_args(args("lore issues -n 10"), false);
    assert!(
        result
            .corrections
            .iter()
            .all(|c| c.rule != CorrectionRule::SubcommandFuzzy),
        "expected canonical 'issues' NOT to be fuzzy-corrected"
    );
}
|
||||
|
||||
/// The bare word "robot" is treated as a misplaced `--robot` flag rather than
/// a typo for "robot-docs", so the fuzzy rule must leave it alone.
#[test]
fn fuzzy_subcommand_robot_not_matched_to_robot_docs() {
    let outcome = correct_args(args("lore robot issues"), false);
    let no_fuzzy_fix = outcome
        .corrections
        .iter()
        .all(|c| c.rule != CorrectionRule::SubcommandFuzzy);
    assert!(
        no_fuzzy_fix,
        "expected 'robot' NOT to fuzzy-match 'robot-docs' (it's a misplaced flag)"
    );
}
|
||||
|
||||
/// "iss" is a strict prefix of "issues"; prefix resolution is left to clap's
/// infer_subcommands, so the fuzzy rule must not fire.
#[test]
fn fuzzy_subcommand_prefix_deferred_to_clap() {
    let outcome = correct_args(args("lore iss -n 10"), false);
    let no_fuzzy_fix = outcome
        .corrections
        .iter()
        .all(|c| c.rule != CorrectionRule::SubcommandFuzzy);
    assert!(
        no_fuzzy_fix,
        "expected prefix 'iss' NOT to be fuzzy-matched (clap handles it)"
    );
}
|
||||
|
||||
/// Input that resembles no known command must not be fuzzy-corrected.
#[test]
fn fuzzy_subcommand_wildly_wrong_not_matched() {
    let outcome = correct_args(args("lore xyzzyplugh"), false);
    let no_fuzzy_fix = outcome
        .corrections
        .iter()
        .all(|c| c.rule != CorrectionRule::SubcommandFuzzy);
    assert!(
        no_fuzzy_fix,
        "expected gibberish NOT to fuzzy-match any command"
    );
}
|
||||
|
||||
// ---- Flag-as-subcommand (A3) ----
|
||||
|
||||
/// `--robot-docs` typed as a flag is rewritten to the positional subcommand
/// `robot-docs`, and the rewritten word ends up in the argument list.
#[test]
fn flag_as_subcommand_robot_docs() {
    let outcome = correct_args(args("lore --robot-docs"), false);
    let flag_fix = outcome
        .corrections
        .iter()
        .find(|c| c.rule == CorrectionRule::FlagAsSubcommand && c.corrected == "robot-docs");
    assert!(
        flag_fix.is_some(),
        "expected '--robot-docs' to be corrected to 'robot-docs'"
    );
    assert!(outcome.args.iter().any(|a| a == "robot-docs"));
}
|
||||
|
||||
/// `--generate-docs` typed as a flag is rewritten to the positional
/// subcommand `generate-docs`.
///
/// Checks the rule as well as the corrected text, and that the rewritten word
/// actually lands in the argument list.
#[test]
fn flag_as_subcommand_generate_docs() {
    let result = correct_args(args("lore --generate-docs"), false);
    assert!(
        result.corrections.iter().any(|c| {
            c.rule == CorrectionRule::FlagAsSubcommand && c.corrected == "generate-docs"
        }),
        "expected '--generate-docs' to be corrected to 'generate-docs'"
    );
    assert!(
        result.args.contains(&"generate-docs".to_string()),
        "expected corrected args to contain 'generate-docs'"
    );
}
|
||||
|
||||
/// `lore --robot --robot-docs`: `--robot` is a valid global flag and must be
/// kept, while `--robot-docs` is not a flag and becomes the subcommand.
#[test]
fn flag_as_subcommand_with_robot_flag() {
    let result = correct_args(args("lore --robot --robot-docs"), false);
    assert!(
        result
            .corrections
            .iter()
            .any(|c| c.rule == CorrectionRule::FlagAsSubcommand && c.corrected == "robot-docs"),
        "expected '--robot-docs' to be corrected to 'robot-docs' alongside --robot"
    );
    assert_eq!(result.args, args("lore --robot robot-docs"));
}
|
||||
|
||||
/// `--robot` is a real global flag, so it must never be rewritten to a
/// positional "robot" by the flag-as-subcommand rule.
#[test]
fn flag_as_subcommand_does_not_touch_real_flags() {
    let result = correct_args(args("lore --robot issues"), false);
    assert!(
        result
            .corrections
            .iter()
            .all(|c| c.rule != CorrectionRule::FlagAsSubcommand),
        "expected real global flag '--robot' NOT to be rewritten as a subcommand"
    );
}
|
||||
|
||||
/// When a subcommand is already detected ("issues"), the flag-as-subcommand
/// step (A3) must not activate for a later `--robot-docs`.
#[test]
fn flag_as_subcommand_not_triggered_when_subcommand_present() {
    let outcome = correct_args(args("lore issues --robot-docs"), false);
    let no_flag_fix = outcome
        .corrections
        .iter()
        .all(|c| c.rule != CorrectionRule::FlagAsSubcommand);
    assert!(
        no_flag_fix,
        "expected A3 not to trigger when subcommand is already present"
    );
}
|
||||
|
||||
// ---- Teaching notes for new rules ----
|
||||
|
||||
/// The teaching note for a fuzzy subcommand fix mentions spelling and shows
/// both the corrected and the original command.
///
/// "issuess" contains "issues" as a substring, so the corrected form is
/// matched together with its trailing space to keep the check meaningful.
#[test]
fn teaching_note_subcommand_fuzzy() {
    let c = Correction {
        original: "issuess".to_string(),
        corrected: "issues".to_string(),
        rule: CorrectionRule::SubcommandFuzzy,
        confidence: 0.92,
    };
    let note = format_teaching_note(&c);
    assert!(note.contains("spelling"), "note was: {note}");
    assert!(note.contains("lore issues "), "note was: {note}");
    assert!(note.contains("lore issuess"), "note was: {note}");
}
|
||||
|
||||
/// The teaching note for a flag-as-subcommand fix explains that commands are
/// positional and shows both the positional form and the rejected flag form.
#[test]
fn teaching_note_flag_as_subcommand() {
    let c = Correction {
        original: "--robot-docs".to_string(),
        corrected: "robot-docs".to_string(),
        rule: CorrectionRule::FlagAsSubcommand,
        confidence: 1.0,
    };
    let note = format_teaching_note(&c);
    assert!(note.contains("positional"), "note was: {note}");
    assert!(note.contains("lore robot-docs "), "note was: {note}");
    assert!(note.contains("lore --robot-docs"), "note was: {note}");
}
|
||||
|
||||
// ---- Canonical subcommands registry drift test ----
|
||||
|
||||
/// Drift guard: every subcommand clap knows about must also be listed in
/// `CANONICAL_SUBCOMMANDS`.
///
/// All missing names are collected and reported together, so one test run
/// shows the full extent of the drift instead of only the first offender.
#[test]
fn canonical_subcommands_covers_clap() {
    use clap::CommandFactory;

    let cmd = crate::cli::Cli::command();
    let missing: Vec<&str> = cmd
        .get_subcommands()
        .map(|sub| sub.get_name())
        .filter(|name| !CANONICAL_SUBCOMMANDS.contains(name))
        .collect();

    assert!(
        missing.is_empty(),
        "Clap subcommand(s) {missing:?} missing from CANONICAL_SUBCOMMANDS. \
         Add them to autocorrect.rs."
    );
}
|
||||
|
||||
// ---- Registry drift test ----
|
||||
// This test uses clap introspection to verify our static registry covers
|
||||
// all long flags defined in the Cli struct.
|
||||
|
||||
Reference in New Issue
Block a user