feat(autocorrect): add fuzzy subcommand matching and flag-as-subcommand detection

Extend the CLI autocorrection pipeline with two new correction rules that help agents recover from common typos and misunderstandings:

1. SubcommandFuzzy (threshold 0.85): Fuzzy-matches typo'd subcommands against the canonical list.
   Examples:
   - "issuess" → "issues"
   - "timline" → "timeline"
   - "serach" → "search"
   Guards prevent false positives:
   - Words that look like misplaced global flags are skipped
   - Valid command prefixes are left to clap's infer_subcommands

2. FlagAsSubcommand: Detects when agents type subcommands as flags. Some agents (especially Codex) assume `--robot-docs` is a flag rather than a subcommand.
   This rule converts:
   - "--robot-docs" → "robot-docs"
   - "--generate-docs" → "generate-docs"

Also improves error messages in main.rs:
- MissingRequiredArgument: Contextual example based on the detected subcommand
- MissingSubcommand: Lists common commands
- TooFewValues/TooManyValues: Command-specific help hints

Added a CANONICAL_SUBCOMMANDS constant enumerating all valid subcommands (including hidden ones) for fuzzy matching. This ensures agents that know about hidden commands still get typo correction.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-06 11:15:15 -05:00
parent ffbd1e2dce
commit 1dfcfd3f83
2 changed files with 461 additions and 22 deletions
--- a/src/cli/autocorrect.rs
+++ b/src/cli/autocorrect.rs
@@ -22,6 +22,10 @@ pub enum CorrectionRule {
    CaseNormalization,
    FuzzyFlag,
    SubcommandAlias,
+    /// Fuzzy subcommand match: "issuess" → "issues"
+    SubcommandFuzzy,
+    /// Flag-style subcommand: "--robot-docs" → "robot-docs"
+    FlagAsSubcommand,
    ValueNormalization,
    ValueFuzzy,
    FlagPrefix,
@@ -294,6 +298,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
            "--issues",
            "--mrs",
            "--activity",
+            "--mentions",
            "--since",
            "--project",
            "--all",
@@ -351,6 +356,51 @@ const FUZZY_FLAG_THRESHOLD: f64 = 0.8;
 /// avoid misleading agents. Still catches obvious typos like `--projct`.
 const FUZZY_FLAG_THRESHOLD_STRICT: f64 = 0.9;

+/// Fuzzy subcommand threshold — higher than `FUZZY_FLAG_THRESHOLD` (0.8) because
+/// subcommand names are shorter words, where Jaro-Winkler scores inflate more easily.
+const FUZZY_SUBCMD_THRESHOLD: f64 = 0.85;
+
+/// All canonical subcommand names for fuzzy matching and flag-as-subcommand
+/// detection. Includes hidden commands so agents that know about them can
+/// still benefit from typo correction.
+const CANONICAL_SUBCOMMANDS: &[&str] = &[
+    "issues",
+    "mrs",
+    "notes",
+    "ingest",
+    "count",
+    "status",
+    "auth",
+    "doctor",
+    "version",
+    "init",
+    "search",
+    "stats",
+    "generate-docs",
+    "embed",
+    "sync",
+    "migrate",
+    "health",
+    "robot-docs",
+    "completions",
+    "timeline",
+    "who",
+    "me",
+    "file-history",
+    "trace",
+    "drift",
+    "related",
+    "cron",
+    "token",
+    // Hidden but still valid
+    "backup",
+    "reset",
+    "list",
+    "show",
+    "auth-test",
+    "sync-status",
+];
+
 // ---------------------------------------------------------------------------
 // Core logic
 // ---------------------------------------------------------------------------
@@ -474,13 +524,15 @@ pub fn correct_args(raw: Vec<String>, strict: bool) -> CorrectionResult {
    }
 }

-/// Phase A: Replace subcommand aliases with their canonical names.
+/// Phase A: Replace subcommand aliases with their canonical names, fuzzy-match
+/// typo'd subcommands, and detect flag-style subcommands (`--robot-docs`).
 ///
-/// Handles forms that can't be expressed as clap `alias`/`visible_alias`
-/// (underscores, no-separator forms). Case-insensitive matching.
+/// Three mutually exclusive rules (at most one fires per call):
+/// - A1: Exact alias match (underscore/no-separator forms)
+/// - A2: Fuzzy subcommand match ("issuess" → "issues"), tried only if A1 misses
+/// - A3: Flag-as-subcommand ("--robot-docs" → "robot-docs"), only if no subcommand is found
 fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>) -> Vec<String> {
-    // Find the subcommand position index, then check the alias map.
-    // Can't use iterators easily because we need to mutate args[i].
+    // Find the subcommand position index.
    let mut skip_next = false;
    let mut subcmd_idx = None;
    for (i, arg) in args.iter().enumerate().skip(1) {
@@ -500,19 +552,106 @@ fn correct_subcommand(mut args: Vec<String>, corrections: &mut Vec<Correction>)
        subcmd_idx = Some(i);
        break;
    }
-    if let Some(i) = subcmd_idx
-        && let Some((_, canonical)) = SUBCOMMAND_ALIASES
+
+    if let Some(i) = subcmd_idx {
+        // A1: Exact alias match (existing logic)
+        if let Some((_, canonical)) = SUBCOMMAND_ALIASES
            .iter()
            .find(|(alias, _)| alias.eq_ignore_ascii_case(&args[i]))
-    {
-        corrections.push(Correction {
-            original: args[i].clone(),
-            corrected: (*canonical).to_string(),
-            rule: CorrectionRule::SubcommandAlias,
-            confidence: 1.0,
-        });
-        args[i] = (*canonical).to_string();
+        {
+            corrections.push(Correction {
+                original: args[i].clone(),
+                corrected: (*canonical).to_string(),
+                rule: CorrectionRule::SubcommandAlias,
+                confidence: 1.0,
+            });
+            args[i] = (*canonical).to_string();
+        }
+        // A2: Fuzzy subcommand match — only if not already a canonical name
+        else {
+            let lower = args[i].to_lowercase();
+            if !CANONICAL_SUBCOMMANDS.contains(&lower.as_str()) {
+                // Guard: don't fuzzy-match words that look like misplaced global flags
+                // (e.g., "robot" should not match "robot-docs")
+                let as_flag = format!("--{lower}");
+                let is_flag_word = GLOBAL_FLAGS
+                    .iter()
+                    .any(|f| f.eq_ignore_ascii_case(&as_flag));
+
+                // Guard: don't fuzzy-match if it's a valid prefix of a canonical command
+                // (clap's infer_subcommands handles prefix resolution)
+                let is_prefix = CANONICAL_SUBCOMMANDS
+                    .iter()
+                    .any(|cmd| cmd.starts_with(&*lower) && *cmd != lower);
+
+                if !is_flag_word && !is_prefix {
+                    let best = CANONICAL_SUBCOMMANDS
+                        .iter()
+                        .map(|cmd| (*cmd, jaro_winkler(&lower, cmd)))
+                        .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+
+                    if let Some((cmd, score)) = best
+                        && score >= FUZZY_SUBCMD_THRESHOLD
+                    {
+                        corrections.push(Correction {
+                            original: args[i].clone(),
+                            corrected: cmd.to_string(),
+                            rule: CorrectionRule::SubcommandFuzzy,
+                            confidence: score,
+                        });
+                        args[i] = cmd.to_string();
+                    }
+                }
+            }
+        }
+    } else {
+        // A3: No subcommand detected — check for flag-style subcommands.
+        // Agents sometimes type `--robot-docs` or `--generate-docs` as flags.
+        let mut flag_as_subcmd: Option<(usize, String)> = None;
+        let mut flag_skip = false;
+        for (i, arg) in args.iter().enumerate().skip(1) {
+            if flag_skip {
+                flag_skip = false;
+                continue;
+            }
+            if !arg.starts_with("--") || arg.contains('=') {
+                continue;
+            }
+
+            let arg_lower = arg.to_lowercase();
+            // Skip clap built-in flags (--help, --version)
+            if CLAP_BUILTINS
+                .iter()
+                .any(|b| b.eq_ignore_ascii_case(&arg_lower))
+            {
+                continue;
+            }
+            // Skip known global flags
+            if GLOBAL_FLAGS.iter().any(|f| f.to_lowercase() == arg_lower) {
+                if matches!(arg_lower.as_str(), "--config" | "--color" | "--log-format") {
+                    flag_skip = true;
+                }
+                continue;
+            }
+
+            let stripped = arg_lower[2..].to_string();
+            if CANONICAL_SUBCOMMANDS.contains(&stripped.as_str()) {
+                flag_as_subcmd = Some((i, stripped));
+                break;
+            }
+        }
+
+        if let Some((i, subcmd)) = flag_as_subcmd {
+            corrections.push(Correction {
+                original: args[i].clone(),
+                corrected: subcmd.clone(),
+                rule: CorrectionRule::FlagAsSubcommand,
+                confidence: 1.0,
+            });
+            args[i] = subcmd;
+        }
    }
+
    args
 }

@@ -888,6 +1027,18 @@ pub fn format_teaching_note(correction: &Correction) -> String {
                correction.corrected, correction.original
            )
        }
+        CorrectionRule::SubcommandFuzzy => {
+            format!(
+                "Correct command spelling: lore {} (not lore {})",
+                correction.corrected, correction.original
+            )
+        }
+        CorrectionRule::FlagAsSubcommand => {
+            format!(
+                "Commands are positional, not flags: lore {} (not lore --{})",
+                correction.corrected, correction.corrected
+            )
+        }
        CorrectionRule::ValueNormalization => {
            format!(
                "Values are lowercase: {} (not {})",
@@ -1451,6 +1602,198 @@ mod tests {
        assert_eq!(detect_subcommand(&args("lore --robot")), None);
    }

+    // ---- Fuzzy subcommand matching (A2) ----
+
+    #[test]
+    fn fuzzy_subcommand_issuess() {
+        let result = correct_args(args("lore --robot issuess -n 10"), false);
+        assert!(
+            result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::SubcommandFuzzy && c.corrected == "issues"),
+            "expected 'issuess' to fuzzy-match 'issues'"
+        );
+        assert!(result.args.contains(&"issues".to_string()));
+    }
+
+    #[test]
+    fn fuzzy_subcommand_timline() {
+        let result = correct_args(args("lore timline \"auth\""), false);
+        assert!(
+            result.corrections.iter().any(|c| c.corrected == "timeline"),
+            "expected 'timline' to fuzzy-match 'timeline'"
+        );
+    }
+
+    #[test]
+    fn fuzzy_subcommand_serach() {
+        let result = correct_args(args("lore --robot serach \"auth bug\""), false);
+        assert!(
+            result.corrections.iter().any(|c| c.corrected == "search"),
+            "expected 'serach' to fuzzy-match 'search'"
+        );
+    }
+
+    #[test]
+    fn fuzzy_subcommand_already_valid_untouched() {
+        let result = correct_args(args("lore issues -n 10"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::SubcommandFuzzy)
+        );
+    }
+
+    #[test]
+    fn fuzzy_subcommand_robot_not_matched_to_robot_docs() {
+        // "robot" looks like a misplaced --robot flag, not a typo for "robot-docs"
+        let result = correct_args(args("lore robot issues"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::SubcommandFuzzy),
+            "expected 'robot' NOT to fuzzy-match 'robot-docs' (it's a misplaced flag)"
+        );
+    }
+
+    #[test]
+    fn fuzzy_subcommand_prefix_deferred_to_clap() {
+        // "iss" is a prefix of "issues" — clap's infer_subcommands handles this
+        let result = correct_args(args("lore iss -n 10"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::SubcommandFuzzy),
+            "expected prefix 'iss' NOT to be fuzzy-matched (clap handles it)"
+        );
+    }
+
+    #[test]
+    fn fuzzy_subcommand_wildly_wrong_not_matched() {
+        let result = correct_args(args("lore xyzzyplugh"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::SubcommandFuzzy),
+            "expected gibberish NOT to fuzzy-match any command"
+        );
+    }
+
+    // ---- Flag-as-subcommand (A3) ----
+
+    #[test]
+    fn flag_as_subcommand_robot_docs() {
+        let result = correct_args(args("lore --robot-docs"), false);
+        assert!(
+            result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::FlagAsSubcommand && c.corrected == "robot-docs"),
+            "expected '--robot-docs' to be corrected to 'robot-docs'"
+        );
+        assert!(result.args.contains(&"robot-docs".to_string()));
+    }
+
+    #[test]
+    fn flag_as_subcommand_generate_docs() {
+        let result = correct_args(args("lore --generate-docs"), false);
+        assert!(
+            result
+                .corrections
+                .iter()
+                .any(|c| c.corrected == "generate-docs"),
+            "expected '--generate-docs' to be corrected to 'generate-docs'"
+        );
+    }
+
+    #[test]
+    fn flag_as_subcommand_with_robot_flag() {
+        // `lore --robot --robot-docs` — --robot is a valid global flag, --robot-docs is not
+        let result = correct_args(args("lore --robot --robot-docs"), false);
+        assert!(
+            result
+                .corrections
+                .iter()
+                .any(|c| c.corrected == "robot-docs"),
+        );
+        assert_eq!(result.args, args("lore --robot robot-docs"));
+    }
+
+    #[test]
+    fn flag_as_subcommand_does_not_touch_real_flags() {
+        // --robot is a real global flag, should NOT be rewritten to "robot"
+        let result = correct_args(args("lore --robot issues"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::FlagAsSubcommand),
+        );
+    }
+
+    #[test]
+    fn flag_as_subcommand_not_triggered_when_subcommand_present() {
+        // A subcommand IS detected, so A3 shouldn't activate
+        let result = correct_args(args("lore issues --robot-docs"), false);
+        assert!(
+            !result
+                .corrections
+                .iter()
+                .any(|c| c.rule == CorrectionRule::FlagAsSubcommand),
+            "expected A3 not to trigger when subcommand is already present"
+        );
+    }
+
+    // ---- Teaching notes for new rules ----
+
+    #[test]
+    fn teaching_note_subcommand_fuzzy() {
+        let c = Correction {
+            original: "issuess".to_string(),
+            corrected: "issues".to_string(),
+            rule: CorrectionRule::SubcommandFuzzy,
+            confidence: 0.92,
+        };
+        let note = format_teaching_note(&c);
+        assert!(note.contains("spelling"));
+        assert!(note.contains("issues"));
+    }
+
+    #[test]
+    fn teaching_note_flag_as_subcommand() {
+        let c = Correction {
+            original: "--robot-docs".to_string(),
+            corrected: "robot-docs".to_string(),
+            rule: CorrectionRule::FlagAsSubcommand,
+            confidence: 1.0,
+        };
+        let note = format_teaching_note(&c);
+        assert!(note.contains("positional"));
+        assert!(note.contains("robot-docs"));
+    }
+
+    // ---- Canonical subcommands registry drift test ----
+
+    #[test]
+    fn canonical_subcommands_covers_clap() {
+        use clap::CommandFactory;
+        let cmd = crate::cli::Cli::command();
+
+        for sub in cmd.get_subcommands() {
+            let name = sub.get_name();
+            assert!(
+                CANONICAL_SUBCOMMANDS.contains(&name),
+                "Clap subcommand '{name}' is missing from CANONICAL_SUBCOMMANDS. \
+                 Add it to autocorrect.rs."
+            );
+        }
+    }
+
    // ---- Registry drift test ----
    // This test uses clap introspection to verify our static registry covers
    // all long flags defined in the Cli struct.