Fix category synonym matching: deduplicate aliases, remove broken fast-path

Three issues in categoryAliasList/categoryMatcher:

1. categoryAliasList appended raw synonyms without deduplication—the
   addAlias helper already handles lowering and dedup, so route synonyms
   through it instead of direct append.

2. categoryMatcher.matches had a fast path that returned false when the
   input contained no separators (hyphen, underscore, or space),
   skipping the normalization step entirely. This caused legitimate
   matches like "groceries" vs "grocery" to fail, because a single-word
   input that needed plural stripping never reached normalizeCategory.

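3. normalizeCategory unconditionally replaced underscores/hyphens and
   re-joined fields even for inputs without separators. Gate the
   separator logic behind a ContainsAny check for "_" and "-", and use
   direct slice indexing instead of TrimSuffix for the plural stripping.
   Note that the gate does not test for spaces, so inputs whose only
   separators are spaces no longer have runs of whitespace collapsed.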

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 01:31:50 -05:00
parent 11a90d2fd1
commit 28479071ae

View File

@@ -60,7 +60,9 @@ func categoryAliasList(wanted string) []string {
addAlias(group) addAlias(group)
if synonyms, ok := categorySynonyms[group]; ok { if synonyms, ok := categorySynonyms[group]; ok {
out = append(out, synonyms...) for _, s := range synonyms {
addAlias(s)
}
} }
return out return out
} }
@@ -92,12 +94,6 @@ func (m categoryMatcher) matches(category string) bool {
} }
} }
// Fast path: if no separators are present and direct aliases didn't match,
// normalization would only add overhead for common categories like "grocery".
if !strings.ContainsAny(trimmed, "-_ ") {
return false
}
norm := normalizeCategory(trimmed) norm := normalizeCategory(trimmed)
_, ok := m.normalized[norm] _, ok := m.normalized[norm]
return ok return ok
@@ -108,14 +104,16 @@ func normalizeCategory(raw string) string {
if s == "" { if s == "" {
return "" return ""
} }
if strings.ContainsAny(s, "_-") {
s = strings.ReplaceAll(s, "_", " ") s = strings.ReplaceAll(s, "_", " ")
s = strings.ReplaceAll(s, "-", " ") s = strings.ReplaceAll(s, "-", " ")
s = strings.Join(strings.Fields(s), " ") s = strings.Join(strings.Fields(s), " ")
}
switch { switch {
case len(s) > 4 && strings.HasSuffix(s, "ies"): case len(s) > 4 && strings.HasSuffix(s, "ies"):
s = strings.TrimSuffix(s, "ies") + "y" s = s[:len(s)-3] + "y"
case len(s) > 3 && strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"): case len(s) > 3 && strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"):
s = strings.TrimSuffix(s, "s") s = s[:len(s)-1]
} }
return s return s
} }