Fix category synonym matching: deduplicate aliases, remove broken fast-path
Three issues in categoryAliasList/categoryMatcher: 1. categoryAliasList appended raw synonyms without deduplication. The addAlias helper already handles lowercasing and dedup, so route synonyms through it instead of appending them directly. 2. categoryMatcher.matches had a fast path that returned false when the input contained no separators (hyphen, underscore, or space), skipping the normalization step entirely. This caused legitimate matches to fail when the input was a single word that needed plural stripping to match, e.g. "groceries", which normalizes to "grocery". 3. normalizeCategory unconditionally replaced underscores/hyphens and re-joined fields even for inputs without separators. Gate the replacement and re-join behind a ContainsAny(s, "_-") check (note that inputs containing only spaces therefore no longer have their whitespace collapsed), and use direct slice indexing instead of TrimSuffix for the plural stripping. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -60,7 +60,9 @@ func categoryAliasList(wanted string) []string {
|
|||||||
addAlias(group)
|
addAlias(group)
|
||||||
|
|
||||||
if synonyms, ok := categorySynonyms[group]; ok {
|
if synonyms, ok := categorySynonyms[group]; ok {
|
||||||
out = append(out, synonyms...)
|
for _, s := range synonyms {
|
||||||
|
addAlias(s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
@@ -92,12 +94,6 @@ func (m categoryMatcher) matches(category string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fast path: if no separators are present and direct aliases didn't match,
|
|
||||||
// normalization would only add overhead for common categories like "grocery".
|
|
||||||
if !strings.ContainsAny(trimmed, "-_ ") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
norm := normalizeCategory(trimmed)
|
norm := normalizeCategory(trimmed)
|
||||||
_, ok := m.normalized[norm]
|
_, ok := m.normalized[norm]
|
||||||
return ok
|
return ok
|
||||||
@@ -108,14 +104,16 @@ func normalizeCategory(raw string) string {
|
|||||||
if s == "" {
|
if s == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
s = strings.ReplaceAll(s, "_", " ")
|
if strings.ContainsAny(s, "_-") {
|
||||||
s = strings.ReplaceAll(s, "-", " ")
|
s = strings.ReplaceAll(s, "_", " ")
|
||||||
s = strings.Join(strings.Fields(s), " ")
|
s = strings.ReplaceAll(s, "-", " ")
|
||||||
|
s = strings.Join(strings.Fields(s), " ")
|
||||||
|
}
|
||||||
switch {
|
switch {
|
||||||
case len(s) > 4 && strings.HasSuffix(s, "ies"):
|
case len(s) > 4 && strings.HasSuffix(s, "ies"):
|
||||||
s = strings.TrimSuffix(s, "ies") + "y"
|
s = s[:len(s)-3] + "y"
|
||||||
case len(s) > 3 && strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"):
|
case len(s) > 3 && strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"):
|
||||||
s = strings.TrimSuffix(s, "s")
|
s = s[:len(s)-1]
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user