Enhance filter pipeline with synonym-aware categories and deal sorting
- extend filter.Options with sort mode support and keep Apply as a single-pass pipeline with limit behavior preserved for unsorted flows
- add sort normalization and two ordering strategies:
  * savings: rank by computed DealScore with deterministic title tie-break
  * ending: rank by earliest parsed end date, then DealScore fallback
- introduce DealScore heuristics that combine BOGO weighting, dollar-off extraction, and percentage extraction from savings/deal-info text
- add category synonym matcher that supports:
  * direct case-insensitive matches
  * canonical group synonym expansion (e.g. veggies -> produce)
  * normalized fallback for hyphen/underscore/plural variants without breaking exact unknown-category matching
- include explicit tests for synonym matching, hyphenated category handling, unknown plural exact matching, and sort ordering behavior
- keep allocation-sensitive behavior intact while adding matcher precomputation and fast-path checks
This commit is contained in:
121
internal/filter/category_synonyms.go
Normal file
121
internal/filter/category_synonyms.go
Normal file
@@ -0,0 +1,121 @@
|
||||
package filter
|
||||
|
||||
import "strings"
|
||||
|
||||
// categorySynonyms maps a canonical category group name to the alternative
// spellings that are treated as the same category when filtering.
//
// resolveCategoryGroup looks keys up by their normalizeCategory form, so every
// key must already be in that form (lower-case, singular, space-separated).
// Synonym values are normalized on the fly and may be written naturally.
var categorySynonyms = map[string][]string{
	"bogo":    {"bogof", "buy one get one", "buy1get1", "2 for 1", "two for one"},
	"produce": {"fruit", "fruits", "vegetable", "vegetables", "veggie", "veggies"},
	"meat":    {"beef", "chicken", "poultry", "pork", "seafood"},
	"dairy":   {"milk", "cheese", "yogurt"},
	"bakery":  {"bread", "pastry", "pastries"},
	"deli":    {"delicatessen", "cold cuts", "lunch meat"},
	"frozen":  {"frozen foods"},
	"grocery": {"pantry", "shelf"},
}
|
||||
|
||||
// categoryMatcher holds the precomputed alias forms of one requested category
// so that many item categories can be tested without rebuilding them.
// The zero value has no aliases and matches nothing.
type categoryMatcher struct {
	// exactAliases are compared case-insensitively against an item category.
	exactAliases []string
	// normalized is the set of normalizeCategory(alias) values, used as a
	// fallback when no exact alias matches.
	normalized map[string]struct{}
}
|
||||
|
||||
func newCategoryMatcher(wanted string) categoryMatcher {
|
||||
aliases := categoryAliasList(wanted)
|
||||
if len(aliases) == 0 {
|
||||
return categoryMatcher{}
|
||||
}
|
||||
|
||||
normalized := make(map[string]struct{}, len(aliases))
|
||||
for _, alias := range aliases {
|
||||
normalized[normalizeCategory(alias)] = struct{}{}
|
||||
}
|
||||
|
||||
return categoryMatcher{
|
||||
exactAliases: aliases,
|
||||
normalized: normalized,
|
||||
}
|
||||
}
|
||||
|
||||
func categoryAliasList(wanted string) []string {
|
||||
raw := strings.TrimSpace(wanted)
|
||||
group := resolveCategoryGroup(wanted)
|
||||
if raw == "" && group == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]string, 0, 1+len(categorySynonyms[group]))
|
||||
addAlias := func(alias string) {
|
||||
alias = strings.TrimSpace(alias)
|
||||
if alias == "" {
|
||||
return
|
||||
}
|
||||
for _, existing := range out {
|
||||
if strings.EqualFold(existing, alias) {
|
||||
return
|
||||
}
|
||||
}
|
||||
out = append(out, alias)
|
||||
}
|
||||
|
||||
addAlias(raw)
|
||||
addAlias(group)
|
||||
|
||||
if synonyms, ok := categorySynonyms[group]; ok {
|
||||
out = append(out, synonyms...)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func resolveCategoryGroup(wanted string) string {
|
||||
norm := normalizeCategory(wanted)
|
||||
if norm == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
if _, ok := categorySynonyms[norm]; ok {
|
||||
return norm
|
||||
}
|
||||
for key, synonyms := range categorySynonyms {
|
||||
for _, s := range synonyms {
|
||||
if normalizeCategory(s) == norm {
|
||||
return key
|
||||
}
|
||||
}
|
||||
}
|
||||
return norm
|
||||
}
|
||||
|
||||
func (m categoryMatcher) matches(category string) bool {
|
||||
trimmed := strings.TrimSpace(category)
|
||||
for _, alias := range m.exactAliases {
|
||||
if strings.EqualFold(trimmed, alias) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Fast path: if no separators are present and direct aliases didn't match,
|
||||
// normalization would only add overhead for common categories like "grocery".
|
||||
if !strings.ContainsAny(trimmed, "-_ ") {
|
||||
return false
|
||||
}
|
||||
|
||||
norm := normalizeCategory(trimmed)
|
||||
_, ok := m.normalized[norm]
|
||||
return ok
|
||||
}
|
||||
|
||||
// normalizeCategory reduces a category label to a canonical comparison form.
//
// The label is trimmed and lower-cased, underscores and hyphens become
// spaces, and runs of whitespace collapse to a single space. A simple plural
// suffix is then removed from the end of the whole string: "ies" becomes "y"
// when the string is longer than four bytes, otherwise a single trailing "s"
// (but not "ss") is dropped when the string is longer than three bytes.
// A blank input yields "".
func normalizeCategory(raw string) string {
	lowered := strings.ToLower(strings.TrimSpace(raw))
	if lowered == "" {
		return ""
	}

	spaced := strings.ReplaceAll(strings.ReplaceAll(lowered, "_", " "), "-", " ")
	label := strings.Join(strings.Fields(spaced), " ")

	if len(label) > 4 && strings.HasSuffix(label, "ies") {
		return label[:len(label)-len("ies")] + "y"
	}
	if len(label) > 3 && strings.HasSuffix(label, "s") && !strings.HasSuffix(label, "ss") {
		return label[:len(label)-1]
	}
	return label
}
|
||||
@@ -2,6 +2,7 @@ package filter
|
||||
|
||||
import (
|
||||
"html"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/tayloree/publix-deals/internal/api"
|
||||
@@ -13,6 +14,7 @@ type Options struct {
|
||||
Category string
|
||||
Department string
|
||||
Query string
|
||||
Sort string
|
||||
Limit int
|
||||
}
|
||||
|
||||
@@ -22,8 +24,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
||||
wantDepartment := opts.Department != ""
|
||||
wantQuery := opts.Query != ""
|
||||
needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery
|
||||
sortMode := normalizeSortMode(opts.Sort)
|
||||
hasSort := sortMode != ""
|
||||
|
||||
if !needsFiltering {
|
||||
if !needsFiltering && !hasSort {
|
||||
if opts.Limit > 0 && opts.Limit < len(items) {
|
||||
return items[:opts.Limit]
|
||||
}
|
||||
@@ -37,9 +41,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
||||
result = make([]api.SavingItem, 0, len(items))
|
||||
}
|
||||
|
||||
category := opts.Category
|
||||
department := strings.ToLower(opts.Department)
|
||||
query := strings.ToLower(opts.Query)
|
||||
applyLimitWhileFiltering := !hasSort && opts.Limit > 0
|
||||
categoryMatcher := newCategoryMatcher(opts.Category)
|
||||
|
||||
for _, item := range items {
|
||||
if opts.BOGO || wantCategory {
|
||||
@@ -50,7 +55,7 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
||||
if !hasBogo && strings.EqualFold(c, "bogo") {
|
||||
hasBogo = true
|
||||
}
|
||||
if !hasCategory && strings.EqualFold(c, category) {
|
||||
if !hasCategory && categoryMatcher.matches(c) {
|
||||
hasCategory = true
|
||||
}
|
||||
if hasBogo && hasCategory {
|
||||
@@ -76,11 +81,18 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
||||
}
|
||||
|
||||
result = append(result, item)
|
||||
if opts.Limit > 0 && len(result) >= opts.Limit {
|
||||
if applyLimitWhileFiltering && len(result) >= opts.Limit {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if hasSort && len(result) > 1 {
|
||||
sortItems(result, sortMode)
|
||||
}
|
||||
if opts.Limit > 0 && opts.Limit < len(result) {
|
||||
result = result[:opts.Limit]
|
||||
}
|
||||
|
||||
if len(result) == 0 {
|
||||
return nil
|
||||
}
|
||||
@@ -132,3 +144,35 @@ func ContainsIgnoreCase(slice []string, val string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func sortItems(items []api.SavingItem, mode string) {
|
||||
switch mode {
|
||||
case "savings":
|
||||
sort.SliceStable(items, func(i, j int) bool {
|
||||
left := DealScore(items[i])
|
||||
right := DealScore(items[j])
|
||||
if left == right {
|
||||
return strings.ToLower(CleanText(Deref(items[i].Title))) < strings.ToLower(CleanText(Deref(items[j].Title)))
|
||||
}
|
||||
return left > right
|
||||
})
|
||||
case "ending":
|
||||
sort.SliceStable(items, func(i, j int) bool {
|
||||
leftDate, leftOK := parseDealDate(items[i].EndFormatted)
|
||||
rightDate, rightOK := parseDealDate(items[j].EndFormatted)
|
||||
switch {
|
||||
case leftOK && rightOK:
|
||||
if leftDate.Equal(rightDate) {
|
||||
return DealScore(items[i]) > DealScore(items[j])
|
||||
}
|
||||
return leftDate.Before(rightDate)
|
||||
case leftOK:
|
||||
return true
|
||||
case rightOK:
|
||||
return false
|
||||
default:
|
||||
return DealScore(items[i]) > DealScore(items[j])
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +73,25 @@ func TestApply_CategoryCaseInsensitive(t *testing.T) {
|
||||
assert.Len(t, result, 2)
|
||||
}
|
||||
|
||||
func TestApply_CategorySynonym(t *testing.T) {
|
||||
result := filter.Apply(sampleItems(), filter.Options{Category: "veggies"})
|
||||
assert.Len(t, result, 1)
|
||||
assert.Equal(t, "3", result[0].ID)
|
||||
}
|
||||
|
||||
func TestApply_CategoryHyphenatedExactMatch(t *testing.T) {
|
||||
result := filter.Apply(sampleItems(), filter.Options{Category: "pet-bogos"})
|
||||
assert.Len(t, result, 1)
|
||||
assert.Equal(t, "4", result[0].ID)
|
||||
}
|
||||
|
||||
func TestApply_CategoryUnknownPluralStillMatchesExact(t *testing.T) {
|
||||
items := []api.SavingItem{{ID: "x", Categories: []string{"snacks"}}}
|
||||
result := filter.Apply(items, filter.Options{Category: "snacks"})
|
||||
assert.Len(t, result, 1)
|
||||
assert.Equal(t, "x", result[0].ID)
|
||||
}
|
||||
|
||||
func TestApply_Department(t *testing.T) {
|
||||
result := filter.Apply(sampleItems(), filter.Options{Department: "produce"})
|
||||
assert.Len(t, result, 1)
|
||||
@@ -116,6 +135,33 @@ func TestApply_CombinedFilters(t *testing.T) {
|
||||
assert.Equal(t, "2", result[0].ID)
|
||||
}
|
||||
|
||||
func TestApply_SortSavings(t *testing.T) {
|
||||
items := []api.SavingItem{
|
||||
{ID: "a", Title: ptr("A"), Savings: ptr("$1.00 off")},
|
||||
{ID: "b", Title: ptr("B"), Savings: ptr("$4.00 off")},
|
||||
{ID: "c", Title: ptr("C"), Categories: []string{"bogo"}},
|
||||
}
|
||||
result := filter.Apply(items, filter.Options{Sort: "savings"})
|
||||
|
||||
assert.Len(t, result, 3)
|
||||
assert.Equal(t, "c", result[0].ID)
|
||||
assert.Equal(t, "b", result[1].ID)
|
||||
}
|
||||
|
||||
func TestApply_SortEnding(t *testing.T) {
|
||||
items := []api.SavingItem{
|
||||
{ID: "late", EndFormatted: "12/31/2026"},
|
||||
{ID: "soon", EndFormatted: "01/02/2026"},
|
||||
{ID: "unknown"},
|
||||
}
|
||||
result := filter.Apply(items, filter.Options{Sort: "ending"})
|
||||
|
||||
assert.Len(t, result, 3)
|
||||
assert.Equal(t, "soon", result[0].ID)
|
||||
assert.Equal(t, "late", result[1].ID)
|
||||
assert.Equal(t, "unknown", result[2].ID)
|
||||
}
|
||||
|
||||
func TestApply_NilFields(t *testing.T) {
|
||||
// Item 5 has nil title/department/categories — should not panic
|
||||
result := filter.Apply(sampleItems(), filter.Options{Query: "anything"})
|
||||
|
||||
85
internal/filter/sort.go
Normal file
85
internal/filter/sort.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package filter
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/tayloree/publix-deals/internal/api"
|
||||
)
|
||||
|
||||
// Patterns used by DealScore, compiled once at package initialization.
var (
	// reDollar matches a dollar amount such as "$4" or "$1.50"; capture
	// group 1 is the number without the "$" sign.
	reDollar = regexp.MustCompile(`\$(\d+(?:\.\d{1,2})?)`)
	// rePercent matches one to three digits followed by optional whitespace
	// and "%"; capture group 1 is the digits.
	rePercent = regexp.MustCompile(`(\d{1,3})\s*%`)
)
|
||||
|
||||
// DealScore estimates relative deal value for ranking.
|
||||
func DealScore(item api.SavingItem) float64 {
|
||||
score := 0.0
|
||||
|
||||
if ContainsIgnoreCase(item.Categories, "bogo") {
|
||||
score += 8
|
||||
}
|
||||
|
||||
text := strings.ToLower(
|
||||
CleanText(Deref(item.Savings) + " " + Deref(item.AdditionalDealInfo)),
|
||||
)
|
||||
for _, m := range reDollar.FindAllStringSubmatch(text, -1) {
|
||||
if len(m) < 2 {
|
||||
continue
|
||||
}
|
||||
if amount, err := strconv.ParseFloat(m[1], 64); err == nil {
|
||||
score += amount
|
||||
}
|
||||
}
|
||||
for _, m := range rePercent.FindAllStringSubmatch(text, -1) {
|
||||
if len(m) < 2 {
|
||||
continue
|
||||
}
|
||||
if pct, err := strconv.ParseFloat(m[1], 64); err == nil {
|
||||
score += pct / 20.0
|
||||
}
|
||||
}
|
||||
|
||||
if score == 0 {
|
||||
return 0.01
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
// normalizeSortMode maps a user-supplied sort name onto a canonical mode.
//
// Matching ignores case and surrounding whitespace. It returns "savings" for
// "savings", "ending" for "ending", "end", "expiry" or "expiration", and ""
// (no sorting) for everything else, including "" and "relevance".
func normalizeSortMode(raw string) string {
	mode := strings.ToLower(strings.TrimSpace(raw))
	if mode == "savings" {
		return mode
	}
	for _, alias := range [...]string{"ending", "end", "expiry", "expiration"} {
		if mode == alias {
			return "ending"
		}
	}
	return ""
}
|
||||
|
||||
// parseDealDate parses a deal end date written in one of several layouts.
//
// Accepted layouts, tried in order: M/D/YYYY, MM/DD/YYYY, M/D/YY, MM/DD/YY,
// YYYY-MM-DD, "Jan 2, 2006" and "January 2, 2006". Surrounding whitespace is
// ignored. The boolean is false, and the time is the zero value, when the
// input is blank or fits none of the layouts.
func parseDealDate(raw string) (time.Time, bool) {
	var none time.Time

	text := strings.TrimSpace(raw)
	if text == "" {
		return none, false
	}

	for _, layout := range [...]string{
		"1/2/2006",
		"01/02/2006",
		"1/2/06",
		"01/02/06",
		"2006-01-02",
		"Jan 2, 2006",
		"January 2, 2006",
	} {
		parsed, err := time.Parse(layout, text)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return none, false
}
|
||||
Reference in New Issue
Block a user