From eb2328b7680123c24ca14762a645c7fcee8083cc Mon Sep 17 00:00:00 2001 From: teernisse Date: Mon, 23 Feb 2026 00:26:55 -0500 Subject: [PATCH] Enhance filter pipeline with synonym-aware categories and deal sorting - extend filter.Options with sort mode support and keep Apply as a single-pass pipeline with limit behavior preserved for unsorted flows - add sort normalization and two ordering strategies: * savings: rank by computed DealScore with deterministic title tie-break * ending: rank by earliest parsed end date, then DealScore fallback - introduce DealScore heuristics that combine BOGO weighting, dollar-off extraction, and percentage extraction from savings/deal-info text - add category synonym matcher that supports: * direct case-insensitive matches * canonical group synonym expansion (e.g. veggies -> produce) * normalized fallback for hyphen/underscore/plural variants without breaking exact unknown-category matching - include explicit tests for synonym matching, hyphenated category handling, unknown plural exact matching, and sort ordering behavior - keep allocation-sensitive behavior intact while adding matcher precomputation and fast-path checks --- internal/filter/category_synonyms.go | 121 +++++++++++++++++++++++++++ internal/filter/filter.go | 52 +++++++++++- internal/filter/filter_test.go | 46 ++++++++++ internal/filter/sort.go | 85 +++++++++++++++++++ 4 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 internal/filter/category_synonyms.go create mode 100644 internal/filter/sort.go diff --git a/internal/filter/category_synonyms.go b/internal/filter/category_synonyms.go new file mode 100644 index 0000000..3c1d4ad --- /dev/null +++ b/internal/filter/category_synonyms.go @@ -0,0 +1,121 @@ +package filter + +import "strings" + +var categorySynonyms = map[string][]string{ + "bogo": {"bogof", "buy one get one", "buy1get1", "2 for 1", "two for one"}, + "produce": {"fruit", "fruits", "vegetable", "vegetables", "veggie", "veggies"}, + "meat": {"beef", "chicken", "poultry", "pork", "seafood"}, + "dairy": {"milk", "cheese", "yogurt"}, + "bakery": {"bread", "pastry", "pastries"}, + "deli": {"delicatessen", "cold cuts", "lunch meat"}, + "frozen": {"frozen foods"}, + "grocery": {"pantry", "shelf"}, +} + +type categoryMatcher struct { + exactAliases []string + normalized map[string]struct{} +} + +func newCategoryMatcher(wanted string) categoryMatcher { + aliases := categoryAliasList(wanted) + if len(aliases) == 0 { + return categoryMatcher{} + } + + normalized := make(map[string]struct{}, len(aliases)) + for _, alias := range aliases { + normalized[normalizeCategory(alias)] = struct{}{} + } + + return categoryMatcher{ + exactAliases: aliases, + normalized: normalized, + } +} + +func categoryAliasList(wanted string) []string { + raw := strings.TrimSpace(wanted) + group := resolveCategoryGroup(wanted) + if raw == "" && group == "" { + return nil + } + + out := make([]string, 0, 1+len(categorySynonyms[group])) + addAlias := func(alias string) { + alias = strings.TrimSpace(alias) + if alias == "" { + return + } + for _, existing := range out { + if strings.EqualFold(existing, alias) { + return + } + } + out = append(out, alias) + } + + addAlias(raw) + addAlias(group) + + if synonyms, ok := categorySynonyms[group]; ok { + out = append(out, synonyms...) + } + return out +} + +func resolveCategoryGroup(wanted string) string { + norm := normalizeCategory(wanted) + if norm == "" { + return "" + } + + if _, ok := categorySynonyms[norm]; ok { + return norm + } + for key, synonyms := range categorySynonyms { + for _, s := range synonyms { + if normalizeCategory(s) == norm { + return key + } + } + } + return norm +} + +func (m categoryMatcher) matches(category string) bool { + trimmed := strings.TrimSpace(category) + for _, alias := range m.exactAliases { + if strings.EqualFold(trimmed, alias) { + return true + } + } + + // Fast path: if no separators are present and direct aliases didn't match, + // normalization would only add overhead for common categories like "grocery". + if !strings.ContainsAny(trimmed, "-_ ") { + return false + } + + norm := normalizeCategory(trimmed) + _, ok := m.normalized[norm] + return ok +} + +func normalizeCategory(raw string) string { + s := strings.ToLower(strings.TrimSpace(raw)) + if s == "" { + return "" + } + s = strings.ReplaceAll(s, "_", " ") + s = strings.ReplaceAll(s, "-", " ") + s = strings.Join(strings.Fields(s), " ") + switch { + case len(s) > 4 && strings.HasSuffix(s, "ies"): + s = strings.TrimSuffix(s, "ies") + "y" + case len(s) > 3 && strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"): + s = strings.TrimSuffix(s, "s") + } + return s +} diff --git a/internal/filter/filter.go b/internal/filter/filter.go index 7cfb468..f56a441 100644 --- a/internal/filter/filter.go +++ b/internal/filter/filter.go @@ -2,6 +2,7 @@ package filter import ( "html" + "sort" "strings" "github.com/tayloree/publix-deals/internal/api" @@ -13,6 +14,7 @@ type Options struct { Category string Department string Query string + Sort string Limit int } @@ -22,8 +24,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem { wantDepartment := opts.Department != "" wantQuery := opts.Query != "" needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery + sortMode := normalizeSortMode(opts.Sort) + hasSort := sortMode != "" - if !needsFiltering { + if !needsFiltering && !hasSort { if opts.Limit > 0 && opts.Limit < len(items) { return items[:opts.Limit] } @@ -37,9 +41,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem { result = make([]api.SavingItem, 0, len(items)) } - category := opts.Category department := strings.ToLower(opts.Department) query := strings.ToLower(opts.Query) + applyLimitWhileFiltering := !hasSort && opts.Limit > 0 + categoryMatcher := newCategoryMatcher(opts.Category) for _, item := range items { if opts.BOGO || wantCategory { @@ -50,7 +55,7 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem { if !hasBogo && strings.EqualFold(c, "bogo") { hasBogo = true } - if !hasCategory && strings.EqualFold(c, category) { + if !hasCategory && categoryMatcher.matches(c) { hasCategory = true } if hasBogo && hasCategory { @@ -76,11 +81,18 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem { } result = append(result, item) - if opts.Limit > 0 && len(result) >= opts.Limit { + if applyLimitWhileFiltering && len(result) >= opts.Limit { break } } + if hasSort && len(result) > 1 { + sortItems(result, sortMode) + } + if opts.Limit > 0 && opts.Limit < len(result) { + result = result[:opts.Limit] + } + if len(result) == 0 { return nil } @@ -132,3 +144,35 @@ func ContainsIgnoreCase(slice []string, val string) bool { } return false } + +func sortItems(items []api.SavingItem, mode string) { + switch mode { + case "savings": + sort.SliceStable(items, func(i, j int) bool { + left := DealScore(items[i]) + right := DealScore(items[j]) + if left == right { + return strings.ToLower(CleanText(Deref(items[i].Title))) < strings.ToLower(CleanText(Deref(items[j].Title))) + } + return left > right + }) + case "ending": + sort.SliceStable(items, func(i, j int) bool { + leftDate, leftOK := parseDealDate(items[i].EndFormatted) + rightDate, rightOK := parseDealDate(items[j].EndFormatted) + switch { + case leftOK && rightOK: + if leftDate.Equal(rightDate) { + return DealScore(items[i]) > DealScore(items[j]) + } + return leftDate.Before(rightDate) + case leftOK: + return true + case rightOK: + return false + default: + return DealScore(items[i]) > DealScore(items[j]) + } + }) + } +} diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go index 9a520d2..74bb577 100644 --- a/internal/filter/filter_test.go +++ b/internal/filter/filter_test.go @@ -73,6 +73,25 @@ func TestApply_CategoryCaseInsensitive(t *testing.T) { assert.Len(t, result, 2) } +func TestApply_CategorySynonym(t *testing.T) { + result := filter.Apply(sampleItems(), filter.Options{Category: "veggies"}) + assert.Len(t, result, 1) + assert.Equal(t, "3", result[0].ID) +} + +func TestApply_CategoryHyphenatedExactMatch(t *testing.T) { + result := filter.Apply(sampleItems(), filter.Options{Category: "pet-bogos"}) + assert.Len(t, result, 1) + assert.Equal(t, "4", result[0].ID) +} + +func TestApply_CategoryUnknownPluralStillMatchesExact(t *testing.T) { + items := []api.SavingItem{{ID: "x", Categories: []string{"snacks"}}} + result := filter.Apply(items, filter.Options{Category: "snacks"}) + assert.Len(t, result, 1) + assert.Equal(t, "x", result[0].ID) +} + func TestApply_Department(t *testing.T) { result := filter.Apply(sampleItems(), filter.Options{Department: "produce"}) assert.Len(t, result, 1) @@ -116,6 +135,33 @@ func TestApply_CombinedFilters(t *testing.T) { assert.Equal(t, "2", result[0].ID) } +func TestApply_SortSavings(t *testing.T) { + items := []api.SavingItem{ + {ID: "a", Title: ptr("A"), Savings: ptr("$1.00 off")}, + {ID: "b", Title: ptr("B"), Savings: ptr("$4.00 off")}, + {ID: "c", Title: ptr("C"), Categories: []string{"bogo"}}, + } + result := filter.Apply(items, filter.Options{Sort: "savings"}) + + assert.Len(t, result, 3) + assert.Equal(t, "c", result[0].ID) + assert.Equal(t, "b", result[1].ID) +} + +func TestApply_SortEnding(t *testing.T) { + items := []api.SavingItem{ + {ID: "late", EndFormatted: "12/31/2026"}, + {ID: "soon", EndFormatted: "01/02/2026"}, + {ID: "unknown"}, + } + result := filter.Apply(items, filter.Options{Sort: "ending"}) + + assert.Len(t, result, 3) + assert.Equal(t, "soon", result[0].ID) + assert.Equal(t, "late", result[1].ID) + assert.Equal(t, "unknown", result[2].ID) +} + func TestApply_NilFields(t *testing.T) { // Item 5 has nil title/department/categories — should not panic result := filter.Apply(sampleItems(), filter.Options{Query: "anything"}) diff --git a/internal/filter/sort.go b/internal/filter/sort.go new file mode 100644 index 0000000..595e492 --- /dev/null +++ b/internal/filter/sort.go @@ -0,0 +1,85 @@ +package filter + +import ( + "regexp" + "strconv" + "strings" + "time" + + "github.com/tayloree/publix-deals/internal/api" +) + +var ( + reDollar = regexp.MustCompile(`\$(\d+(?:\.\d{1,2})?)`) + rePercent = regexp.MustCompile(`(\d{1,3})\s*%`) +) + +// DealScore estimates relative deal value for ranking. +func DealScore(item api.SavingItem) float64 { + score := 0.0 + + if ContainsIgnoreCase(item.Categories, "bogo") { + score += 8 + } + + text := strings.ToLower( + CleanText(Deref(item.Savings) + " " + Deref(item.AdditionalDealInfo)), + ) + for _, m := range reDollar.FindAllStringSubmatch(text, -1) { + if len(m) < 2 { + continue + } + if amount, err := strconv.ParseFloat(m[1], 64); err == nil { + score += amount + } + } + for _, m := range rePercent.FindAllStringSubmatch(text, -1) { + if len(m) < 2 { + continue + } + if pct, err := strconv.ParseFloat(m[1], 64); err == nil { + score += pct / 20.0 + } + } + + if score == 0 { + return 0.01 + } + return score +} + +func normalizeSortMode(raw string) string { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "", "relevance": + return "" + case "savings": + return "savings" + case "ending", "end", "expiry", "expiration": + return "ending" + default: + return "" + } +} + +func parseDealDate(raw string) (time.Time, bool) { + value := strings.TrimSpace(raw) + if value == "" { + return time.Time{}, false + } + + layouts := []string{ + "1/2/2006", + "01/02/2006", + "1/2/06", + "01/02/06", + "2006-01-02", + "Jan 2, 2006", + "January 2, 2006", + } + for _, layout := range layouts { + if t, err := time.Parse(layout, value); err == nil { + return t, true + } + } + return time.Time{}, false +}