Enhance filter pipeline with synonym-aware categories and deal sorting

- extend filter.Options with sort mode support and keep Apply as a single-pass pipeline with limit behavior preserved for unsorted flows
- add sort normalization and two ordering strategies:
  * savings: rank by computed DealScore with deterministic title tie-break
  * ending: rank by earliest parsed end date, then DealScore fallback
- introduce DealScore heuristics that combine BOGO weighting, dollar-off extraction, and percentage extraction from savings/deal-info text
- add category synonym matcher that supports:
  * direct case-insensitive matches
  * canonical group synonym expansion (e.g. veggies -> produce)
  * normalized fallback for hyphen/underscore/plural variants without breaking exact unknown-category matching
- include explicit tests for synonym matching, hyphenated category handling, unknown plural exact matching, and sort ordering behavior
- keep allocation-sensitive behavior intact while adding matcher precomputation and fast-path checks
This commit is contained in:
2026-02-23 00:26:55 -05:00
parent b91c44c4ed
commit eb2328b768
4 changed files with 300 additions and 4 deletions

View File

@@ -0,0 +1,121 @@
package filter
import "strings"
// categorySynonyms maps each canonical category group to the alternative
// names that are treated as equivalent to it when filtering by category.
var categorySynonyms = map[string][]string{
	"bogo": []string{
		"bogof",
		"buy one get one",
		"buy1get1",
		"2 for 1",
		"two for one",
	},
	"produce": []string{
		"fruit",
		"fruits",
		"vegetable",
		"vegetables",
		"veggie",
		"veggies",
	},
	"meat": []string{
		"beef",
		"chicken",
		"poultry",
		"pork",
		"seafood",
	},
	"dairy": []string{
		"milk",
		"cheese",
		"yogurt",
	},
	"bakery": []string{
		"bread",
		"pastry",
		"pastries",
	},
	"deli": []string{
		"delicatessen",
		"cold cuts",
		"lunch meat",
	},
	"frozen": []string{
		"frozen foods",
	},
	"grocery": []string{
		"pantry",
		"shelf",
	},
}
// categoryMatcher holds the alias data precomputed for one requested
// category so that individual item categories can be checked repeatedly.
type categoryMatcher struct {
	// exactAliases are the names compared case-insensitively against an
	// item category.
	exactAliases []string

	// normalized is the set of normalizeCategory results for the aliases,
	// used as a fallback lookup.
	normalized map[string]struct{}
}
// newCategoryMatcher builds the matcher for the requested category.
//
// The alias list comes from categoryAliasList; every alias is also stored in
// normalized form. When there are no aliases the zero-value matcher is
// returned.
func newCategoryMatcher(wanted string) categoryMatcher {
	var m categoryMatcher

	names := categoryAliasList(wanted)
	if len(names) == 0 {
		return m
	}

	m.exactAliases = names
	m.normalized = make(map[string]struct{}, len(names))
	for i := range names {
		m.normalized[normalizeCategory(names[i])] = struct{}{}
	}
	return m
}
// categoryAliasList returns every name that should match the requested
// category: the trimmed input itself, the group it resolves to, and that
// group's synonyms, in that order.
//
// Entries are unique under case-insensitive comparison, so asking for a
// synonym such as "veggies" does not list it a second time when the group's
// synonyms are added. It returns nil when both the trimmed input and the
// resolved group are empty.
func categoryAliasList(wanted string) []string {
	raw := strings.TrimSpace(wanted)
	group := resolveCategoryGroup(wanted)
	if raw == "" && group == "" {
		return nil
	}

	// A missing group yields a nil slice, which ranges as empty below.
	synonyms := categorySynonyms[group]

	// Reserve room for the raw input, the group name, and every synonym so
	// the slice never has to grow.
	out := make([]string, 0, 2+len(synonyms))
	addAlias := func(alias string) {
		alias = strings.TrimSpace(alias)
		if alias == "" {
			return
		}
		for _, existing := range out {
			if strings.EqualFold(existing, alias) {
				return
			}
		}
		out = append(out, alias)
	}

	addAlias(raw)
	addAlias(group)
	// Route synonyms through addAlias as well; appending them wholesale
	// would re-add the raw input whenever it is itself a synonym.
	for _, synonym := range synonyms {
		addAlias(synonym)
	}
	return out
}
// resolveCategoryGroup maps a requested category to its canonical group.
//
// The input is normalized first. If the normalized form is itself a key of
// categorySynonyms, or matches the normalized form of one of a group's
// synonyms, that group key is returned. Otherwise the normalized input is
// returned unchanged, which is the empty string for blank input.
//
// Map iteration order is unspecified in Go, so if a synonym were ever listed
// under two groups, which group wins would not be deterministic.
func resolveCategoryGroup(wanted string) string {
	key := normalizeCategory(wanted)
	if key == "" {
		return key
	}
	if _, isGroup := categorySynonyms[key]; isGroup {
		return key
	}

	for group, members := range categorySynonyms {
		for i := range members {
			if normalizeCategory(members[i]) == key {
				return group
			}
		}
	}

	// Unknown category: fall back to its normalized form.
	return key
}
// matches reports whether an item category satisfies the requested category.
//
// The trimmed category is first compared case-insensitively against every
// exact alias. If none match, it is normalized and looked up in the
// precomputed normalized set, which covers hyphen/underscore/space variants
// and plural forms (for example "snacks" against a wanted "snack").
func (m categoryMatcher) matches(category string) bool {
	trimmed := strings.TrimSpace(category)
	for _, alias := range m.exactAliases {
		if strings.EqualFold(trimmed, alias) {
			return true
		}
	}

	// Nothing to look up for an empty matcher or a blank category.
	if trimmed == "" || len(m.normalized) == 0 {
		return false
	}

	// Fast path: normalization rewrites separators, whitespace runs, and a
	// trailing plural "s". A category with none of those (e.g. "grocery")
	// is skipped so common non-matching values pay no normalization cost.
	// The plural check is required: without it, separator-free plurals such
	// as "snacks" would never reach the normalized lookup.
	last := trimmed[len(trimmed)-1]
	mayBePlural := last == 's' || last == 'S'
	if !mayBePlural && !strings.ContainsAny(trimmed, "-_ ") {
		return false
	}

	_, ok := m.normalized[normalizeCategory(trimmed)]
	return ok
}
// normalizeCategory reduces a category name to a canonical comparison form.
//
// The input is trimmed and lower-cased, underscores and hyphens become
// spaces, and whitespace runs collapse to a single space. A simple plural
// suffix is then folded: a trailing "ies" becomes "y" for strings longer than
// four bytes, otherwise a trailing "s" (but not "ss") is dropped for strings
// longer than three bytes. Blank input yields the empty string.
func normalizeCategory(raw string) string {
	lowered := strings.ToLower(strings.TrimSpace(raw))
	if len(lowered) == 0 {
		return ""
	}

	spaced := strings.ReplaceAll(strings.ReplaceAll(lowered, "_", " "), "-", " ")
	name := strings.Join(strings.Fields(spaced), " ")

	n := len(name)
	// The "ies" rule is checked first so it takes precedence over the
	// plain "s" rule.
	if n > 4 && strings.HasSuffix(name, "ies") {
		return name[:n-3] + "y"
	}
	if n > 3 && strings.HasSuffix(name, "s") && !strings.HasSuffix(name, "ss") {
		return name[:n-1]
	}
	return name
}

View File

@@ -2,6 +2,7 @@ package filter
import ( import (
"html" "html"
"sort"
"strings" "strings"
"github.com/tayloree/publix-deals/internal/api" "github.com/tayloree/publix-deals/internal/api"
@@ -13,6 +14,7 @@ type Options struct {
Category string Category string
Department string Department string
Query string Query string
Sort string
Limit int Limit int
} }
@@ -22,8 +24,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
wantDepartment := opts.Department != "" wantDepartment := opts.Department != ""
wantQuery := opts.Query != "" wantQuery := opts.Query != ""
needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery
sortMode := normalizeSortMode(opts.Sort)
hasSort := sortMode != ""
if !needsFiltering { if !needsFiltering && !hasSort {
if opts.Limit > 0 && opts.Limit < len(items) { if opts.Limit > 0 && opts.Limit < len(items) {
return items[:opts.Limit] return items[:opts.Limit]
} }
@@ -37,9 +41,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
result = make([]api.SavingItem, 0, len(items)) result = make([]api.SavingItem, 0, len(items))
} }
category := opts.Category
department := strings.ToLower(opts.Department) department := strings.ToLower(opts.Department)
query := strings.ToLower(opts.Query) query := strings.ToLower(opts.Query)
applyLimitWhileFiltering := !hasSort && opts.Limit > 0
categoryMatcher := newCategoryMatcher(opts.Category)
for _, item := range items { for _, item := range items {
if opts.BOGO || wantCategory { if opts.BOGO || wantCategory {
@@ -50,7 +55,7 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
if !hasBogo && strings.EqualFold(c, "bogo") { if !hasBogo && strings.EqualFold(c, "bogo") {
hasBogo = true hasBogo = true
} }
if !hasCategory && strings.EqualFold(c, category) { if !hasCategory && categoryMatcher.matches(c) {
hasCategory = true hasCategory = true
} }
if hasBogo && hasCategory { if hasBogo && hasCategory {
@@ -76,11 +81,18 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
} }
result = append(result, item) result = append(result, item)
if opts.Limit > 0 && len(result) >= opts.Limit { if applyLimitWhileFiltering && len(result) >= opts.Limit {
break break
} }
} }
if hasSort && len(result) > 1 {
sortItems(result, sortMode)
}
if opts.Limit > 0 && opts.Limit < len(result) {
result = result[:opts.Limit]
}
if len(result) == 0 { if len(result) == 0 {
return nil return nil
} }
@@ -132,3 +144,35 @@ func ContainsIgnoreCase(slice []string, val string) bool {
} }
return false return false
} }
// sortItems reorders items in place according to mode, using a stable sort.
//
//   - "savings": higher DealScore first; equal scores are ordered by the
//     lower-cased cleaned title, ascending.
//   - "ending": items whose EndFormatted parses as a date come first, earliest
//     date first; equal dates, and items without a parseable date, are
//     ordered by higher DealScore.
//
// Any other mode leaves items untouched.
func sortItems(items []api.SavingItem, mode string) {
	// higherScore reports whether items[i] scores strictly above items[j].
	higherScore := func(i, j int) bool {
		return DealScore(items[i]) > DealScore(items[j])
	}

	if mode == "savings" {
		sort.SliceStable(items, func(i, j int) bool {
			a, b := DealScore(items[i]), DealScore(items[j])
			if a != b {
				return a > b
			}
			return strings.ToLower(CleanText(Deref(items[i].Title))) < strings.ToLower(CleanText(Deref(items[j].Title)))
		})
		return
	}

	if mode == "ending" {
		sort.SliceStable(items, func(i, j int) bool {
			endI, okI := parseDealDate(items[i].EndFormatted)
			endJ, okJ := parseDealDate(items[j].EndFormatted)

			// Exactly one side has a date: the dated item sorts first.
			if okI != okJ {
				return okI
			}
			// Neither has a date, or both end on the same instant.
			if !okI || endI.Equal(endJ) {
				return higherScore(i, j)
			}
			return endI.Before(endJ)
		})
	}
}

View File

@@ -73,6 +73,25 @@ func TestApply_CategoryCaseInsensitive(t *testing.T) {
assert.Len(t, result, 2) assert.Len(t, result, 2)
} }
// TestApply_CategorySynonym checks that filtering by a synonym ("veggies")
// selects exactly one sample item, the one with ID "3".
func TestApply_CategorySynonym(t *testing.T) {
	opts := filter.Options{Category: "veggies"}

	got := filter.Apply(sampleItems(), opts)

	assert.Len(t, got, 1)
	assert.Equal(t, "3", got[0].ID)
}
// TestApply_CategoryHyphenatedExactMatch checks that a hyphenated category
// ("pet-bogos") selects exactly one sample item, the one with ID "4".
func TestApply_CategoryHyphenatedExactMatch(t *testing.T) {
	opts := filter.Options{Category: "pet-bogos"}

	got := filter.Apply(sampleItems(), opts)

	assert.Len(t, got, 1)
	assert.Equal(t, "4", got[0].ID)
}
// TestApply_CategoryUnknownPluralStillMatchesExact checks that a plural
// category outside the synonym table ("snacks") still matches an item
// carrying that exact category.
func TestApply_CategoryUnknownPluralStillMatchesExact(t *testing.T) {
	snackItem := api.SavingItem{ID: "x", Categories: []string{"snacks"}}
	opts := filter.Options{Category: "snacks"}

	got := filter.Apply([]api.SavingItem{snackItem}, opts)

	assert.Len(t, got, 1)
	assert.Equal(t, "x", got[0].ID)
}
func TestApply_Department(t *testing.T) { func TestApply_Department(t *testing.T) {
result := filter.Apply(sampleItems(), filter.Options{Department: "produce"}) result := filter.Apply(sampleItems(), filter.Options{Department: "produce"})
assert.Len(t, result, 1) assert.Len(t, result, 1)
@@ -116,6 +135,33 @@ func TestApply_CombinedFilters(t *testing.T) {
assert.Equal(t, "2", result[0].ID) assert.Equal(t, "2", result[0].ID)
} }
// TestApply_SortSavings checks that the "savings" sort puts the BOGO item
// first and the larger dollar-off item second.
func TestApply_SortSavings(t *testing.T) {
	var items []api.SavingItem
	items = append(items,
		api.SavingItem{ID: "a", Title: ptr("A"), Savings: ptr("$1.00 off")},
		api.SavingItem{ID: "b", Title: ptr("B"), Savings: ptr("$4.00 off")},
		api.SavingItem{ID: "c", Title: ptr("C"), Categories: []string{"bogo"}},
	)
	opts := filter.Options{Sort: "savings"}

	got := filter.Apply(items, opts)

	assert.Len(t, got, 3)
	assert.Equal(t, "c", got[0].ID)
	assert.Equal(t, "b", got[1].ID)
}
// TestApply_SortEnding checks that the "ending" sort orders items by earliest
// end date and places the item without an end date last.
func TestApply_SortEnding(t *testing.T) {
	var items []api.SavingItem
	items = append(items,
		api.SavingItem{ID: "late", EndFormatted: "12/31/2026"},
		api.SavingItem{ID: "soon", EndFormatted: "01/02/2026"},
		api.SavingItem{ID: "unknown"},
	)
	opts := filter.Options{Sort: "ending"}

	got := filter.Apply(items, opts)

	assert.Len(t, got, 3)
	assert.Equal(t, "soon", got[0].ID)
	assert.Equal(t, "late", got[1].ID)
	assert.Equal(t, "unknown", got[2].ID)
}
func TestApply_NilFields(t *testing.T) { func TestApply_NilFields(t *testing.T) {
// Item 5 has nil title/department/categories — should not panic // Item 5 has nil title/department/categories — should not panic
result := filter.Apply(sampleItems(), filter.Options{Query: "anything"}) result := filter.Apply(sampleItems(), filter.Options{Query: "anything"})

85
internal/filter/sort.go Normal file
View File

@@ -0,0 +1,85 @@
package filter
import (
"regexp"
"strconv"
"strings"
"time"
"github.com/tayloree/publix-deals/internal/api"
)
// reDollar matches a dollar sign followed by an amount with an optional one-
// or two-digit fraction; the amount without the sign is capture group 1.
var reDollar = regexp.MustCompile(`\$(\d+(?:\.\d{1,2})?)`)

// rePercent matches one to three digits followed by optional whitespace and
// a percent sign; the digits are capture group 1.
var rePercent = regexp.MustCompile(`(\d{1,3})\s*%`)
// DealScore estimates the relative value of a deal for ranking purposes.
//
// The score is the sum of:
//   - 8 if the item's categories contain "bogo" (case-insensitive);
//   - every dollar amount found by reDollar in the combined Savings and
//     AdditionalDealInfo text;
//   - each percentage found by rePercent, divided by 20.
//
// An item that accumulates nothing gets a baseline of 0.01 instead of zero.
func DealScore(item api.SavingItem) float64 {
	var total float64
	if ContainsIgnoreCase(item.Categories, "bogo") {
		total = 8
	}

	combined := Deref(item.Savings) + " " + Deref(item.AdditionalDealInfo)
	blurb := strings.ToLower(CleanText(combined))

	for _, match := range reDollar.FindAllStringSubmatch(blurb, -1) {
		if len(match) >= 2 {
			if dollars, err := strconv.ParseFloat(match[1], 64); err == nil {
				total += dollars
			}
		}
	}

	for _, match := range rePercent.FindAllStringSubmatch(blurb, -1) {
		if len(match) >= 2 {
			if percent, err := strconv.ParseFloat(match[1], 64); err == nil {
				total += percent / 20.0
			}
		}
	}

	if total != 0 {
		return total
	}
	// Baseline for deals with no recognizable savings signal.
	return 0.01
}
// normalizeSortMode maps a user-supplied sort name to a canonical mode.
//
// Matching ignores surrounding whitespace and letter case. "savings" maps to
// "savings"; "ending", "end", "expiry", and "expiration" map to "ending".
// Everything else, including "" and "relevance", maps to the empty string.
func normalizeSortMode(raw string) string {
	mode := strings.ToLower(strings.TrimSpace(raw))

	if mode == "savings" {
		return "savings"
	}
	for _, alias := range [...]string{"ending", "end", "expiry", "expiration"} {
		if mode == alias {
			return "ending"
		}
	}
	return ""
}
// parseDealDate tries to read a deal end date from raw.
//
// Surrounding whitespace is ignored. The value is tried against a fixed list
// of layouts in order (numeric month/day/year with four- or two-digit years,
// ISO year-month-day, and abbreviated or full month names); the first layout
// that parses wins. It returns the zero time and false for blank input or
// when no layout matches.
func parseDealDate(raw string) (time.Time, bool) {
	text := strings.TrimSpace(raw)
	if len(text) == 0 {
		return time.Time{}, false
	}

	formats := [...]string{
		"1/2/2006",
		"01/02/2006",
		"1/2/06",
		"01/02/06",
		"2006-01-02",
		"Jan 2, 2006",
		"January 2, 2006",
	}
	for i := 0; i < len(formats); i++ {
		parsed, err := time.Parse(formats[i], text)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return time.Time{}, false
}