Enhance filter pipeline with synonym-aware categories and deal sorting
- extend filter.Options with sort mode support and keep Apply as a single-pass pipeline with limit behavior preserved for unsorted flows
- add sort normalization and two ordering strategies:
  * savings: rank by computed DealScore with deterministic title tie-break
  * ending: rank by earliest parsed end date, then DealScore fallback
- introduce DealScore heuristics that combine BOGO weighting, dollar-off extraction, and percentage extraction from savings/deal-info text
- add category synonym matcher that supports:
  * direct case-insensitive matches
  * canonical group synonym expansion (e.g. veggies -> produce)
  * normalized fallback for hyphen/underscore/plural variants without breaking exact unknown-category matching
- include explicit tests for synonym matching, hyphenated category handling, unknown plural exact matching, and sort ordering behavior
- keep allocation-sensitive behavior intact while adding matcher precomputation and fast-path checks
This commit is contained in:
121
internal/filter/category_synonyms.go
Normal file
121
internal/filter/category_synonyms.go
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
package filter
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// categorySynonyms maps each canonical category group to the alternative
// labels that are treated as belonging to that group when filtering.
// Keys are written in the form normalizeCategory produces (lowercase,
// singular), which lets a normalized filter value be looked up directly.
var categorySynonyms = map[string][]string{
	"bogo": {
		"bogof",
		"buy one get one",
		"buy1get1",
		"2 for 1",
		"two for one",
	},
	"produce": {
		"fruit",
		"fruits",
		"vegetable",
		"vegetables",
		"veggie",
		"veggies",
	},
	"meat": {
		"beef",
		"chicken",
		"poultry",
		"pork",
		"seafood",
	},
	"dairy": {
		"milk",
		"cheese",
		"yogurt",
	},
	"bakery": {
		"bread",
		"pastry",
		"pastries",
	},
	"deli": {
		"delicatessen",
		"cold cuts",
		"lunch meat",
	},
	"frozen": {
		"frozen foods",
	},
	"grocery": {
		"pantry",
		"shelf",
	},
}
|
||||||
|
|
||||||
|
// categoryMatcher holds the precomputed alias data used to test item
// categories against a single requested category. The zero value has no
// aliases and therefore matches nothing.
type categoryMatcher struct {
	// exactAliases are compared case-insensitively against each category.
	exactAliases []string

	// normalized is the set of normalizeCategory forms of exactAliases.
	normalized map[string]struct{}
}
|
||||||
|
|
||||||
|
func newCategoryMatcher(wanted string) categoryMatcher {
|
||||||
|
aliases := categoryAliasList(wanted)
|
||||||
|
if len(aliases) == 0 {
|
||||||
|
return categoryMatcher{}
|
||||||
|
}
|
||||||
|
|
||||||
|
normalized := make(map[string]struct{}, len(aliases))
|
||||||
|
for _, alias := range aliases {
|
||||||
|
normalized[normalizeCategory(alias)] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return categoryMatcher{
|
||||||
|
exactAliases: aliases,
|
||||||
|
normalized: normalized,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func categoryAliasList(wanted string) []string {
|
||||||
|
raw := strings.TrimSpace(wanted)
|
||||||
|
group := resolveCategoryGroup(wanted)
|
||||||
|
if raw == "" && group == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]string, 0, 1+len(categorySynonyms[group]))
|
||||||
|
addAlias := func(alias string) {
|
||||||
|
alias = strings.TrimSpace(alias)
|
||||||
|
if alias == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, existing := range out {
|
||||||
|
if strings.EqualFold(existing, alias) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out = append(out, alias)
|
||||||
|
}
|
||||||
|
|
||||||
|
addAlias(raw)
|
||||||
|
addAlias(group)
|
||||||
|
|
||||||
|
if synonyms, ok := categorySynonyms[group]; ok {
|
||||||
|
out = append(out, synonyms...)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveCategoryGroup(wanted string) string {
|
||||||
|
norm := normalizeCategory(wanted)
|
||||||
|
if norm == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := categorySynonyms[norm]; ok {
|
||||||
|
return norm
|
||||||
|
}
|
||||||
|
for key, synonyms := range categorySynonyms {
|
||||||
|
for _, s := range synonyms {
|
||||||
|
if normalizeCategory(s) == norm {
|
||||||
|
return key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return norm
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m categoryMatcher) matches(category string) bool {
|
||||||
|
trimmed := strings.TrimSpace(category)
|
||||||
|
for _, alias := range m.exactAliases {
|
||||||
|
if strings.EqualFold(trimmed, alias) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast path: if no separators are present and direct aliases didn't match,
|
||||||
|
// normalization would only add overhead for common categories like "grocery".
|
||||||
|
if !strings.ContainsAny(trimmed, "-_ ") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
norm := normalizeCategory(trimmed)
|
||||||
|
_, ok := m.normalized[norm]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeCategory reduces a category label to a comparison key: surrounding
// whitespace is trimmed, letters are lowercased, underscores and hyphens
// become spaces, whitespace runs collapse to one space, and a simple plural
// suffix is stripped from the end ("ies" -> "y" for strings longer than four
// bytes, otherwise a single trailing "s" for strings longer than three bytes
// that do not end in "ss"). Blank input yields "".
func normalizeCategory(raw string) string {
	cleaned := strings.TrimSpace(raw)
	cleaned = strings.ToLower(cleaned)
	if len(cleaned) == 0 {
		return ""
	}

	for _, sep := range [...]string{"_", "-"} {
		cleaned = strings.ReplaceAll(cleaned, sep, " ")
	}
	cleaned = strings.Join(strings.Fields(cleaned), " ")

	if len(cleaned) > 4 && strings.HasSuffix(cleaned, "ies") {
		return cleaned[:len(cleaned)-len("ies")] + "y"
	}
	if len(cleaned) > 3 && strings.HasSuffix(cleaned, "s") && !strings.HasSuffix(cleaned, "ss") {
		return cleaned[:len(cleaned)-1]
	}
	return cleaned
}
|
||||||
@@ -2,6 +2,7 @@ package filter
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"html"
|
"html"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/tayloree/publix-deals/internal/api"
|
"github.com/tayloree/publix-deals/internal/api"
|
||||||
@@ -13,6 +14,7 @@ type Options struct {
|
|||||||
Category string
|
Category string
|
||||||
Department string
|
Department string
|
||||||
Query string
|
Query string
|
||||||
|
Sort string
|
||||||
Limit int
|
Limit int
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,8 +24,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
|||||||
wantDepartment := opts.Department != ""
|
wantDepartment := opts.Department != ""
|
||||||
wantQuery := opts.Query != ""
|
wantQuery := opts.Query != ""
|
||||||
needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery
|
needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery
|
||||||
|
sortMode := normalizeSortMode(opts.Sort)
|
||||||
|
hasSort := sortMode != ""
|
||||||
|
|
||||||
if !needsFiltering {
|
if !needsFiltering && !hasSort {
|
||||||
if opts.Limit > 0 && opts.Limit < len(items) {
|
if opts.Limit > 0 && opts.Limit < len(items) {
|
||||||
return items[:opts.Limit]
|
return items[:opts.Limit]
|
||||||
}
|
}
|
||||||
@@ -37,9 +41,10 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
|||||||
result = make([]api.SavingItem, 0, len(items))
|
result = make([]api.SavingItem, 0, len(items))
|
||||||
}
|
}
|
||||||
|
|
||||||
category := opts.Category
|
|
||||||
department := strings.ToLower(opts.Department)
|
department := strings.ToLower(opts.Department)
|
||||||
query := strings.ToLower(opts.Query)
|
query := strings.ToLower(opts.Query)
|
||||||
|
applyLimitWhileFiltering := !hasSort && opts.Limit > 0
|
||||||
|
categoryMatcher := newCategoryMatcher(opts.Category)
|
||||||
|
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if opts.BOGO || wantCategory {
|
if opts.BOGO || wantCategory {
|
||||||
@@ -50,7 +55,7 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
|||||||
if !hasBogo && strings.EqualFold(c, "bogo") {
|
if !hasBogo && strings.EqualFold(c, "bogo") {
|
||||||
hasBogo = true
|
hasBogo = true
|
||||||
}
|
}
|
||||||
if !hasCategory && strings.EqualFold(c, category) {
|
if !hasCategory && categoryMatcher.matches(c) {
|
||||||
hasCategory = true
|
hasCategory = true
|
||||||
}
|
}
|
||||||
if hasBogo && hasCategory {
|
if hasBogo && hasCategory {
|
||||||
@@ -76,11 +81,18 @@ func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result = append(result, item)
|
result = append(result, item)
|
||||||
if opts.Limit > 0 && len(result) >= opts.Limit {
|
if applyLimitWhileFiltering && len(result) >= opts.Limit {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if hasSort && len(result) > 1 {
|
||||||
|
sortItems(result, sortMode)
|
||||||
|
}
|
||||||
|
if opts.Limit > 0 && opts.Limit < len(result) {
|
||||||
|
result = result[:opts.Limit]
|
||||||
|
}
|
||||||
|
|
||||||
if len(result) == 0 {
|
if len(result) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -132,3 +144,35 @@ func ContainsIgnoreCase(slice []string, val string) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sortItems(items []api.SavingItem, mode string) {
|
||||||
|
switch mode {
|
||||||
|
case "savings":
|
||||||
|
sort.SliceStable(items, func(i, j int) bool {
|
||||||
|
left := DealScore(items[i])
|
||||||
|
right := DealScore(items[j])
|
||||||
|
if left == right {
|
||||||
|
return strings.ToLower(CleanText(Deref(items[i].Title))) < strings.ToLower(CleanText(Deref(items[j].Title)))
|
||||||
|
}
|
||||||
|
return left > right
|
||||||
|
})
|
||||||
|
case "ending":
|
||||||
|
sort.SliceStable(items, func(i, j int) bool {
|
||||||
|
leftDate, leftOK := parseDealDate(items[i].EndFormatted)
|
||||||
|
rightDate, rightOK := parseDealDate(items[j].EndFormatted)
|
||||||
|
switch {
|
||||||
|
case leftOK && rightOK:
|
||||||
|
if leftDate.Equal(rightDate) {
|
||||||
|
return DealScore(items[i]) > DealScore(items[j])
|
||||||
|
}
|
||||||
|
return leftDate.Before(rightDate)
|
||||||
|
case leftOK:
|
||||||
|
return true
|
||||||
|
case rightOK:
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
return DealScore(items[i]) > DealScore(items[j])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -73,6 +73,25 @@ func TestApply_CategoryCaseInsensitive(t *testing.T) {
|
|||||||
assert.Len(t, result, 2)
|
assert.Len(t, result, 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApply_CategorySynonym(t *testing.T) {
|
||||||
|
result := filter.Apply(sampleItems(), filter.Options{Category: "veggies"})
|
||||||
|
assert.Len(t, result, 1)
|
||||||
|
assert.Equal(t, "3", result[0].ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApply_CategoryHyphenatedExactMatch(t *testing.T) {
|
||||||
|
result := filter.Apply(sampleItems(), filter.Options{Category: "pet-bogos"})
|
||||||
|
assert.Len(t, result, 1)
|
||||||
|
assert.Equal(t, "4", result[0].ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApply_CategoryUnknownPluralStillMatchesExact(t *testing.T) {
|
||||||
|
items := []api.SavingItem{{ID: "x", Categories: []string{"snacks"}}}
|
||||||
|
result := filter.Apply(items, filter.Options{Category: "snacks"})
|
||||||
|
assert.Len(t, result, 1)
|
||||||
|
assert.Equal(t, "x", result[0].ID)
|
||||||
|
}
|
||||||
|
|
||||||
func TestApply_Department(t *testing.T) {
|
func TestApply_Department(t *testing.T) {
|
||||||
result := filter.Apply(sampleItems(), filter.Options{Department: "produce"})
|
result := filter.Apply(sampleItems(), filter.Options{Department: "produce"})
|
||||||
assert.Len(t, result, 1)
|
assert.Len(t, result, 1)
|
||||||
@@ -116,6 +135,33 @@ func TestApply_CombinedFilters(t *testing.T) {
|
|||||||
assert.Equal(t, "2", result[0].ID)
|
assert.Equal(t, "2", result[0].ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApply_SortSavings(t *testing.T) {
|
||||||
|
items := []api.SavingItem{
|
||||||
|
{ID: "a", Title: ptr("A"), Savings: ptr("$1.00 off")},
|
||||||
|
{ID: "b", Title: ptr("B"), Savings: ptr("$4.00 off")},
|
||||||
|
{ID: "c", Title: ptr("C"), Categories: []string{"bogo"}},
|
||||||
|
}
|
||||||
|
result := filter.Apply(items, filter.Options{Sort: "savings"})
|
||||||
|
|
||||||
|
assert.Len(t, result, 3)
|
||||||
|
assert.Equal(t, "c", result[0].ID)
|
||||||
|
assert.Equal(t, "b", result[1].ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApply_SortEnding(t *testing.T) {
|
||||||
|
items := []api.SavingItem{
|
||||||
|
{ID: "late", EndFormatted: "12/31/2026"},
|
||||||
|
{ID: "soon", EndFormatted: "01/02/2026"},
|
||||||
|
{ID: "unknown"},
|
||||||
|
}
|
||||||
|
result := filter.Apply(items, filter.Options{Sort: "ending"})
|
||||||
|
|
||||||
|
assert.Len(t, result, 3)
|
||||||
|
assert.Equal(t, "soon", result[0].ID)
|
||||||
|
assert.Equal(t, "late", result[1].ID)
|
||||||
|
assert.Equal(t, "unknown", result[2].ID)
|
||||||
|
}
|
||||||
|
|
||||||
func TestApply_NilFields(t *testing.T) {
|
func TestApply_NilFields(t *testing.T) {
|
||||||
// Item 5 has nil title/department/categories — should not panic
|
// Item 5 has nil title/department/categories — should not panic
|
||||||
result := filter.Apply(sampleItems(), filter.Options{Query: "anything"})
|
result := filter.Apply(sampleItems(), filter.Options{Query: "anything"})
|
||||||
|
|||||||
85
internal/filter/sort.go
Normal file
85
internal/filter/sort.go
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
package filter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/tayloree/publix-deals/internal/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
// reDollar captures the numeric part of a dollar amount such as "$4" or
// "$4.50" (at most two decimal digits are captured).
var reDollar = regexp.MustCompile(`\$(\d+(?:\.\d{1,2})?)`)

// rePercent captures a one- to three-digit number that is followed by a
// percent sign, optionally separated from it by whitespace.
var rePercent = regexp.MustCompile(`(\d{1,3})\s*%`)
|
||||||
|
|
||||||
|
// DealScore estimates relative deal value for ranking.
|
||||||
|
func DealScore(item api.SavingItem) float64 {
|
||||||
|
score := 0.0
|
||||||
|
|
||||||
|
if ContainsIgnoreCase(item.Categories, "bogo") {
|
||||||
|
score += 8
|
||||||
|
}
|
||||||
|
|
||||||
|
text := strings.ToLower(
|
||||||
|
CleanText(Deref(item.Savings) + " " + Deref(item.AdditionalDealInfo)),
|
||||||
|
)
|
||||||
|
for _, m := range reDollar.FindAllStringSubmatch(text, -1) {
|
||||||
|
if len(m) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if amount, err := strconv.ParseFloat(m[1], 64); err == nil {
|
||||||
|
score += amount
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, m := range rePercent.FindAllStringSubmatch(text, -1) {
|
||||||
|
if len(m) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if pct, err := strconv.ParseFloat(m[1], 64); err == nil {
|
||||||
|
score += pct / 20.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if score == 0 {
|
||||||
|
return 0.01
|
||||||
|
}
|
||||||
|
return score
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeSortMode maps a user-supplied sort value onto a canonical mode.
// Matching ignores case and surrounding whitespace. "savings" maps to
// "savings"; "ending", "end", "expiry" and "expiration" map to "ending".
// Everything else, including "" and "relevance", maps to "" (no sorting).
func normalizeSortMode(raw string) string {
	switch mode := strings.ToLower(strings.TrimSpace(raw)); mode {
	case "savings":
		return mode
	case "ending", "end", "expiry", "expiration":
		return "ending"
	}
	return ""
}
|
||||||
|
|
||||||
|
// parseDealDate tries to interpret raw as a calendar date. The trimmed input
// is tested against a fixed list of layouts in order and the first successful
// parse wins. It returns the zero time and false when raw is blank or matches
// none of the layouts.
func parseDealDate(raw string) (time.Time, bool) {
	var none time.Time

	text := strings.TrimSpace(raw)
	if len(text) == 0 {
		return none, false
	}

	for _, layout := range [...]string{
		"1/2/2006",
		"01/02/2006",
		"1/2/06",
		"01/02/06",
		"2006-01-02",
		"Jan 2, 2006",
		"January 2, 2006",
	} {
		parsed, err := time.Parse(layout, text)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return none, false
}
|
||||||
Reference in New Issue
Block a user