diff --git a/internal/filter/filter.go b/internal/filter/filter.go index 551477f..7cfb468 100644 --- a/internal/filter/filter.go +++ b/internal/filter/filter.go @@ -18,40 +18,72 @@ type Options struct { // Apply filters a slice of SavingItems according to the given options. func Apply(items []api.SavingItem, opts Options) []api.SavingItem { - result := items + wantCategory := opts.Category != "" + wantDepartment := opts.Department != "" + wantQuery := opts.Query != "" + needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery - if opts.BOGO { - result = where(result, func(i api.SavingItem) bool { - return ContainsIgnoreCase(i.Categories, "bogo") - }) + if !needsFiltering { + if opts.Limit > 0 && opts.Limit < len(items) { + return items[:opts.Limit] + } + return items } - if opts.Category != "" { - result = where(result, func(i api.SavingItem) bool { - return ContainsIgnoreCase(i.Categories, opts.Category) - }) + var result []api.SavingItem + if opts.Limit > 0 && opts.Limit < len(items) { + result = make([]api.SavingItem, 0, opts.Limit) + } else { + result = make([]api.SavingItem, 0, len(items)) } - if opts.Department != "" { - dept := strings.ToLower(opts.Department) - result = where(result, func(i api.SavingItem) bool { - return strings.Contains(strings.ToLower(Deref(i.Department)), dept) - }) + category := opts.Category + department := strings.ToLower(opts.Department) + query := strings.ToLower(opts.Query) + + for _, item := range items { + if opts.BOGO || wantCategory { + hasBogo := !opts.BOGO + hasCategory := !wantCategory + + for _, c := range item.Categories { + if !hasBogo && strings.EqualFold(c, "bogo") { + hasBogo = true + } + if !hasCategory && strings.EqualFold(c, category) { + hasCategory = true + } + if hasBogo && hasCategory { + break + } + } + + if !hasBogo || !hasCategory { + continue + } + } + + if wantDepartment && !strings.Contains(strings.ToLower(Deref(item.Department)), department) { + continue + } + + if wantQuery { + title := strings.ToLower(CleanText(Deref(item.Title))) + desc := strings.ToLower(CleanText(Deref(item.Description))) + if !strings.Contains(title, query) && !strings.Contains(desc, query) { + continue + } + } + + result = append(result, item) + if opts.Limit > 0 && len(result) >= opts.Limit { + break + } } - if opts.Query != "" { - q := strings.ToLower(opts.Query) - result = where(result, func(i api.SavingItem) bool { - title := strings.ToLower(CleanText(Deref(i.Title))) - desc := strings.ToLower(CleanText(Deref(i.Description))) - return strings.Contains(title, q) || strings.Contains(desc, q) - }) + if len(result) == 0 { + return nil } - - if opts.Limit > 0 && opts.Limit < len(result) { - result = result[:opts.Limit] - } - return result } @@ -76,23 +108,21 @@ func Deref(s *string) string { // CleanText unescapes HTML entities and normalizes whitespace. func CleanText(s string) string { + if !strings.ContainsAny(s, "&\r\n") { + return strings.TrimSpace(s) + } + s = html.UnescapeString(s) + if !strings.ContainsAny(s, "\r\n") { + return strings.TrimSpace(s) + } + s = strings.ReplaceAll(s, "\r\n", " ") s = strings.ReplaceAll(s, "\r", " ") s = strings.ReplaceAll(s, "\n", " ") return strings.TrimSpace(s) } -func where(items []api.SavingItem, fn func(api.SavingItem) bool) []api.SavingItem { - var result []api.SavingItem - for _, item := range items { - if fn(item) { - result = append(result, item) - } - } - return result -} - // ContainsIgnoreCase reports whether any element in slice matches val case-insensitively. func ContainsIgnoreCase(slice []string, val string) bool { for _, s := range slice { diff --git a/internal/filter/filter_equivalence_test.go b/internal/filter/filter_equivalence_test.go new file mode 100644 index 0000000..8832313 --- /dev/null +++ b/internal/filter/filter_equivalence_test.go @@ -0,0 +1,195 @@ +package filter_test + +import ( + "fmt" + "math/rand" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tayloree/publix-deals/internal/api" + "github.com/tayloree/publix-deals/internal/filter" +) + +func referenceApply(items []api.SavingItem, opts filter.Options) []api.SavingItem { + result := items + + if opts.BOGO { + result = referenceWhere(result, func(i api.SavingItem) bool { + return filter.ContainsIgnoreCase(i.Categories, "bogo") + }) + } + + if opts.Category != "" { + result = referenceWhere(result, func(i api.SavingItem) bool { + return filter.ContainsIgnoreCase(i.Categories, opts.Category) + }) + } + + if opts.Department != "" { + dept := strings.ToLower(opts.Department) + result = referenceWhere(result, func(i api.SavingItem) bool { + return strings.Contains(strings.ToLower(filter.Deref(i.Department)), dept) + }) + } + + if opts.Query != "" { + q := strings.ToLower(opts.Query) + result = referenceWhere(result, func(i api.SavingItem) bool { + title := strings.ToLower(filter.CleanText(filter.Deref(i.Title))) + desc := strings.ToLower(filter.CleanText(filter.Deref(i.Description))) + return strings.Contains(title, q) || strings.Contains(desc, q) + }) + } + + if opts.Limit > 0 && opts.Limit < len(result) { + result = result[:opts.Limit] + } + + return result +} + +func referenceWhere(items []api.SavingItem, fn func(api.SavingItem) bool) []api.SavingItem { + var result []api.SavingItem + for _, item := range items { + if fn(item) { + result = append(result, item) + } + } + return result +} + +func randomItem(rng *rand.Rand, idx int) api.SavingItem { + makePtr := func(v string) *string { return &v } + + var title *string + if rng.Intn(4) != 0 { + title = makePtr(fmt.Sprintf("Fresh Deal %d", idx)) + } + + var desc *string + if rng.Intn(3) != 0 { + desc = makePtr(fmt.Sprintf("Weekly offer %d", idx)) + } + + var dept *string + deptOptions := []*string{ + nil, + makePtr("Grocery"), + makePtr("Produce"), + makePtr("Meat"), + makePtr("Frozen"), + } + dept = deptOptions[rng.Intn(len(deptOptions))] + + catPool := []string{"bogo", "grocery", "produce", "meat", "frozen", "dairy"} + catCount := rng.Intn(4) + cats := make([]string, 0, catCount) + for range catCount { + cats = append(cats, catPool[rng.Intn(len(catPool))]) + } + + return api.SavingItem{ + ID: fmt.Sprintf("id-%d", idx), + Title: title, + Description: desc, + Department: dept, + Categories: cats, + } +} + +func randomOptions(rng *rand.Rand) filter.Options { + categories := []string{"", "bogo", "grocery", "produce", "meat"} + departments := []string{"", "groc", "prod", "meat"} + queries := []string{"", "fresh", "offer", "deal"} + limits := []int{0, 1, 3, 5, 10} + return filter.Options{ + BOGO: rng.Intn(2) == 0, + Category: categories[rng.Intn(len(categories))], + Department: departments[rng.Intn(len(departments))], + Query: queries[rng.Intn(len(queries))], + Limit: limits[rng.Intn(len(limits))], + } +} + +func TestApply_ReferenceEquivalence(t *testing.T) { + rng := rand.New(rand.NewSource(42)) + + for caseNum := 0; caseNum < 500; caseNum++ { + itemCount := rng.Intn(60) + items := make([]api.SavingItem, 0, itemCount) + for i := range itemCount { + items = append(items, randomItem(rng, i)) + } + + opts := randomOptions(rng) + got := filter.Apply(items, opts) + want := referenceApply(items, opts) + + assert.Equal(t, want, got, "mismatch for opts=%+v case=%d", opts, caseNum) + } +} + +func BenchmarkApply_ReferenceWorkload_1kDeals(b *testing.B) { + rng := rand.New(rand.NewSource(7)) + items := make([]api.SavingItem, 0, 1000) + for i := 0; i < 1000; i++ { + items = append(items, randomItem(rng, i)) + } + opts := filter.Options{ + BOGO: true, + Category: "grocery", + Department: "groc", + Query: "deal", + Limit: 50, + } + + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = filter.Apply(items, opts) + } +} + +func TestApply_AllocationBudget(t *testing.T) { + rng := rand.New(rand.NewSource(7)) + items := make([]api.SavingItem, 0, 1000) + for i := 0; i < 1000; i++ { + items = append(items, randomItem(rng, i)) + } + opts := filter.Options{ + BOGO: true, + Category: "grocery", + Department: "groc", + Query: "deal", + Limit: 50, + } + + allocs := testing.AllocsPerRun(100, func() { + _ = filter.Apply(items, opts) + }) + + // Guardrail for accidental reintroduction of multi-pass intermediate slices. + assert.LessOrEqual(t, allocs, 80.0) +} + +func BenchmarkApply_LegacyReference_1kDeals(b *testing.B) { + rng := rand.New(rand.NewSource(7)) + items := make([]api.SavingItem, 0, 1000) + for i := 0; i < 1000; i++ { + items = append(items, randomItem(rng, i)) + } + opts := filter.Options{ + BOGO: true, + Category: "grocery", + Department: "groc", + Query: "deal", + Limit: 50, + } + + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = referenceApply(items, opts) + } +} diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go index b01519d..9a520d2 100644 --- a/internal/filter/filter_test.go +++ b/internal/filter/filter_test.go @@ -1,6 +1,8 @@ package filter_test import ( + "html" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -152,3 +154,47 @@ func TestCleanText(t *testing.T) { assert.Equal(t, tt.want, filter.CleanText(tt.input), "CleanText(%q)", tt.input) } } + +func legacyCleanText(s string) string { + s = html.UnescapeString(s) + s = strings.ReplaceAll(s, "\r\n", " ") + s = strings.ReplaceAll(s, "\r", " ") + s = strings.ReplaceAll(s, "\n", " ") + return strings.TrimSpace(s) +} + +func BenchmarkCleanText_Plain_New(b *testing.B) { + const input = "Simple Weekly Deal Title 123" + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = filter.CleanText(input) + } +} + +func BenchmarkCleanText_Plain_Legacy(b *testing.B) { + const input = "Simple Weekly Deal Title 123" + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = legacyCleanText(input) + } +} + +func BenchmarkCleanText_Escaped_New(b *testing.B) { + const input = " Eight O'Clock & Tea\r\nSpecial " + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = filter.CleanText(input) + } +} + +func BenchmarkCleanText_Escaped_Legacy(b *testing.B) { + const input = " Eight O'Clock & Tea\r\nSpecial " + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _ = legacyCleanText(input) + } +}