Rewrite filter.Apply as single-pass with early-exit and pre-allocation
Replace the multi-pass where() chain in Apply() with a single loop that evaluates all filter predicates per item and skips immediately on first mismatch. This eliminates N intermediate slice allocations (one per active filter) and avoids re-scanning the full dataset for each filter dimension. Key changes in filter.go: - Single loop with continue-on-mismatch for BOGO, category, department, and query filters — combined categories check scans item.Categories once for both BOGO and category instead of twice - Pre-allocate result slice capped at min(len(items), opts.Limit) to avoid grow-and-copy churn - Fast-path bypass when no filters are active (just apply limit) - Break early once limit is reached instead of filtering everything and truncating after - Remove the now-unused where() helper function - Add early-return fast paths to CleanText() for the common case where input contains no HTML entities or newlines, avoiding unnecessary html.UnescapeString and ReplaceAll calls Test coverage: - filter_equivalence_test.go (new): Reference implementation of the original multi-pass algorithm with 500 randomized test cases verifying behavioral equivalence. Includes allocation budget guardrail (<=80 allocs/op for 1k items) to catch accidental regression to multi-pass. Benchmarks for new vs legacy reference on identical workload. - filter_test.go: Benchmark comparisons for CleanText on plain text (fast path) vs escaped HTML (full path), new vs legacy. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -18,40 +18,72 @@ type Options struct {
|
|||||||
|
|
||||||
// Apply filters a slice of SavingItems according to the given options.
|
// Apply filters a slice of SavingItems according to the given options.
|
||||||
func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
func Apply(items []api.SavingItem, opts Options) []api.SavingItem {
|
||||||
result := items
|
wantCategory := opts.Category != ""
|
||||||
|
wantDepartment := opts.Department != ""
|
||||||
|
wantQuery := opts.Query != ""
|
||||||
|
needsFiltering := opts.BOGO || wantCategory || wantDepartment || wantQuery
|
||||||
|
|
||||||
if opts.BOGO {
|
if !needsFiltering {
|
||||||
result = where(result, func(i api.SavingItem) bool {
|
if opts.Limit > 0 && opts.Limit < len(items) {
|
||||||
return ContainsIgnoreCase(i.Categories, "bogo")
|
return items[:opts.Limit]
|
||||||
})
|
}
|
||||||
|
return items
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.Category != "" {
|
var result []api.SavingItem
|
||||||
result = where(result, func(i api.SavingItem) bool {
|
if opts.Limit > 0 && opts.Limit < len(items) {
|
||||||
return ContainsIgnoreCase(i.Categories, opts.Category)
|
result = make([]api.SavingItem, 0, opts.Limit)
|
||||||
})
|
} else {
|
||||||
|
result = make([]api.SavingItem, 0, len(items))
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.Department != "" {
|
category := opts.Category
|
||||||
dept := strings.ToLower(opts.Department)
|
department := strings.ToLower(opts.Department)
|
||||||
result = where(result, func(i api.SavingItem) bool {
|
query := strings.ToLower(opts.Query)
|
||||||
return strings.Contains(strings.ToLower(Deref(i.Department)), dept)
|
|
||||||
})
|
for _, item := range items {
|
||||||
|
if opts.BOGO || wantCategory {
|
||||||
|
hasBogo := !opts.BOGO
|
||||||
|
hasCategory := !wantCategory
|
||||||
|
|
||||||
|
for _, c := range item.Categories {
|
||||||
|
if !hasBogo && strings.EqualFold(c, "bogo") {
|
||||||
|
hasBogo = true
|
||||||
|
}
|
||||||
|
if !hasCategory && strings.EqualFold(c, category) {
|
||||||
|
hasCategory = true
|
||||||
|
}
|
||||||
|
if hasBogo && hasCategory {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.Query != "" {
|
if !hasBogo || !hasCategory {
|
||||||
q := strings.ToLower(opts.Query)
|
continue
|
||||||
result = where(result, func(i api.SavingItem) bool {
|
}
|
||||||
title := strings.ToLower(CleanText(Deref(i.Title)))
|
|
||||||
desc := strings.ToLower(CleanText(Deref(i.Description)))
|
|
||||||
return strings.Contains(title, q) || strings.Contains(desc, q)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.Limit > 0 && opts.Limit < len(result) {
|
if wantDepartment && !strings.Contains(strings.ToLower(Deref(item.Department)), department) {
|
||||||
result = result[:opts.Limit]
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if wantQuery {
|
||||||
|
title := strings.ToLower(CleanText(Deref(item.Title)))
|
||||||
|
desc := strings.ToLower(CleanText(Deref(item.Description)))
|
||||||
|
if !strings.Contains(title, query) && !strings.Contains(desc, query) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result = append(result, item)
|
||||||
|
if opts.Limit > 0 && len(result) >= opts.Limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,23 +108,21 @@ func Deref(s *string) string {
|
|||||||
|
|
||||||
// CleanText unescapes HTML entities and normalizes whitespace.
|
// CleanText unescapes HTML entities and normalizes whitespace.
|
||||||
func CleanText(s string) string {
|
func CleanText(s string) string {
|
||||||
|
if !strings.ContainsAny(s, "&\r\n") {
|
||||||
|
return strings.TrimSpace(s)
|
||||||
|
}
|
||||||
|
|
||||||
s = html.UnescapeString(s)
|
s = html.UnescapeString(s)
|
||||||
|
if !strings.ContainsAny(s, "\r\n") {
|
||||||
|
return strings.TrimSpace(s)
|
||||||
|
}
|
||||||
|
|
||||||
s = strings.ReplaceAll(s, "\r\n", " ")
|
s = strings.ReplaceAll(s, "\r\n", " ")
|
||||||
s = strings.ReplaceAll(s, "\r", " ")
|
s = strings.ReplaceAll(s, "\r", " ")
|
||||||
s = strings.ReplaceAll(s, "\n", " ")
|
s = strings.ReplaceAll(s, "\n", " ")
|
||||||
return strings.TrimSpace(s)
|
return strings.TrimSpace(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func where(items []api.SavingItem, fn func(api.SavingItem) bool) []api.SavingItem {
|
|
||||||
var result []api.SavingItem
|
|
||||||
for _, item := range items {
|
|
||||||
if fn(item) {
|
|
||||||
result = append(result, item)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// ContainsIgnoreCase reports whether any element in slice matches val case-insensitively.
|
// ContainsIgnoreCase reports whether any element in slice matches val case-insensitively.
|
||||||
func ContainsIgnoreCase(slice []string, val string) bool {
|
func ContainsIgnoreCase(slice []string, val string) bool {
|
||||||
for _, s := range slice {
|
for _, s := range slice {
|
||||||
|
|||||||
195
internal/filter/filter_equivalence_test.go
Normal file
195
internal/filter/filter_equivalence_test.go
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
package filter_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/tayloree/publix-deals/internal/api"
|
||||||
|
"github.com/tayloree/publix-deals/internal/filter"
|
||||||
|
)
|
||||||
|
|
||||||
|
func referenceApply(items []api.SavingItem, opts filter.Options) []api.SavingItem {
|
||||||
|
result := items
|
||||||
|
|
||||||
|
if opts.BOGO {
|
||||||
|
result = referenceWhere(result, func(i api.SavingItem) bool {
|
||||||
|
return filter.ContainsIgnoreCase(i.Categories, "bogo")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Category != "" {
|
||||||
|
result = referenceWhere(result, func(i api.SavingItem) bool {
|
||||||
|
return filter.ContainsIgnoreCase(i.Categories, opts.Category)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Department != "" {
|
||||||
|
dept := strings.ToLower(opts.Department)
|
||||||
|
result = referenceWhere(result, func(i api.SavingItem) bool {
|
||||||
|
return strings.Contains(strings.ToLower(filter.Deref(i.Department)), dept)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Query != "" {
|
||||||
|
q := strings.ToLower(opts.Query)
|
||||||
|
result = referenceWhere(result, func(i api.SavingItem) bool {
|
||||||
|
title := strings.ToLower(filter.CleanText(filter.Deref(i.Title)))
|
||||||
|
desc := strings.ToLower(filter.CleanText(filter.Deref(i.Description)))
|
||||||
|
return strings.Contains(title, q) || strings.Contains(desc, q)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Limit > 0 && opts.Limit < len(result) {
|
||||||
|
result = result[:opts.Limit]
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func referenceWhere(items []api.SavingItem, fn func(api.SavingItem) bool) []api.SavingItem {
|
||||||
|
var result []api.SavingItem
|
||||||
|
for _, item := range items {
|
||||||
|
if fn(item) {
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func randomItem(rng *rand.Rand, idx int) api.SavingItem {
|
||||||
|
makePtr := func(v string) *string { return &v }
|
||||||
|
|
||||||
|
var title *string
|
||||||
|
if rng.Intn(4) != 0 {
|
||||||
|
title = makePtr(fmt.Sprintf("Fresh Deal %d", idx))
|
||||||
|
}
|
||||||
|
|
||||||
|
var desc *string
|
||||||
|
if rng.Intn(3) != 0 {
|
||||||
|
desc = makePtr(fmt.Sprintf("Weekly offer %d", idx))
|
||||||
|
}
|
||||||
|
|
||||||
|
var dept *string
|
||||||
|
deptOptions := []*string{
|
||||||
|
nil,
|
||||||
|
makePtr("Grocery"),
|
||||||
|
makePtr("Produce"),
|
||||||
|
makePtr("Meat"),
|
||||||
|
makePtr("Frozen"),
|
||||||
|
}
|
||||||
|
dept = deptOptions[rng.Intn(len(deptOptions))]
|
||||||
|
|
||||||
|
catPool := []string{"bogo", "grocery", "produce", "meat", "frozen", "dairy"}
|
||||||
|
catCount := rng.Intn(4)
|
||||||
|
cats := make([]string, 0, catCount)
|
||||||
|
for range catCount {
|
||||||
|
cats = append(cats, catPool[rng.Intn(len(catPool))])
|
||||||
|
}
|
||||||
|
|
||||||
|
return api.SavingItem{
|
||||||
|
ID: fmt.Sprintf("id-%d", idx),
|
||||||
|
Title: title,
|
||||||
|
Description: desc,
|
||||||
|
Department: dept,
|
||||||
|
Categories: cats,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func randomOptions(rng *rand.Rand) filter.Options {
|
||||||
|
categories := []string{"", "bogo", "grocery", "produce", "meat"}
|
||||||
|
departments := []string{"", "groc", "prod", "meat"}
|
||||||
|
queries := []string{"", "fresh", "offer", "deal"}
|
||||||
|
limits := []int{0, 1, 3, 5, 10}
|
||||||
|
return filter.Options{
|
||||||
|
BOGO: rng.Intn(2) == 0,
|
||||||
|
Category: categories[rng.Intn(len(categories))],
|
||||||
|
Department: departments[rng.Intn(len(departments))],
|
||||||
|
Query: queries[rng.Intn(len(queries))],
|
||||||
|
Limit: limits[rng.Intn(len(limits))],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApply_ReferenceEquivalence(t *testing.T) {
|
||||||
|
rng := rand.New(rand.NewSource(42))
|
||||||
|
|
||||||
|
for caseNum := 0; caseNum < 500; caseNum++ {
|
||||||
|
itemCount := rng.Intn(60)
|
||||||
|
items := make([]api.SavingItem, 0, itemCount)
|
||||||
|
for i := range itemCount {
|
||||||
|
items = append(items, randomItem(rng, i))
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := randomOptions(rng)
|
||||||
|
got := filter.Apply(items, opts)
|
||||||
|
want := referenceApply(items, opts)
|
||||||
|
|
||||||
|
assert.Equal(t, want, got, "mismatch for opts=%+v case=%d", opts, caseNum)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkApply_ReferenceWorkload_1kDeals(b *testing.B) {
|
||||||
|
rng := rand.New(rand.NewSource(7))
|
||||||
|
items := make([]api.SavingItem, 0, 1000)
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
items = append(items, randomItem(rng, i))
|
||||||
|
}
|
||||||
|
opts := filter.Options{
|
||||||
|
BOGO: true,
|
||||||
|
Category: "grocery",
|
||||||
|
Department: "groc",
|
||||||
|
Query: "deal",
|
||||||
|
Limit: 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = filter.Apply(items, opts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApply_AllocationBudget(t *testing.T) {
|
||||||
|
rng := rand.New(rand.NewSource(7))
|
||||||
|
items := make([]api.SavingItem, 0, 1000)
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
items = append(items, randomItem(rng, i))
|
||||||
|
}
|
||||||
|
opts := filter.Options{
|
||||||
|
BOGO: true,
|
||||||
|
Category: "grocery",
|
||||||
|
Department: "groc",
|
||||||
|
Query: "deal",
|
||||||
|
Limit: 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
allocs := testing.AllocsPerRun(100, func() {
|
||||||
|
_ = filter.Apply(items, opts)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Guardrail for accidental reintroduction of multi-pass intermediate slices.
|
||||||
|
assert.LessOrEqual(t, allocs, 80.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkApply_LegacyReference_1kDeals(b *testing.B) {
|
||||||
|
rng := rand.New(rand.NewSource(7))
|
||||||
|
items := make([]api.SavingItem, 0, 1000)
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
items = append(items, randomItem(rng, i))
|
||||||
|
}
|
||||||
|
opts := filter.Options{
|
||||||
|
BOGO: true,
|
||||||
|
Category: "grocery",
|
||||||
|
Department: "groc",
|
||||||
|
Query: "deal",
|
||||||
|
Limit: 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = referenceApply(items, opts)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
package filter_test
|
package filter_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"html"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -152,3 +154,47 @@ func TestCleanText(t *testing.T) {
|
|||||||
assert.Equal(t, tt.want, filter.CleanText(tt.input), "CleanText(%q)", tt.input)
|
assert.Equal(t, tt.want, filter.CleanText(tt.input), "CleanText(%q)", tt.input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func legacyCleanText(s string) string {
|
||||||
|
s = html.UnescapeString(s)
|
||||||
|
s = strings.ReplaceAll(s, "\r\n", " ")
|
||||||
|
s = strings.ReplaceAll(s, "\r", " ")
|
||||||
|
s = strings.ReplaceAll(s, "\n", " ")
|
||||||
|
return strings.TrimSpace(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCleanText_Plain_New(b *testing.B) {
|
||||||
|
const input = "Simple Weekly Deal Title 123"
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = filter.CleanText(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCleanText_Plain_Legacy(b *testing.B) {
|
||||||
|
const input = "Simple Weekly Deal Title 123"
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = legacyCleanText(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCleanText_Escaped_New(b *testing.B) {
|
||||||
|
const input = " Eight O'Clock & Tea\r\nSpecial "
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = filter.CleanText(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCleanText_Escaped_Legacy(b *testing.B) {
|
||||||
|
const input = " Eight O'Clock & Tea\r\nSpecial "
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = legacyCleanText(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user