Files
cburn/internal/pipeline/bench_test.go
teernisse 24454247a3 feat: add data pipeline with parallel loading, aggregation, and cache integration
Implement the pipeline layer that orchestrates discovery, parsing,
caching, and aggregation:

- pipeline/loader.go: Load() discovers session files via ScanDir,
  optionally filters out subagent files, then parses all files in
  parallel using a bounded worker pool sized to GOMAXPROCS. Workers
  read from a pre-filled channel (no contention on dispatch) and
  report progress via an atomic counter and callback. LoadResult
  tracks total files, parsed files, parse errors, and file errors.

- pipeline/aggregator.go: Five aggregation functions, all operating
  on time-filtered session slices:

  * Aggregate: computes SummaryStats across all sessions — total
    tokens (5 types), estimated cost, cache savings (summed per-model
    via config.CalculateCacheSavings), cache hit rate, and per-active-
    day rates (cost, tokens, sessions, prompts, minutes).

  * AggregateDays: groups sessions by local calendar date, sorted
    most-recent-first.

  * AggregateModels: groups by normalized model name with share
    percentages, sorted by cost descending.

  * AggregateProjects: groups by project name, sorted by cost.

  * AggregateHourly: distributes prompt/session/token counts across
    24 hour buckets (attributed to session start hour).

  Also provides FilterByTime, FilterByProject, FilterByModel with
  case-insensitive substring matching.

- pipeline/incremental.go: LoadWithCache() implements the incremental
  loading strategy — compares discovered files against the cache's
  file_tracker (mtime_ns + size), loads unchanged sessions from
  SQLite, and only reparses files that changed. Reparsed results
  are immediately saved back to cache. CacheDir/CachePath follow
  XDG_CACHE_HOME convention (~/.cache/cburn/metrics.db).

- pipeline/bench_test.go: Benchmarks for ScanDir, ParseFile (worst-
  case largest file), full Load, and LoadWithCache to measure the
  incremental cache speedup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 13:01:56 -05:00

93 lines
1.7 KiB
Go

package pipeline
import (
"os"
"path/filepath"
"testing"
"cburn/internal/source"
"cburn/internal/store"
)
// BenchmarkLoad measures a full Load over the ".claude" directory located
// under the current user's home directory.
//
// Load is called with its second argument set to true and a nil third
// argument on every iteration. NOTE(review): presumably the flag controls
// subagent-file filtering and nil disables the progress callback — confirm
// against Load's signature.
func BenchmarkLoad(b *testing.B) {
	homeDir, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the path below would silently become the
		// relative ".claude", so fail loudly instead of measuring the wrong tree.
		b.Fatalf("resolving home directory: %v", err)
	}
	claudeDir := filepath.Join(homeDir, ".claude")

	// Exclude the setup above from the measured time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result, err := Load(claudeDir, true, nil)
		if err != nil {
			b.Fatal(err)
		}
		_ = result
	}
}
// BenchmarkParseFile measures source.ParseFile on the largest file that
// source.ScanDir discovers under the ".claude" directory in the current
// user's home directory, as a worst-case input.
//
// The benchmark is skipped when no non-empty file can be found, because
// there would be nothing meaningful to parse.
func BenchmarkParseFile(b *testing.B) {
	homeDir, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the path below would silently become the
		// relative ".claude", so fail loudly instead of scanning the wrong tree.
		b.Fatalf("resolving home directory: %v", err)
	}
	claudeDir := filepath.Join(homeDir, ".claude")

	files, err := source.ScanDir(claudeDir)
	if err != nil {
		b.Fatal(err)
	}

	// Find the largest file for worst-case benchmarking.
	var biggest source.DiscoveredFile
	var biggestSize int64
	for _, f := range files {
		info, err := os.Stat(f.Path)
		if err != nil {
			// Files that cannot be stat'ed are simply not candidates.
			continue
		}
		if info.Size() > biggestSize {
			biggestSize = info.Size()
			biggest = f
		}
	}
	if biggestSize == 0 {
		// No candidate was selected; parsing the zero-value DiscoveredFile
		// would benchmark nothing useful.
		b.Skipf("no non-empty session files found under %s", claudeDir)
	}

	b.Logf("Benchmarking largest file: %s (%.1f KB)", biggest.Path, float64(biggestSize)/1024)

	// Exclude discovery and stat calls from the measured time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result := source.ParseFile(biggest)
		if result.Err != nil {
			b.Fatal(result.Err)
		}
	}
}
// BenchmarkScanDir measures source.ScanDir over the ".claude" directory
// located under the current user's home directory.
func BenchmarkScanDir(b *testing.B) {
	homeDir, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the path below would silently become the
		// relative ".claude", so fail loudly instead of scanning the wrong tree.
		b.Fatalf("resolving home directory: %v", err)
	}
	claudeDir := filepath.Join(homeDir, ".claude")

	// Exclude the setup above from the measured time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		files, err := source.ScanDir(claudeDir)
		if err != nil {
			b.Fatal(err)
		}
		_ = files
	}
}
// BenchmarkLoadWithCache measures LoadWithCache over the ".claude" directory
// located under the current user's home directory, using a store opened at
// CachePath().
//
// The store is opened once before the timed loop and closed when the
// benchmark returns, so every iteration shares the same cache handle.
//
// NOTE(review): the store is opened at CachePath() rather than a temporary
// location, so this presumably touches the user's real cache database —
// confirm that is acceptable for a benchmark.
func BenchmarkLoadWithCache(b *testing.B) {
	homeDir, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the path below would silently become the
		// relative ".claude", so fail loudly instead of measuring the wrong tree.
		b.Fatalf("resolving home directory: %v", err)
	}
	claudeDir := filepath.Join(homeDir, ".claude")

	cache, err := store.Open(CachePath())
	if err != nil {
		b.Fatal(err)
	}
	defer cache.Close()

	// Exclude opening the cache from the measured time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		cr, err := LoadWithCache(claudeDir, true, cache, nil)
		if err != nil {
			b.Fatal(err)
		}
		_ = cr
	}
}