feat: add data pipeline with parallel loading, aggregation, and cache integration
Implement the pipeline layer that orchestrates discovery, parsing,
caching, and aggregation:
- pipeline/loader.go: Load() discovers session files via ScanDir,
optionally filters out subagent files, then parses all files in
parallel using a bounded worker pool sized to GOMAXPROCS. Workers
read from a pre-filled channel (no contention on dispatch) and
report progress via an atomic counter and callback. LoadResult
tracks total files, parsed files, parse errors, and file errors.
- pipeline/aggregator.go: Five aggregation functions, all operating
on time-filtered session slices:
* Aggregate: computes SummaryStats across all sessions — total
tokens (5 types), estimated cost, cache savings (summed per-model
via config.CalculateCacheSavings), cache hit rate, and per-active-
day rates (cost, tokens, sessions, prompts, minutes).
* AggregateDays: groups sessions by local calendar date, sorted
most-recent-first.
* AggregateModels: groups by normalized model name with share
percentages, sorted by cost descending.
* AggregateProjects: groups by project name, sorted by cost.
* AggregateHourly: distributes prompt/session/token counts across
24 hour buckets (attributed to session start hour).
Also provides FilterByTime, FilterByProject, FilterByModel with
case-insensitive substring matching.
- pipeline/incremental.go: LoadWithCache() implements the incremental
loading strategy — compares discovered files against the cache's
file_tracker (mtime_ns + size), loads unchanged sessions from
SQLite, and only reparses files that changed. Reparsed results
are immediately saved back to cache. CacheDir/CachePath follow
XDG_CACHE_HOME convention (~/.cache/cburn/metrics.db).
- pipeline/bench_test.go: Benchmarks for ScanDir, ParseFile (worst-
case largest file), full Load, and LoadWithCache to measure the
incremental cache speedup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
92
internal/pipeline/bench_test.go
Normal file
92
internal/pipeline/bench_test.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"cburn/internal/source"
|
||||
"cburn/internal/store"
|
||||
)
|
||||
|
||||
func BenchmarkLoad(b *testing.B) {
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
claudeDir := filepath.Join(homeDir, ".claude")
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
result, err := Load(claudeDir, true, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_ = result
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkParseFile(b *testing.B) {
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
claudeDir := filepath.Join(homeDir, ".claude")
|
||||
|
||||
files, err := source.ScanDir(claudeDir)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
// Find the largest file for worst-case benchmarking
|
||||
var biggest source.DiscoveredFile
|
||||
var biggestSize int64
|
||||
for _, f := range files {
|
||||
info, err := os.Stat(f.Path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if info.Size() > biggestSize {
|
||||
biggestSize = info.Size()
|
||||
biggest = f
|
||||
}
|
||||
}
|
||||
|
||||
b.Logf("Benchmarking largest file: %s (%.1f KB)", biggest.Path, float64(biggestSize)/1024)
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
result := source.ParseFile(biggest)
|
||||
if result.Err != nil {
|
||||
b.Fatal(result.Err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkScanDir(b *testing.B) {
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
claudeDir := filepath.Join(homeDir, ".claude")
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
files, err := source.ScanDir(claudeDir)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_ = files
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLoadWithCache(b *testing.B) {
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
claudeDir := filepath.Join(homeDir, ".claude")
|
||||
|
||||
cache, err := store.Open(CachePath())
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cr, err := LoadWithCache(claudeDir, true, cache, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_ = cr
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user