Implement the bottom of the data pipeline — discovery and parsing of
Claude Code session files:
- source/types.go: Raw JSON deserialization types (RawEntry,
RawMessage, RawUsage, CacheCreation) matching the Claude Code
JSONL schema. DiscoveredFile carries file metadata including
decoded project name, session ID, and subagent relationship info.
- source/scanner.go: ScanDir walks ~/.claude/projects/ to discover
all .jsonl session files. Detects subagent files by the
<project>/<session>/subagents/agent-<id>.jsonl path pattern and
links them to parent sessions. decodeProjectName reverses Claude
Code's path-encoding convention (each "/" in the absolute path is
replaced with "-") by scanning for known parent markers (projects,
repos, src, code, workspace, dev) and extracting the project name
after the last marker.
- source/parser.go: ParseFile processes a single JSONL session file.
Uses a hybrid parsing strategy for performance:
* "user" and "system" entries: byte-level field extraction for
timestamps, cwd, and turn_duration (avoids JSON allocation).
extractTopLevelType tracks brace depth and string boundaries to
find only the top-level "type" field, early-exiting ~400 bytes
in for O(1) per line cost regardless of line length.
* "assistant" entries: full JSON unmarshal to extract token usage,
model name, and cost data.
Deduplicates API calls by message.id (keeping the last entry per
ID, which holds the final billed usage). Computes per-model cost
breakdown using config.CalculateCost and aggregates cache hit rate.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
122 lines
3.1 KiB
Go
122 lines
3.1 KiB
Go
package source
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
// ScanDir walks the Claude projects directory and discovers all JSONL session files.
|
|
// It returns discovered files categorized as main sessions or subagent sessions.
|
|
func ScanDir(claudeDir string) ([]DiscoveredFile, error) {
|
|
projectsDir := filepath.Join(claudeDir, "projects")
|
|
|
|
info, err := os.Stat(projectsDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
if !info.IsDir() {
|
|
return nil, nil
|
|
}
|
|
|
|
var files []DiscoveredFile
|
|
|
|
err = filepath.WalkDir(projectsDir, func(path string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return nil // skip unreadable entries
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
if filepath.Ext(path) != ".jsonl" {
|
|
return nil
|
|
}
|
|
|
|
// Skip sessions-index.json and other non-session files
|
|
name := d.Name()
|
|
if !strings.HasSuffix(name, ".jsonl") {
|
|
return nil
|
|
}
|
|
|
|
rel, _ := filepath.Rel(projectsDir, path)
|
|
parts := strings.Split(rel, string(filepath.Separator))
|
|
if len(parts) < 2 {
|
|
return nil
|
|
}
|
|
|
|
projectDir := parts[0]
|
|
project := decodeProjectName(projectDir)
|
|
|
|
df := DiscoveredFile{
|
|
Path: path,
|
|
Project: project,
|
|
ProjectDir: projectDir,
|
|
}
|
|
|
|
// Determine if this is a subagent file
|
|
// Pattern: <project>/<session-uuid>/subagents/agent-<id>.jsonl
|
|
if len(parts) >= 4 && parts[2] == "subagents" {
|
|
df.IsSubagent = true
|
|
df.ParentSession = parts[1]
|
|
// Use parent+agent to avoid collisions across sessions
|
|
df.SessionID = parts[1] + "/" + strings.TrimSuffix(name, ".jsonl")
|
|
} else {
|
|
// Main session: <project>/<session-uuid>.jsonl
|
|
df.SessionID = strings.TrimSuffix(name, ".jsonl")
|
|
}
|
|
|
|
files = append(files, df)
|
|
return nil
|
|
})
|
|
|
|
return files, err
|
|
}
|
|
|
|
// decodeProjectName extracts a human-readable project name from an encoded
// project directory name.
//
// Claude Code encodes absolute paths by replacing "/" with "-", so:
//
//	"-Users-tayloreernisse-projects-gitlore"         -> "gitlore"
//	"-Users-tayloreernisse-projects-my-cool-project" -> "my-cool-project"
//
// The name is split on "-" and scanned from the end for the last segment that
// is a known parent directory ("projects", "repos", "src", "code",
// "workspace", "dev"; matched case-insensitively). Everything after that
// marker, re-joined with "-", is the project name. Because the scan starts at
// the end, a project name that itself contains a marker segment is cut at it
// (e.g. "-Users-x-projects-my-code-tool" yields "tool").
//
// If no marker yields a non-empty name, the last non-empty segment is
// returned; if every segment is empty, dirName is returned unchanged.
func decodeProjectName(dirName string) string {
	parts := strings.Split(dirName, "-")

	// Start at len-2 so at least one segment follows the candidate marker.
	for i := len(parts) - 2; i >= 0; i-- {
		switch strings.ToLower(parts[i]) {
		case "projects", "repos", "src", "code", "workspace", "dev":
			// A trailing "-" leaves nothing after the marker; keep scanning
			// toward the front in that case.
			if name := strings.Join(parts[i+1:], "-"); name != "" {
				return name
			}
		}
	}

	// Fallback: return the last non-empty segment.
	for i := len(parts) - 1; i >= 0; i-- {
		if parts[i] != "" {
			return parts[i]
		}
	}

	return dirName
}
|
|
|
|
// CountProjects returns the number of unique projects in a set of discovered files.
|
|
func CountProjects(files []DiscoveredFile) int {
|
|
seen := make(map[string]struct{})
|
|
for _, f := range files {
|
|
seen[f.Project] = struct{}{}
|
|
}
|
|
return len(seen)
|
|
}
|