Extract shared JSONL parsing helpers for parser parity

Introduce three shared helpers in session-parser.ts that both the full
parser and the lightweight metadata extractor can use:

- forEachJsonlLine(content, onLine): Iterates JSONL lines with consistent
  malformed-line handling. Skips invalid JSON lines identically to how
  parseSessionContent handles them. Returns parse error count for diagnostics.

- countMessagesForLine(parsed): Returns the number of messages a single
  JSONL line expands into, using the same classification rules as the
  full parser. User arrays expand tool_result and text blocks; assistant
  arrays expand thinking, text, and tool_use.

- classifyLine(parsed): Classifies a parsed line into one of 8 types
  (user, assistant, progress, file-history-snapshot, summary, system,
  queue-operation, unknown).

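parseSessionContent() now iterates lines through forEachJsonlLine(), and the
internal extractMessages() function branches on classifyLine(), so the full
parser and the upcoming metadata extraction service share the same line
handling and classification logic. No behavior change is intended; the one
visible difference is that the line index used for fallback "generated-N"
UUIDs is now the position in the raw file (blank lines included) rather than
the position among non-blank lines. countMessagesForLine() mirrors the
expansion rules of extractMessages(), and the parity tests below keep list
counts from drifting from detail-view counts as the parser changes.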

Test coverage includes:
- Malformed line handling parity with full parser
- Parse error counting for truncated/corrupted files
- countMessagesForLine output matches extractMessages().length
- Edge cases: empty files, progress events, array content expansion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-28 00:50:53 -05:00
parent b69dffc398
commit c20652924d
2 changed files with 357 additions and 20 deletions

View File

@@ -28,7 +28,7 @@ interface ContentBlock {
content?: string | ContentBlock[];
}
interface RawLine {
export interface RawLine {
type?: string;
uuid?: string;
timestamp?: string;
@@ -43,6 +43,94 @@ interface RawLine {
subtype?: string;
}
/**
 * Category that `classifyLine` assigns to a parsed JSONL line.
 *
 * The first five literals below other than "user"/"assistant" are matched
 * against the line's `type` field verbatim. "user" and "assistant" are chosen
 * when either `type` or `message.role` carries that value. "unknown" is the
 * fallback when nothing else matches.
 */
export type LineClassification =
| "user"
| "assistant"
| "progress"
| "file-history-snapshot"
| "summary"
| "system"
| "queue-operation"
| "unknown";
/**
 * Walks JSONL text line by line and hands every successfully parsed object
 * to `onLine`.
 *
 * Lines that are empty after trimming are skipped silently. Lines that fail
 * `JSON.parse`, or that parse to a non-object value (`null`, a number, a
 * string, a boolean), are skipped and counted as parse errors: such values
 * are not usable as a `RawLine`, and `null` in particular would make any
 * property access in the callback throw.
 *
 * @param content - Full JSONL text, lines separated by "\n".
 * @param onLine - Called once per valid line with the parsed object and the
 *   zero-based index of that line in `content.split("\n")` (blank and
 *   malformed lines still occupy an index).
 * @returns The number of non-blank lines that were rejected.
 */
export function forEachJsonlLine(
  content: string,
  onLine: (parsed: RawLine, lineIndex: number) => void
): { parseErrors: number } {
  let parseErrors = 0;

  for (const [lineIndex, line] of content.split("\n").entries()) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Parse into `unknown` so the shape is checked before it is treated as a RawLine.
    let value: unknown;
    try {
      value = JSON.parse(trimmed);
    } catch {
      parseErrors++;
      continue;
    }

    // Valid JSON that is not an object cannot be a session line; treat it as malformed.
    if (typeof value !== "object" || value === null) {
      parseErrors++;
      continue;
    }

    onLine(value as RawLine, lineIndex);
  }

  return { parseErrors };
}
/**
 * Maps a parsed JSONL line to its `LineClassification`.
 *
 * The `type` field takes precedence: the five metadata types and "user" are
 * recognized from it directly. Otherwise `message.role` is consulted, with
 * "user" checked before "assistant". Anything else is "unknown".
 *
 * @param parsed - A parsed JSONL line.
 * @returns The classification for that line.
 */
export function classifyLine(parsed: RawLine): LineClassification {
  const kind = parsed.type;

  switch (kind) {
    case "progress":
      return "progress";
    case "file-history-snapshot":
      return "file-history-snapshot";
    case "summary":
      return "summary";
    case "system":
      return "system";
    case "queue-operation":
      return "queue-operation";
    case "user":
      return "user";
    default:
      break;
  }

  const role = parsed.message?.role;
  // A "user" role wins even when `type` says "assistant".
  if (role === "user") {
    return "user";
  }
  if (kind === "assistant" || role === "assistant") {
    return "assistant";
  }
  return "unknown";
}
/**
 * Returns how many messages a single parsed JSONL line contributes.
 *
 * - progress, file-history-snapshot and summary lines count as 1.
 * - system, queue-operation and unknown lines count as 0.
 * - user / assistant lines depend on `message.content`: missing or null is 0,
 *   a string is 1, and an array contributes one per block whose `type` is
 *   counted for that role (user: tool_result, text; assistant: thinking,
 *   text, tool_use). Any other content shape is 0.
 *
 * @param parsed - A parsed JSONL line.
 * @returns The message count for that line.
 */
export function countMessagesForLine(parsed: RawLine): number {
  const kind = classifyLine(parsed);

  if (kind === "progress" || kind === "file-history-snapshot" || kind === "summary") {
    return 1;
  }
  if (kind !== "user" && kind !== "assistant") {
    return 0;
  }

  const body = parsed.message?.content;
  if (body == null) {
    return 0;
  }
  if (typeof body === "string") {
    return 1;
  }
  if (!Array.isArray(body)) {
    return 0;
  }

  // Block types that expand into a message differ per role.
  const countedTypes: readonly string[] =
    kind === "user" ? ["tool_result", "text"] : ["thinking", "text", "tool_use"];

  return body.reduce(
    (total: number, block: ContentBlock) =>
      typeof block.type === "string" && countedTypes.includes(block.type)
        ? total + 1
        : total,
    0
  );
}
export async function parseSession(
filePath: string
): Promise<ParsedMessage[]> {
@@ -58,31 +146,23 @@ export async function parseSession(
/**
 * Parses the full text of a JSONL session file into a flat message list.
 *
 * Line iteration is delegated to `forEachJsonlLine`, so blank and malformed
 * lines are skipped there. Each remaining line is expanded by
 * `extractMessages`, and the results are appended in file order.
 *
 * @param content - Full JSONL text of a session file.
 * @returns Every message extracted from the parseable lines, in order.
 */
export function parseSessionContent(content: string): ParsedMessage[] {
  const messages: ParsedMessage[] = [];

  // lineIndex is the position in the raw split, so blank lines count toward it.
  forEachJsonlLine(content, (parsed, lineIndex) => {
    for (const message of extractMessages(parsed, lineIndex)) {
      messages.push(message);
    }
  });

  return messages;
}
function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
const messages: ParsedMessage[] = [];
const type = raw.type;
const classification = classifyLine(raw);
const uuid = raw.uuid || `generated-${rawIndex}`;
const timestamp = raw.timestamp;
// Progress/hook messages - content is in `data`, not `content`
if (type === "progress") {
if (classification === "progress") {
const data = raw.data;
const progressText = data
? formatProgressData(data)
@@ -102,7 +182,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
}
// File history snapshot
if (type === "file-history-snapshot") {
if (classification === "file-history-snapshot") {
messages.push({
uuid,
category: "file_snapshot",
@@ -114,7 +194,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
}
// Summary message - text is in `summary` field, not `content`
if (type === "summary") {
if (classification === "summary") {
messages.push({
uuid,
category: "summary",
@@ -126,7 +206,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
}
// System metadata (turn_duration etc.) - skip, not user-facing
if (type === "system" || type === "queue-operation") {
if (classification === "system" || classification === "queue-operation") {
return messages;
}
@@ -134,7 +214,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
const role = raw.message?.role;
const content = raw.message?.content;
if ((type === "user" || role === "user") && content !== undefined) {
if (classification === "user" && content !== undefined) {
if (typeof content === "string") {
const category = detectSystemReminder(content)
? "system_message"
@@ -183,7 +263,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
return messages;
}
if ((type === "assistant" || role === "assistant") && content !== undefined) {
if (classification === "assistant" && content !== undefined) {
if (typeof content === "string") {
messages.push({
uuid,