diff --git a/src/server/services/session-parser.ts b/src/server/services/session-parser.ts index a6e28ef..bde26b3 100644 --- a/src/server/services/session-parser.ts +++ b/src/server/services/session-parser.ts @@ -28,7 +28,7 @@ interface ContentBlock { content?: string | ContentBlock[]; } -interface RawLine { +export interface RawLine { type?: string; uuid?: string; timestamp?: string; @@ -43,6 +43,94 @@ interface RawLine { subtype?: string; } +export type LineClassification = + | "user" + | "assistant" + | "progress" + | "file-history-snapshot" + | "summary" + | "system" + | "queue-operation" + | "unknown"; + +export function forEachJsonlLine( + content: string, + onLine: (parsed: RawLine, lineIndex: number) => void +): { parseErrors: number } { + let parseErrors = 0; + const lines = content.split("\n"); + + for (let i = 0; i < lines.length; i++) { + const trimmed = lines[i].trim(); + if (!trimmed) continue; + + let parsed: RawLine; + try { + parsed = JSON.parse(trimmed); + } catch { + parseErrors++; + continue; + } + + onLine(parsed, i); + } + + return { parseErrors }; +} + +export function classifyLine(parsed: RawLine): LineClassification { + const type = parsed.type; + if (type === "progress") return "progress"; + if (type === "file-history-snapshot") return "file-history-snapshot"; + if (type === "summary") return "summary"; + if (type === "system") return "system"; + if (type === "queue-operation") return "queue-operation"; + if (type === "user" || parsed.message?.role === "user") return "user"; + if (type === "assistant" || parsed.message?.role === "assistant") return "assistant"; + return "unknown"; +} + +export function countMessagesForLine(parsed: RawLine): number { + const classification = classifyLine(parsed); + + switch (classification) { + case "progress": + case "file-history-snapshot": + case "summary": + return 1; + + case "system": + case "queue-operation": + case "unknown": + return 0; + + case "user": { + const content = parsed.message?.content; + if (content === undefined || content === null) return 0; + if (typeof content === "string") return 1; + if (Array.isArray(content)) { + return content.filter( + (b: ContentBlock) => b.type === "tool_result" || b.type === "text" + ).length; + } + return 0; + } + + case "assistant": { + const content = parsed.message?.content; + if (content === undefined || content === null) return 0; + if (typeof content === "string") return 1; + if (Array.isArray(content)) { + return content.filter( + (b: ContentBlock) => + b.type === "thinking" || b.type === "text" || b.type === "tool_use" + ).length; + } + return 0; + } + } +} + export async function parseSession( filePath: string ): Promise { @@ -58,31 +146,23 @@ export async function parseSession( export function parseSessionContent(content: string): ParsedMessage[] { const messages: ParsedMessage[] = []; - const lines = content.split("\n").filter((l) => l.trim()); - for (let i = 0; i < lines.length; i++) { - let parsed: RawLine; - try { - parsed = JSON.parse(lines[i]); - } catch { - continue; // Skip malformed lines - } - - const extracted = extractMessages(parsed, i); + forEachJsonlLine(content, (parsed, lineIndex) => { + const extracted = extractMessages(parsed, lineIndex); messages.push(...extracted); - } + }); return messages; } function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { const messages: ParsedMessage[] = []; - const type = raw.type; + const classification = classifyLine(raw); const uuid = raw.uuid || `generated-${rawIndex}`; const timestamp = raw.timestamp; // Progress/hook messages - content is in `data`, not `content` - if (type === "progress") { + if (classification === "progress") { const data = raw.data; const progressText = data ? formatProgressData(data) @@ -102,7 +182,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { } // File history snapshot - if (type === "file-history-snapshot") { + if (classification === "file-history-snapshot") { messages.push({ uuid, category: "file_snapshot", @@ -114,7 +194,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { } // Summary message - text is in `summary` field, not `content` - if (type === "summary") { + if (classification === "summary") { messages.push({ uuid, category: "summary", @@ -126,7 +206,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { } // System metadata (turn_duration etc.) - skip, not user-facing - if (type === "system" || type === "queue-operation") { + if (classification === "system" || classification === "queue-operation") { return messages; } @@ -134,7 +214,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { const role = raw.message?.role; const content = raw.message?.content; - if ((type === "user" || role === "user") && content !== undefined) { + if (classification === "user" && content !== undefined) { if (typeof content === "string") { const category = detectSystemReminder(content) ? "system_message" @@ -183,7 +263,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] { return messages; } - if ((type === "assistant" || role === "assistant") && content !== undefined) { + if (classification === "assistant" && content !== undefined) { if (typeof content === "string") { messages.push({ uuid, diff --git a/tests/unit/session-parser.test.ts b/tests/unit/session-parser.test.ts index ab6114a..3faaf2b 100644 --- a/tests/unit/session-parser.test.ts +++ b/tests/unit/session-parser.test.ts @@ -1,5 +1,11 @@ import { describe, it, expect } from "vitest"; -import { parseSessionContent } from "../../src/server/services/session-parser.js"; +import { + parseSessionContent, + forEachJsonlLine, + classifyLine, + countMessagesForLine, +} from "../../src/server/services/session-parser.js"; +import type { RawLine } from "../../src/server/services/session-parser.js"; import fs from "fs/promises"; import path from "path"; @@ -319,4 +325,255 @@ describe("session-parser", () => { const msgs = parseSessionContent(line); expect(msgs[0].progressSubtype).toBe("hook"); }); + + describe("forEachJsonlLine", () => { + it("skips malformed JSON lines and reports parseErrors count", () => { + const content = [ + "not valid json", + JSON.stringify({ type: "user", message: { role: "user", content: "Hello" } }), + "{broken}", + ].join("\n"); + + const lines: RawLine[] = []; + const result = forEachJsonlLine(content, (parsed) => { + lines.push(parsed); + }); + + expect(lines).toHaveLength(1); + expect(result.parseErrors).toBe(2); + }); + + it("skips empty and whitespace-only lines without incrementing parseErrors", () => { + const content = [ + "", + " ", + JSON.stringify({ type: "summary", summary: "test" }), + "\t", + "", + ].join("\n"); + + const lines: RawLine[] = []; + const result = forEachJsonlLine(content, (parsed) => { + lines.push(parsed); + }); + + expect(lines).toHaveLength(1); + expect(result.parseErrors).toBe(0); + }); + + it("returns parseErrors 0 for empty content", () => { + const lines: RawLine[] = []; + const result = forEachJsonlLine("", (parsed) => { + lines.push(parsed); + }); + + expect(lines).toHaveLength(0); + expect(result.parseErrors).toBe(0); + }); + + it("processes content without trailing newline", () => { + const content = JSON.stringify({ type: "summary", summary: "no trailing newline" }); + + const lines: RawLine[] = []; + forEachJsonlLine(content, (parsed) => { + lines.push(parsed); + }); + + expect(lines).toHaveLength(1); + expect(lines[0].summary).toBe("no trailing newline"); + }); + + it("passes correct lineIndex to callback", () => { + const content = [ + JSON.stringify({ type: "user", message: { role: "user", content: "first" } }), + "", + JSON.stringify({ type: "summary", summary: "third" }), + ].join("\n"); + + const indices: number[] = []; + forEachJsonlLine(content, (_parsed, lineIndex) => { + indices.push(lineIndex); + }); + + expect(indices).toEqual([0, 2]); + }); + }); + + describe("classifyLine", () => { + it("returns correct classification for each type", () => { + expect(classifyLine({ type: "progress" })).toBe("progress"); + expect(classifyLine({ type: "file-history-snapshot" })).toBe("file-history-snapshot"); + expect(classifyLine({ type: "summary" })).toBe("summary"); + expect(classifyLine({ type: "system" })).toBe("system"); + expect(classifyLine({ type: "queue-operation" })).toBe("queue-operation"); + expect(classifyLine({ type: "user", message: { role: "user" } })).toBe("user"); + expect(classifyLine({ type: "assistant", message: { role: "assistant" } })).toBe("assistant"); + expect(classifyLine({})).toBe("unknown"); + }); + + it("classifies by message.role when type is missing", () => { + expect(classifyLine({ message: { role: "user" } })).toBe("user"); + expect(classifyLine({ message: { role: "assistant" } })).toBe("assistant"); + }); + + it("returns unknown for missing type and no role", () => { + expect(classifyLine({ message: {} })).toBe("unknown"); + expect(classifyLine({ uuid: "orphan" })).toBe("unknown"); + }); + }); + + describe("countMessagesForLine", () => { + it("returns 1 for user string message", () => { + const line: RawLine = { + type: "user", + message: { role: "user", content: "Hello" }, + }; + expect(countMessagesForLine(line)).toBe(1); + }); + + it("matches extractMessages length for user array with tool_result and text", () => { + const line: RawLine = { + type: "user", + message: { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "t1", content: "result" }, + { type: "text", text: "description" }, + ], + }, + uuid: "u-arr", + }; + const msgs = parseSessionContent(JSON.stringify(line)); + expect(countMessagesForLine(line)).toBe(msgs.length); + expect(countMessagesForLine(line)).toBe(2); + }); + + it("matches extractMessages length for assistant array with thinking/text/tool_use", () => { + const line: RawLine = { + type: "assistant", + message: { + role: "assistant", + content: [ + { type: "thinking", thinking: "hmm" }, + { type: "text", text: "response" }, + { type: "tool_use", name: "Read", input: { file_path: "/x" } }, + ], + }, + uuid: "a-arr", + }; + const msgs = parseSessionContent(JSON.stringify(line)); + expect(countMessagesForLine(line)).toBe(msgs.length); + expect(countMessagesForLine(line)).toBe(3); + }); + + it("returns 1 for progress/file-history-snapshot/summary", () => { + expect(countMessagesForLine({ type: "progress", data: { type: "hook" } })).toBe(1); + expect(countMessagesForLine({ type: "file-history-snapshot", snapshot: {} })).toBe(1); + expect(countMessagesForLine({ type: "summary", summary: "test" })).toBe(1); + }); + + it("returns 0 for system/queue-operation", () => { + expect(countMessagesForLine({ type: "system", subtype: "turn_duration" })).toBe(0); + expect(countMessagesForLine({ type: "queue-operation" })).toBe(0); + }); + + it("returns 0 for unknown type", () => { + expect(countMessagesForLine({})).toBe(0); + expect(countMessagesForLine({ type: "something-new" })).toBe(0); + }); + + it("returns 0 for user message with empty content array", () => { + const line: RawLine = { + type: "user", + message: { role: "user", content: [] }, + }; + expect(countMessagesForLine(line)).toBe(0); + }); + + it("returns 0 for user message with undefined content", () => { + const line: RawLine = { + type: "user", + message: { role: "user" }, + }; + expect(countMessagesForLine(line)).toBe(0); + }); + + it("only counts known block types in assistant arrays", () => { + const line: RawLine = { + type: "assistant", + message: { + role: "assistant", + content: [ + { type: "thinking", thinking: "hmm" }, + { type: "unknown_block" }, + { type: "text", text: "hi" }, + ], + }, + }; + expect(countMessagesForLine(line)).toBe(2); + }); + + it("returns 1 for assistant string content", () => { + const line: RawLine = { + type: "assistant", + message: { role: "assistant", content: "direct string" }, + }; + expect(countMessagesForLine(line)).toBe(1); + }); + + it("counts user text with system-reminder as 1 (reclassified but still counted)", () => { + const line: RawLine = { + type: "user", + message: { role: "user", content: "Some reminder" }, + uuid: "u-sr-parity", + }; + const msgs = parseSessionContent(JSON.stringify(line)); + expect(countMessagesForLine(line)).toBe(msgs.length); + expect(countMessagesForLine(line)).toBe(1); + }); + + it("handles truncated JSON (crash mid-write)", () => { + const content = [ + JSON.stringify({ type: "user", message: { role: "user", content: "ok" }, uuid: "u-ok" }), + '{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"trun', + ].join("\n"); + + const lines: RawLine[] = []; + const result = forEachJsonlLine(content, (parsed) => { + lines.push(parsed); + }); + expect(lines).toHaveLength(1); + expect(result.parseErrors).toBe(1); + }); + }); + + describe("parser parity: fixture integration", () => { + it("countMessagesForLine sum matches parseSessionContent on sample-session.jsonl", async () => { + const fixturePath = path.join(__dirname, "../fixtures/sample-session.jsonl"); + const content = await fs.readFile(fixturePath, "utf-8"); + + const parsedMessages = parseSessionContent(content); + + let countSum = 0; + forEachJsonlLine(content, (parsed) => { + countSum += countMessagesForLine(parsed); + }); + + expect(countSum).toBe(parsedMessages.length); + }); + + it("countMessagesForLine sum matches parseSessionContent on edge-cases.jsonl", async () => { + const fixturePath = path.join(__dirname, "../fixtures/edge-cases.jsonl"); + const content = await fs.readFile(fixturePath, "utf-8"); + + const parsedMessages = parseSessionContent(content); + + let countSum = 0; + forEachJsonlLine(content, (parsed) => { + countSum += countMessagesForLine(parsed); + }); + + expect(countSum).toBe(parsedMessages.length); + }); + }); });