Introduce three shared helpers in session-parser.ts that both the full parser and the lightweight metadata extractor can use:

- forEachJsonlLine(content, onLine): Iterates JSONL lines with consistent malformed-line handling. Skips invalid JSON lines exactly as parseSessionContent does, and returns a result whose parseErrors field reports how many lines were skipped, for diagnostics.
- countMessagesForLine(parsed): Returns the number of messages a single JSONL line expands into, using the same classification rules as the full parser. User arrays expand tool_result and text blocks; assistant arrays expand thinking, text, and tool_use blocks.
- classifyLine(parsed): Classifies a parsed line into one of 8 types (user, assistant, system, progress, summary, file-history-snapshot, queue-operation, unknown).

The internal extractMessages() function now uses these shared helpers, so existing behavior is unchanged while the upcoming metadata extraction service can reuse the same logic. This guarantees that list counts can never drift from detail-view counts, regardless of future parser changes.

Test coverage includes:
- Malformed line handling parity with the full parser
- Parse error counting for truncated/corrupted files
- countMessagesForLine output matches extractMessages().length
- Edge cases: empty files, progress events, array content expansion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
580 lines
19 KiB
TypeScript
580 lines
19 KiB
TypeScript
import { describe, it, expect } from "vitest";
|
|
import {
|
|
parseSessionContent,
|
|
forEachJsonlLine,
|
|
classifyLine,
|
|
countMessagesForLine,
|
|
} from "../../src/server/services/session-parser.js";
|
|
import type { RawLine } from "../../src/server/services/session-parser.js";
|
|
import fs from "fs/promises";
|
|
import path from "path";
|
|
|
|
describe("session-parser", () => {
|
|
it("parses user messages with string content", () => {
  // A user line with plain-string content should produce exactly one
  // user_message that carries the line's content, uuid and timestamp.
  const record = {
    type: "user",
    message: { role: "user", content: "Hello world" },
    uuid: "u-1",
    timestamp: "2025-10-15T10:00:00Z",
  };

  const messages = parseSessionContent(JSON.stringify(record));

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("user_message");
  expect(message.content).toBe("Hello world");
  expect(message.uuid).toBe("u-1");
  expect(message.timestamp).toBe("2025-10-15T10:00:00Z");
});
|
|
|
|
it("parses user messages with tool_result array content", () => {
  // Array-valued user content holding one tool_result block should yield one
  // tool_result message whose content is the block's content string.
  const toolResultBlock = {
    type: "tool_result",
    tool_use_id: "toolu_01",
    content: "File contents here",
  };
  const jsonl = JSON.stringify({
    type: "user",
    message: { role: "user", content: [toolResultBlock] },
    uuid: "u-2",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("tool_result");
  expect(message.content).toBe("File contents here");
});
|
|
|
|
it("parses assistant text blocks", () => {
  // One assistant text block should yield one assistant_text message.
  const jsonl = JSON.stringify({
    type: "assistant",
    message: {
      role: "assistant",
      content: [{ type: "text", text: "Here is my response" }],
    },
    uuid: "a-1",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("assistant_text");
  expect(message.content).toBe("Here is my response");
});
|
|
|
|
it("parses thinking blocks", () => {
  // One assistant thinking block should yield one thinking message that
  // exposes the block's `thinking` text as its content.
  const jsonl = JSON.stringify({
    type: "assistant",
    message: {
      role: "assistant",
      content: [{ type: "thinking", thinking: "Let me think about this..." }],
    },
    uuid: "a-2",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("thinking");
  expect(message.content).toBe("Let me think about this...");
});
|
|
|
|
it("parses tool_use blocks", () => {
  // One assistant tool_use block should yield one tool_call message that
  // records the tool name and an input string containing the file path.
  const toolUseBlock = {
    type: "tool_use",
    name: "Read",
    input: { file_path: "/src/index.ts" },
  };
  const jsonl = JSON.stringify({
    type: "assistant",
    message: { role: "assistant", content: [toolUseBlock] },
    uuid: "a-3",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("tool_call");
  expect(message.toolName).toBe("Read");
  expect(message.toolInput).toContain("/src/index.ts");
});
|
|
|
|
it("parses progress messages from data field", () => {
  // A progress line should yield one hook_progress message whose content
  // mentions both the hook event and the hook name found under `data`.
  const jsonl = JSON.stringify({
    type: "progress",
    data: { type: "hook", hookEvent: "PreToolUse", hookName: "security_check" },
    uuid: "p-1",
    timestamp: "2025-10-15T10:00:00Z",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("hook_progress");
  expect(message.content).toContain("PreToolUse");
  expect(message.content).toContain("security_check");
});
|
|
|
|
it("parses file-history-snapshot messages", () => {
  // A file-history-snapshot line should yield one file_snapshot message.
  const snapshot = {
    messageId: "snap-1",
    trackedFileBackups: [],
    timestamp: "2025-10-15T10:00:00Z",
  };
  const jsonl = JSON.stringify({
    type: "file-history-snapshot",
    messageId: "snap-1",
    snapshot,
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  expect(messages[0].category).toBe("file_snapshot");
});
|
|
|
|
it("parses summary messages from summary field", () => {
  // A summary line should yield one summary message whose content is the
  // line's `summary` string.
  const jsonl = JSON.stringify({
    type: "summary",
    summary: "Session summary here",
    leafUuid: "msg-10",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("summary");
  expect(message.content).toBe("Session summary here");
});
|
|
|
|
it("skips system metadata lines (turn_duration)", () => {
  // System lines with the turn_duration subtype must not produce any message.
  const record = {
    type: "system",
    subtype: "turn_duration",
    durationMs: 50000,
    uuid: "sys-1",
  };

  const messages = parseSessionContent(JSON.stringify(record));

  expect(messages).toHaveLength(0);
});
|
|
|
|
it("skips queue-operation lines", () => {
  // queue-operation lines must not produce any message.
  const record = { type: "queue-operation", uuid: "qo-1" };

  const messages = parseSessionContent(JSON.stringify(record));

  expect(messages).toHaveLength(0);
});
|
|
|
|
it("detects system-reminder content in user messages", () => {
  // User content wrapped in <system-reminder> tags should be categorised as
  // system_message rather than user_message.
  const jsonl = JSON.stringify({
    type: "user",
    message: {
      role: "user",
      content: "<system-reminder>Some reminder</system-reminder>",
    },
    uuid: "u-sr",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  expect(messages[0].category).toBe("system_message");
});
|
|
|
|
it("skips malformed JSONL lines without crashing", () => {
  // One valid line placed between two unparseable lines: only the valid line
  // should survive, and parsing must not throw.
  const validLine = JSON.stringify({
    type: "user",
    message: { role: "user", content: "Valid message" },
    uuid: "u-valid",
  });
  const content = ["not valid json", validLine, "{broken}"].join("\n");

  const messages = parseSessionContent(content);

  expect(messages).toHaveLength(1);
  expect(messages[0].content).toBe("Valid message");
});
|
|
|
|
it("returns empty array for empty files", () => {
  // Empty input should produce an empty message list.
  const emptyContent = "";

  expect(parseSessionContent(emptyContent)).toEqual([]);
});
|
|
|
|
it("uses uuid from the JSONL line, not random", () => {
  // The message uuid must be the one supplied on the source line.
  const record = {
    type: "user",
    message: { role: "user", content: "Test" },
    uuid: "my-specific-uuid-123",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.uuid).toBe("my-specific-uuid-123");
});
|
|
|
|
it("parses the full sample session fixture", async () => {
  // The sample fixture is expected to contain at least one message of every
  // category listed below; they are checked in this order.
  const expectedCategories = [
    "user_message",
    "assistant_text",
    "thinking",
    "tool_call",
    "tool_result",
    "system_message",
    "hook_progress",
    "summary",
    "file_snapshot",
  ] as const;

  const fixturePath = path.join(__dirname, "../fixtures/sample-session.jsonl");
  const content = await fs.readFile(fixturePath, "utf-8");
  const categories = new Set(parseSessionContent(content).map((m) => m.category));

  for (const category of expectedCategories) {
    expect(categories.has(category)).toBe(true);
  }
});
|
|
|
|
it("handles edge-cases fixture (corrupt lines)", async () => {
  // The edge-cases fixture is expected to produce exactly two messages:
  // a user_message followed by an assistant_text.
  const content = await fs.readFile(
    path.join(__dirname, "../fixtures/edge-cases.jsonl"),
    "utf-8"
  );

  const messages = parseSessionContent(content);

  expect(messages).toHaveLength(2);
  const [first, second] = messages;
  expect(first.category).toBe("user_message");
  expect(second.category).toBe("assistant_text");
});
|
|
|
|
it("extracts toolUseId from tool_use blocks with id field", () => {
  // The tool_use block's `id` should surface as the message's toolUseId.
  const toolUseBlock = {
    type: "tool_use",
    id: "toolu_abc123",
    name: "Read",
    input: { file_path: "/src/index.ts" },
  };
  const jsonl = JSON.stringify({
    type: "assistant",
    message: { role: "assistant", content: [toolUseBlock] },
    uuid: "a-tu-1",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("tool_call");
  expect(message.toolUseId).toBe("toolu_abc123");
});
|
|
|
|
it("toolUseId is undefined when tool_use block has no id field", () => {
  // Without an `id` on the tool_use block, toolUseId must stay undefined.
  const toolUseBlock = {
    type: "tool_use",
    name: "Read",
    input: { file_path: "/src/index.ts" },
  };
  const jsonl = JSON.stringify({
    type: "assistant",
    message: { role: "assistant", content: [toolUseBlock] },
    uuid: "a-tu-2",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  expect(messages[0].toolUseId).toBeUndefined();
});
|
|
|
|
it("extracts parentToolUseId and progressSubtype from hook_progress", () => {
  // The line's parentToolUseID should surface as parentToolUseId, and a
  // hook_progress data type should map to the "hook" subtype.
  const jsonl = JSON.stringify({
    type: "progress",
    data: { type: "hook_progress", hookEvent: "PreToolUse", hookName: "check" },
    parentToolUseID: "toolu_abc123",
    uuid: "p-linked",
    timestamp: "2025-10-15T10:00:00Z",
  });

  const messages = parseSessionContent(jsonl);

  expect(messages).toHaveLength(1);
  const [message] = messages;
  expect(message.category).toBe("hook_progress");
  expect(message.parentToolUseId).toBe("toolu_abc123");
  expect(message.progressSubtype).toBe("hook");
});
|
|
|
|
it("derives progressSubtype 'bash' from bash_progress data type", () => {
  // A bash_progress data type should map to the "bash" subtype while still
  // carrying the parent tool-use id.
  const record = {
    type: "progress",
    data: { type: "bash_progress", status: "running" },
    parentToolUseID: "toolu_bash1",
    uuid: "p-bash",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.progressSubtype).toBe("bash");
  expect(message.parentToolUseId).toBe("toolu_bash1");
});
|
|
|
|
it("derives progressSubtype 'mcp' from mcp_progress data type", () => {
  // An mcp_progress data type should map to the "mcp" subtype.
  const record = {
    type: "progress",
    data: { type: "mcp_progress", serverName: "morph-mcp" },
    parentToolUseID: "toolu_mcp1",
    uuid: "p-mcp",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.progressSubtype).toBe("mcp");
});
|
|
|
|
it("derives progressSubtype 'agent' from agent_progress data type", () => {
  // An agent_progress data type should map to the "agent" subtype.
  const record = {
    type: "progress",
    data: { type: "agent_progress", status: "started" },
    parentToolUseID: "toolu_agent1",
    uuid: "p-agent",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.progressSubtype).toBe("agent");
});
|
|
|
|
it("parentToolUseId is undefined when progress has no parentToolUseID", () => {
  // A progress line with no parentToolUseID leaves parentToolUseId undefined
  // but still resolves the subtype from the data type.
  const record = {
    type: "progress",
    data: { type: "hook_progress", hookEvent: "SessionStart" },
    uuid: "p-orphan",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.parentToolUseId).toBeUndefined();
  expect(message.progressSubtype).toBe("hook");
});
|
|
|
|
it("progressSubtype defaults to 'hook' for unknown data types", () => {
  // An unrecognised data type should fall back to the "hook" subtype.
  const record = {
    type: "progress",
    data: { type: "unknown_thing", status: "ok" },
    uuid: "p-unknown",
  };

  const [message] = parseSessionContent(JSON.stringify(record));

  expect(message.progressSubtype).toBe("hook");
});
|
|
|
|
describe("forEachJsonlLine", () => {
  /**
   * Runs forEachJsonlLine over `content`, collecting every parsed line handed
   * to the callback together with the iterator's return value.
   */
  const collect = (content: string) => {
    const lines: RawLine[] = [];
    const result = forEachJsonlLine(content, (parsed) => {
      lines.push(parsed);
    });
    return { lines, result };
  };

  it("skips malformed JSON lines and reports parseErrors count", () => {
    // Two unparseable lines surround one valid line.
    const validLine = JSON.stringify({
      type: "user",
      message: { role: "user", content: "Hello" },
    });
    const content = ["not valid json", validLine, "{broken}"].join("\n");

    const { lines, result } = collect(content);

    expect(lines).toHaveLength(1);
    expect(result.parseErrors).toBe(2);
  });

  it("skips empty and whitespace-only lines without incrementing parseErrors", () => {
    // Blank and whitespace-only lines are skipped, not counted as errors.
    const summaryLine = JSON.stringify({ type: "summary", summary: "test" });
    const content = ["", "   ", summaryLine, "\t", ""].join("\n");

    const { lines, result } = collect(content);

    expect(lines).toHaveLength(1);
    expect(result.parseErrors).toBe(0);
  });

  it("returns parseErrors 0 for empty content", () => {
    const { lines, result } = collect("");

    expect(lines).toHaveLength(0);
    expect(result.parseErrors).toBe(0);
  });

  it("processes content without trailing newline", () => {
    // A single line with no terminating "\n" must still be delivered.
    const content = JSON.stringify({ type: "summary", summary: "no trailing newline" });

    const { lines } = collect(content);

    expect(lines).toHaveLength(1);
    expect(lines[0].summary).toBe("no trailing newline");
  });

  it("passes correct lineIndex to callback", () => {
    // The blank middle line is skipped, so the callback sees indices 0 and 2.
    const firstLine = JSON.stringify({
      type: "user",
      message: { role: "user", content: "first" },
    });
    const thirdLine = JSON.stringify({ type: "summary", summary: "third" });
    const content = [firstLine, "", thirdLine].join("\n");

    const indices: number[] = [];
    forEachJsonlLine(content, (_parsed, lineIndex) => {
      indices.push(lineIndex);
    });

    expect(indices).toEqual([0, 2]);
  });
});
|
|
|
|
describe("classifyLine", () => {
  // Input type accepted by classifyLine, derived from its signature so the
  // case tables below are checked against the real parameter type.
  type LineInput = Parameters<typeof classifyLine>[0];

  /** Asserts each [input, expected] pair in order, stopping at the first failure. */
  const expectClassifications = (cases: Array<[LineInput, string]>): void => {
    for (const [input, expected] of cases) {
      expect(classifyLine(input)).toBe(expected);
    }
  };

  it("returns correct classification for each type", () => {
    expectClassifications([
      [{ type: "progress" }, "progress"],
      [{ type: "file-history-snapshot" }, "file-history-snapshot"],
      [{ type: "summary" }, "summary"],
      [{ type: "system" }, "system"],
      [{ type: "queue-operation" }, "queue-operation"],
      [{ type: "user", message: { role: "user" } }, "user"],
      [{ type: "assistant", message: { role: "assistant" } }, "assistant"],
      [{}, "unknown"],
    ]);
  });

  it("classifies by message.role when type is missing", () => {
    expectClassifications([
      [{ message: { role: "user" } }, "user"],
      [{ message: { role: "assistant" } }, "assistant"],
    ]);
  });

  it("returns unknown for missing type and no role", () => {
    expectClassifications([
      [{ message: {} }, "unknown"],
      [{ uuid: "orphan" }, "unknown"],
    ]);
  });
});
|
|
|
|
describe("countMessagesForLine", () => {
  it("returns 1 for user string message", () => {
    const line: RawLine = {
      type: "user",
      message: { role: "user", content: "Hello" },
    };
    expect(countMessagesForLine(line)).toBe(1);
  });

  it("matches extractMessages length for user array with tool_result and text", () => {
    // Parity check: the count must equal what the full parser emits for the
    // same line (one tool_result + one text block => 2).
    const line: RawLine = {
      type: "user",
      message: {
        role: "user",
        content: [
          { type: "tool_result", tool_use_id: "t1", content: "result" },
          { type: "text", text: "description" },
        ],
      },
      uuid: "u-arr",
    };
    const msgs = parseSessionContent(JSON.stringify(line));
    expect(countMessagesForLine(line)).toBe(msgs.length);
    expect(countMessagesForLine(line)).toBe(2);
  });

  it("matches extractMessages length for assistant array with thinking/text/tool_use", () => {
    // Parity check: thinking + text + tool_use => 3 messages.
    const line: RawLine = {
      type: "assistant",
      message: {
        role: "assistant",
        content: [
          { type: "thinking", thinking: "hmm" },
          { type: "text", text: "response" },
          { type: "tool_use", name: "Read", input: { file_path: "/x" } },
        ],
      },
      uuid: "a-arr",
    };
    const msgs = parseSessionContent(JSON.stringify(line));
    expect(countMessagesForLine(line)).toBe(msgs.length);
    expect(countMessagesForLine(line)).toBe(3);
  });

  it("returns 1 for progress/file-history-snapshot/summary", () => {
    expect(countMessagesForLine({ type: "progress", data: { type: "hook" } })).toBe(1);
    expect(countMessagesForLine({ type: "file-history-snapshot", snapshot: {} })).toBe(1);
    expect(countMessagesForLine({ type: "summary", summary: "test" })).toBe(1);
  });

  it("returns 0 for system/queue-operation", () => {
    expect(countMessagesForLine({ type: "system", subtype: "turn_duration" })).toBe(0);
    expect(countMessagesForLine({ type: "queue-operation" })).toBe(0);
  });

  it("returns 0 for unknown type", () => {
    expect(countMessagesForLine({})).toBe(0);
    expect(countMessagesForLine({ type: "something-new" })).toBe(0);
  });

  it("returns 0 for user message with empty content array", () => {
    const line: RawLine = {
      type: "user",
      message: { role: "user", content: [] },
    };
    expect(countMessagesForLine(line)).toBe(0);
  });

  it("returns 0 for user message with undefined content", () => {
    const line: RawLine = {
      type: "user",
      message: { role: "user" },
    };
    expect(countMessagesForLine(line)).toBe(0);
  });

  it("only counts known block types in assistant arrays", () => {
    // The unknown_block entry must be ignored, leaving thinking + text => 2.
    const line: RawLine = {
      type: "assistant",
      message: {
        role: "assistant",
        content: [
          { type: "thinking", thinking: "hmm" },
          { type: "unknown_block" },
          { type: "text", text: "hi" },
        ],
      },
    };
    expect(countMessagesForLine(line)).toBe(2);
  });

  it("returns 1 for assistant string content", () => {
    const line: RawLine = {
      type: "assistant",
      message: { role: "assistant", content: "direct string" },
    };
    expect(countMessagesForLine(line)).toBe(1);
  });

  it("counts user text with system-reminder as 1 (reclassified but still counted)", () => {
    // The parser recategorises this content as system_message, but it is
    // still one message, so the count must remain 1.
    const line: RawLine = {
      type: "user",
      message: { role: "user", content: "<system-reminder>Some reminder</system-reminder>" },
      uuid: "u-sr-parity",
    };
    const msgs = parseSessionContent(JSON.stringify(line));
    expect(countMessagesForLine(line)).toBe(msgs.length);
    expect(countMessagesForLine(line)).toBe(1);
  });

  it("handles truncated JSON (crash mid-write)", () => {
    // A complete user line followed by a line cut off mid-string, as would be
    // left behind by a crash during a write.
    const content = [
      JSON.stringify({ type: "user", message: { role: "user", content: "ok" }, uuid: "u-ok" }),
      '{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"trun',
    ].join("\n");

    const lines: RawLine[] = [];
    let countSum = 0;
    const result = forEachJsonlLine(content, (parsed) => {
      lines.push(parsed);
      countSum += countMessagesForLine(parsed);
    });

    // The truncated line is dropped and reported as a single parse error.
    expect(lines).toHaveLength(1);
    expect(result.parseErrors).toBe(1);

    // The summed count over the surviving lines must agree with the full
    // parser, so that this suite actually exercises countMessagesForLine.
    expect(countSum).toBe(parseSessionContent(content).length);
    expect(countSum).toBe(1);
  });
});
|
|
|
|
describe("parser parity: fixture integration", () => {
  /**
   * Reads the fixture at `relativePath` (resolved against this test file's
   * directory) and asserts that summing countMessagesForLine over every
   * parseable line equals the number of messages parseSessionContent returns.
   */
  const expectCountParity = async (relativePath: string): Promise<void> => {
    const fixturePath = path.join(__dirname, relativePath);
    const content = await fs.readFile(fixturePath, "utf-8");

    const parsedMessages = parseSessionContent(content);

    let countSum = 0;
    forEachJsonlLine(content, (parsed) => {
      countSum += countMessagesForLine(parsed);
    });

    expect(countSum).toBe(parsedMessages.length);
  };

  it("countMessagesForLine sum matches parseSessionContent on sample-session.jsonl", async () => {
    await expectCountParity("../fixtures/sample-session.jsonl");
  });

  it("countMessagesForLine sum matches parseSessionContent on edge-cases.jsonl", async () => {
    await expectCountParity("../fixtures/edge-cases.jsonl");
  });
});
|
|
});
|