Extract shared JSONL parsing helpers for parser parity
Introduce three shared helpers in session-parser.ts that both the full parser and the lightweight metadata extractor can use:

- forEachJsonlLine(content, onLine): Iterates JSONL lines with consistent malformed-line handling. Skips invalid JSON lines the same way parseSessionContent previously did inline, and returns the parse error count for diagnostics.
- countMessagesForLine(parsed): Returns the number of messages a single JSONL line expands into, using the same classification rules as the full parser. User arrays expand tool_result and text blocks; assistant arrays expand thinking, text, and tool_use blocks.
- classifyLine(parsed): Classifies a parsed line into one of 8 types (user, assistant, progress, file-history-snapshot, summary, system, queue-operation, unknown).

parseSessionContent() now iterates lines through forEachJsonlLine, and the internal extractMessages() function now branches on classifyLine, so the full parser is intended to behave as before while the upcoming metadata extraction service can reuse the same logic. Sharing these helpers is what keeps list counts from drifting away from detail-view counts when the parser changes in the future.

Test coverage includes:

- Malformed line handling parity with the full parser
- Parse error counting for truncated/corrupted files
- countMessagesForLine output matches extractMessages().length
- Edge cases: empty files, progress events, array content expansion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -28,7 +28,7 @@ interface ContentBlock {
|
||||
content?: string | ContentBlock[];
|
||||
}
|
||||
|
||||
interface RawLine {
|
||||
export interface RawLine {
|
||||
type?: string;
|
||||
uuid?: string;
|
||||
timestamp?: string;
|
||||
@@ -43,6 +43,94 @@ interface RawLine {
|
||||
subtype?: string;
|
||||
}
|
||||
|
||||
/**
 * Category assigned to a single parsed JSONL line by `classifyLine`.
 *
 * The members are listed in the order `classifyLine` tests for them:
 * the explicit `type` values first, then the two role-based categories,
 * and finally the fallback for anything unrecognised.
 */
export type LineClassification =
  | "progress"
  | "file-history-snapshot"
  | "summary"
  | "system"
  | "queue-operation"
  | "user"
  | "assistant"
  | "unknown";
|
||||
|
||||
/**
 * Walks a JSONL document line by line and hands every usable line to `onLine`.
 *
 * - Blank and whitespace-only lines are skipped silently.
 * - Lines that are not valid JSON are skipped and counted as parse errors.
 * - Lines that are valid JSON but not an object (`null`, numbers, strings,
 *   booleans, arrays) are also skipped and counted: they cannot satisfy the
 *   `RawLine` shape, and a bare `null` would make callers that read
 *   `parsed.type` throw.
 *
 * @param content - Raw text of the JSONL file.
 * @param onLine - Callback invoked with the parsed object and the zero-based
 *   index of the line within `content.split("\n")`. Blank lines are skipped
 *   but still occupy an index.
 * @returns The number of non-blank lines that were skipped as unusable.
 */
export function forEachJsonlLine(
  content: string,
  onLine: (parsed: RawLine, lineIndex: number) => void
): { parseErrors: number } {
  let parseErrors = 0;

  for (const [lineIndex, rawLine] of content.split("\n").entries()) {
    // trim() also strips a trailing "\r" left behind by CRLF line endings.
    const trimmed = rawLine.trim();
    if (!trimmed) continue;

    let value: unknown;
    try {
      value = JSON.parse(trimmed);
    } catch {
      parseErrors++;
      continue;
    }

    // JSON.parse accepts any JSON value; only plain objects are session lines.
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
      parseErrors++;
      continue;
    }

    onLine(value as RawLine, lineIndex);
  }

  return { parseErrors };
}
|
||||
|
||||
/**
 * Maps a parsed JSONL line onto its `LineClassification`.
 *
 * An explicit structural `type` (progress, file-history-snapshot, summary,
 * system, queue-operation) always takes precedence. Otherwise the line is a
 * user or assistant line when either its `type` or its `message.role` says
 * so, with "user" checked before "assistant". Anything else is "unknown".
 *
 * @param parsed - One parsed line of the session file.
 * @returns The category of the line.
 */
export function classifyLine(parsed: RawLine): LineClassification {
  const { type, message } = parsed;

  switch (type) {
    case "progress":
    case "file-history-snapshot":
    case "summary":
    case "system":
    case "queue-operation":
      // For these values the classification is the type string itself.
      return type;
    default:
      break;
  }

  const role = message?.role;
  if (type === "user" || role === "user") return "user";
  if (type === "assistant" || role === "assistant") return "assistant";
  return "unknown";
}
|
||||
|
||||
/**
 * Reports how many messages a single parsed JSONL line contributes.
 *
 * - progress, file-history-snapshot and summary lines count as one message.
 * - system, queue-operation and unknown lines count as zero.
 * - user and assistant lines depend on `message.content`:
 *   - a string counts as one message;
 *   - an array counts one message per block of a recognised type
 *     (user: `tool_result`, `text`; assistant: `thinking`, `text`, `tool_use`);
 *   - missing or any other content counts as zero.
 *
 * @param parsed - One parsed line of the session file.
 * @returns The number of messages the line contributes.
 */
export function countMessagesForLine(parsed: RawLine): number {
  const kind = classifyLine(parsed);

  if (kind === "progress" || kind === "file-history-snapshot" || kind === "summary") {
    return 1;
  }
  if (kind !== "user" && kind !== "assistant") {
    return 0;
  }

  const body = parsed.message?.content;
  if (typeof body === "string") return 1;
  if (!Array.isArray(body)) return 0;

  // Which block types become messages depends on who authored the line.
  const isCounted =
    kind === "user"
      ? (block: ContentBlock) => block.type === "tool_result" || block.type === "text"
      : (block: ContentBlock) =>
          block.type === "thinking" || block.type === "text" || block.type === "tool_use";

  return body.reduce((total, block) => (isCounted(block) ? total + 1 : total), 0);
}
|
||||
|
||||
export async function parseSession(
|
||||
filePath: string
|
||||
): Promise<ParsedMessage[]> {
|
||||
@@ -58,31 +146,23 @@ export async function parseSession(
|
||||
|
||||
/**
 * Parses the text of a JSONL session file into a flat list of messages.
 *
 * Line iteration is delegated to `forEachJsonlLine`, which skips blank and
 * malformed lines. Each remaining line is expanded by `extractMessages` and
 * the results are appended in file order.
 *
 * @param content - Raw text of the JSONL session file.
 * @returns Every message extracted from the file, in line order.
 */
export function parseSessionContent(content: string): ParsedMessage[] {
  const messages: ParsedMessage[] = [];

  forEachJsonlLine(content, (parsed, lineIndex) => {
    const extracted = extractMessages(parsed, lineIndex);
    messages.push(...extracted);
  });

  return messages;
}
|
||||
|
||||
function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
const messages: ParsedMessage[] = [];
|
||||
const type = raw.type;
|
||||
const classification = classifyLine(raw);
|
||||
const uuid = raw.uuid || `generated-${rawIndex}`;
|
||||
const timestamp = raw.timestamp;
|
||||
|
||||
// Progress/hook messages - content is in `data`, not `content`
|
||||
if (type === "progress") {
|
||||
if (classification === "progress") {
|
||||
const data = raw.data;
|
||||
const progressText = data
|
||||
? formatProgressData(data)
|
||||
@@ -102,7 +182,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
}
|
||||
|
||||
// File history snapshot
|
||||
if (type === "file-history-snapshot") {
|
||||
if (classification === "file-history-snapshot") {
|
||||
messages.push({
|
||||
uuid,
|
||||
category: "file_snapshot",
|
||||
@@ -114,7 +194,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
}
|
||||
|
||||
// Summary message - text is in `summary` field, not `content`
|
||||
if (type === "summary") {
|
||||
if (classification === "summary") {
|
||||
messages.push({
|
||||
uuid,
|
||||
category: "summary",
|
||||
@@ -126,7 +206,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
}
|
||||
|
||||
// System metadata (turn_duration etc.) - skip, not user-facing
|
||||
if (type === "system" || type === "queue-operation") {
|
||||
if (classification === "system" || classification === "queue-operation") {
|
||||
return messages;
|
||||
}
|
||||
|
||||
@@ -134,7 +214,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
const role = raw.message?.role;
|
||||
const content = raw.message?.content;
|
||||
|
||||
if ((type === "user" || role === "user") && content !== undefined) {
|
||||
if (classification === "user" && content !== undefined) {
|
||||
if (typeof content === "string") {
|
||||
const category = detectSystemReminder(content)
|
||||
? "system_message"
|
||||
@@ -183,7 +263,7 @@ function extractMessages(raw: RawLine, rawIndex: number): ParsedMessage[] {
|
||||
return messages;
|
||||
}
|
||||
|
||||
if ((type === "assistant" || role === "assistant") && content !== undefined) {
|
||||
if (classification === "assistant" && content !== undefined) {
|
||||
if (typeof content === "string") {
|
||||
messages.push({
|
||||
uuid,
|
||||
|
||||
Reference in New Issue
Block a user