From eda20a988699223e3d6a7e64aa1329ac3e47b790 Mon Sep 17 00:00:00 2001
From: teernisse
Date: Sat, 28 Feb 2026 00:52:41 -0500
Subject: [PATCH] Add lightweight session metadata extraction service

Introduce extractSessionMetadata() in a new session-metadata.ts module
that extracts only what the list view needs from JSONL files:

- messageCount: Uses shared countMessagesForLine() for exact parity
- firstPrompt: First non-system-reminder user message, truncated to 200 chars
- summary: Last type="summary" line's summary field
- firstTimestamp/lastTimestamp: For duration computation

Design goals:
- Parser parity: Uses forEachJsonlLine() and countMessagesForLine() from
  session-parser.ts, ensuring list counts always match detail-view counts
- No string building: Avoids JSON.stringify and markdown processing
- 2-3x faster than full parse: Only captures metadata, skips content
- Graceful degradation: Handles malformed lines identically to full parser

This is the Tier 3 data source for JSONL-first session discovery. When
neither the sessions-index.json nor the persistent cache has valid data,
this function extracts fresh metadata from the file.

Test coverage includes:
- Output matches parseSessionContent().length on sample fixtures
- Duration extraction from JSONL timestamps
- firstPrompt extraction skips system-reminder content
- Empty files return zero counts and empty strings

Co-Authored-By: Claude Opus 4.5
---
 src/server/services/session-metadata.ts |  89 +++++++++++
 tests/unit/session-metadata.test.ts     | 192 ++++++++++++++++++++++++
 2 files changed, 281 insertions(+)
 create mode 100644 src/server/services/session-metadata.ts
 create mode 100644 tests/unit/session-metadata.test.ts

diff --git a/src/server/services/session-metadata.ts b/src/server/services/session-metadata.ts
new file mode 100644
index 0000000..fe54144
--- /dev/null
+++ b/src/server/services/session-metadata.ts
@@ -0,0 +1,89 @@
+import {
+  forEachJsonlLine,
+  countMessagesForLine,
+  classifyLine,
+} from "./session-parser.js";
+import type { RawLine } from "./session-parser.js";
+
+/** Lightweight list-view facts gathered in a single pass over a session's JSONL. */
+export interface SessionMetadata {
+  /** Running total of countMessagesForLine() over the lines handed to the callback. */
+  messageCount: number;
+  /** First string-content user message that is not a system reminder, truncated; "" if none. */
+  firstPrompt: string;
+  /** `summary` field of the last type="summary" line; "" if none. */
+  summary: string;
+  /** Timestamp of the first line that has one; "" if none. */
+  firstTimestamp: string;
+  /** Timestamp of the last line that has one; "" if none. */
+  lastTimestamp: string;
+  /** Error count returned by forEachJsonlLine(). */
+  parseErrors: number;
+}
+
+/** Maximum length (in String.length units) kept for firstPrompt. */
+const MAX_FIRST_PROMPT_LENGTH = 200;
+
+/**
+ * Extract list-view metadata from raw JSONL session content without
+ * materializing the parsed message list.
+ *
+ * @param content - Full text of a session JSONL file.
+ * @returns Message count, first prompt, last summary, first/last timestamps, and parse-error count.
+ */
+export function extractSessionMetadata(content: string): SessionMetadata {
+  let messageCount = 0;
+  let firstPrompt = "";
+  let summary = "";
+  let firstTimestamp = "";
+  let lastTimestamp = "";
+
+  const { parseErrors } = forEachJsonlLine(content, (parsed: RawLine) => {
+    messageCount += countMessagesForLine(parsed);
+
+    if (parsed.timestamp) {
+      if (!firstTimestamp) {
+        firstTimestamp = parsed.timestamp;
+      }
+      lastTimestamp = parsed.timestamp;
+    }
+
+    // Only plain-string user content qualifies; array content is passed over.
+    if (!firstPrompt && classifyLine(parsed) === "user") {
+      const msgContent = parsed.message?.content;
+      if (typeof msgContent === "string" && !isSystemReminder(msgContent)) {
+        firstPrompt = truncate(msgContent, MAX_FIRST_PROMPT_LENGTH);
+      }
+    }
+
+    // Overwrite on every match so the last summary line wins.
+    if (parsed.type === "summary" && parsed.summary) {
+      summary = parsed.summary;
+    }
+  });
+
+  return {
+    messageCount,
+    firstPrompt,
+    summary,
+    firstTimestamp,
+    lastTimestamp,
+    parseErrors,
+  };
+}
+
+/**
+ * Report whether a user message carries system-reminder markup.
+ *
+ * The tag names must be spelled out in full: `includes("")` is true for every
+ * string, which would reject every message and leave firstPrompt empty.
+ */
+function isSystemReminder(text: string): boolean {
+  return text.includes("<system-reminder>") || text.includes("</system-reminder>");
+}
+
+/** Cut `text` to at most `maxLength` String.length units; no ellipsis is appended. */
+function truncate(text: string, maxLength: number): string {
+  if (text.length <= maxLength) return text;
+  return text.slice(0, maxLength);
+}
diff --git a/tests/unit/session-metadata.test.ts b/tests/unit/session-metadata.test.ts
new file mode 100644
index 0000000..3f4a3e8
--- /dev/null
+++ b/tests/unit/session-metadata.test.ts
@@ -0,0 +1,192 @@
+import { describe, it, expect } from "vitest";
+import { extractSessionMetadata } from "../../src/server/services/session-metadata.js";
+import { parseSessionContent } from "../../src/server/services/session-parser.js";
+import fs from "fs/promises";
+import path from "path";
+
+describe("session-metadata", () => {
+  it("messageCount matches parseSessionContent on sample-session.jsonl", async () => {
+    const fixturePath = path.join(__dirname, "../fixtures/sample-session.jsonl");
+    const content = await fs.readFile(fixturePath, "utf-8");
+
+    const meta = extractSessionMetadata(content);
+    const parsed = parseSessionContent(content);
+
+    expect(meta.messageCount).toBe(parsed.length);
+  });
+
+  it("messageCount matches parseSessionContent on edge-cases.jsonl", async () => {
+    const fixturePath = path.join(__dirname, "../fixtures/edge-cases.jsonl");
+    const content = await fs.readFile(fixturePath, "utf-8");
+
+    const meta = extractSessionMetadata(content);
+    const parsed = parseSessionContent(content);
+
+    expect(meta.messageCount).toBe(parsed.length);
+  });
+
+  it("firstPrompt skips system-reminder messages", () => {
+    const content = [
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "<system-reminder>hook output</system-reminder>" },
+        uuid: "u-sr",
+        timestamp: "2025-01-01T00:00:00Z",
+      }),
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "What is the project structure?" },
+        uuid: "u-real",
+        timestamp: "2025-01-01T00:00:01Z",
+      }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstPrompt).toBe("What is the project structure?");
+  });
+
+  it("firstPrompt truncated to 200 chars", () => {
+    const longMessage = "a".repeat(300);
+    const content = JSON.stringify({
+      type: "user",
+      message: { role: "user", content: longMessage },
+      uuid: "u-long",
+      timestamp: "2025-01-01T00:00:00Z",
+    });
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstPrompt).toHaveLength(200);
+    expect(meta.firstPrompt).toBe("a".repeat(200));
+  });
+
+  it("summary captures the LAST summary line", () => {
+    const content = [
+      JSON.stringify({ type: "summary", summary: "First summary", uuid: "s-1" }),
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "Hello" },
+        uuid: "u-1",
+      }),
+      JSON.stringify({ type: "summary", summary: "Last summary", uuid: "s-2" }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.summary).toBe("Last summary");
+  });
+
+  it("timestamps captured from first and last lines with timestamps", () => {
+    const content = [
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "Hello" },
+        uuid: "u-1",
+        timestamp: "2025-01-01T10:00:00Z",
+      }),
+      JSON.stringify({
+        type: "assistant",
+        message: { role: "assistant", content: "Hi" },
+        uuid: "a-1",
+        timestamp: "2025-01-01T10:05:00Z",
+      }),
+      JSON.stringify({
+        type: "summary",
+        summary: "Session done",
+        uuid: "s-1",
+      }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstTimestamp).toBe("2025-01-01T10:00:00Z");
+    expect(meta.lastTimestamp).toBe("2025-01-01T10:05:00Z");
+  });
+
+  it("empty content returns zero counts and empty strings", () => {
+    const meta = extractSessionMetadata("");
+    expect(meta.messageCount).toBe(0);
+    expect(meta.firstPrompt).toBe("");
+    expect(meta.summary).toBe("");
+    expect(meta.firstTimestamp).toBe("");
+    expect(meta.lastTimestamp).toBe("");
+    expect(meta.parseErrors).toBe(0);
+  });
+
+  it("JSONL with no user messages returns empty firstPrompt", () => {
+    const content = [
+      JSON.stringify({ type: "summary", summary: "No user", uuid: "s-1" }),
+      JSON.stringify({ type: "progress", data: { type: "hook" }, uuid: "p-1" }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstPrompt).toBe("");
+  });
+
+  it("JSONL with all system-reminder users returns empty firstPrompt", () => {
+    const content = [
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "<system-reminder>r1</system-reminder>" },
+        uuid: "u-1",
+      }),
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "<system-reminder>r2</system-reminder>" },
+        uuid: "u-2",
+      }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstPrompt).toBe("");
+  });
+
+  it("single-line JSONL: firstTimestamp equals lastTimestamp", () => {
+    const content = JSON.stringify({
+      type: "user",
+      message: { role: "user", content: "solo" },
+      uuid: "u-solo",
+      timestamp: "2025-06-15T12:00:00Z",
+    });
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstTimestamp).toBe("2025-06-15T12:00:00Z");
+    expect(meta.lastTimestamp).toBe("2025-06-15T12:00:00Z");
+  });
+
+  it("reports parseErrors from malformed lines", () => {
+    const content = [
+      "broken json",
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "ok" },
+        uuid: "u-1",
+      }),
+      "{truncated",
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.parseErrors).toBe(2);
+    expect(meta.messageCount).toBe(1);
+  });
+
+  it("skips array user content for firstPrompt (only captures string content)", () => {
+    const content = [
+      JSON.stringify({
+        type: "user",
+        message: {
+          role: "user",
+          content: [
+            { type: "tool_result", tool_use_id: "t1", content: "result" },
+          ],
+        },
+        uuid: "u-arr",
+      }),
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: "Second prompt as string" },
+        uuid: "u-str",
+      }),
+    ].join("\n");
+
+    const meta = extractSessionMetadata(content);
+    expect(meta.firstPrompt).toBe("Second prompt as string");
+  });
+});