Files
session-viewer/tests/unit/session-discovery.test.ts
teernisse 8fddd50193 Implement JSONL-first session discovery with tiered lookup
Rewrite session discovery to be filesystem-first, addressing the widespread
bug where Claude Code's sessions-index.json files are unreliable (87 MB of
unindexed sessions, 17% loss rate across all projects).

Architecture: Three-tier metadata lookup

Tier 1 - Index validation (instant):
  - Parse sessions-index.json into Map<sessionId, IndexEntry>
  - Validate entry.modified against actual file stat.mtimeMs
  - Use 1s tolerance to account for ISO string → filesystem mtime rounding
  - Trust content fields only (messageCount, summary, firstPrompt)
  - Timestamps always come from fs.stat, never from index

Tier 2 - Persistent cache hit (instant):
  - Check MetadataCache by (filePath, mtimeMs, size)
  - If match, use cached metadata
  - Survives server restarts

Tier 3 - Full JSONL parse (~5-50ms/file):
  - Call extractSessionMetadata() with shared parser helpers
  - Cache result for future lookups

Key correctness guarantees:
- All .jsonl files appear regardless of index state
- SessionEntry timestamps always from fs.stat (list ordering never stale)
- Message counts exact (shared helpers ensure parser parity)
- Duration computed from JSONL timestamps, not index

Performance:
- Bounded concurrency: 32 concurrent operations per project
- mapWithLimit() prevents file handle exhaustion
- Warm start <1s (stat all files, in-memory lookups)
- Cold start ~3-5s for 3,103 files (stat + parse phases)

TOCTOU handling:
- Files that disappear between readdir and stat: silently skipped
- Files that disappear between stat and read: silently skipped
- File actively being written: partial parse handled gracefully

Include PRD document that drove this implementation with detailed
requirements, edge cases, and verification plan.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-28 00:53:20 -05:00

459 lines
14 KiB
TypeScript

import { describe, it, expect, beforeEach } from "vitest";
import { discoverSessions, setCache } from "../../src/server/services/session-discovery.js";
import { MetadataCache } from "../../src/server/services/metadata-cache.js";
import path from "path";
import fs from "fs/promises";
import os from "os";
/**
 * Serializes a list of records into JSONL text: one JSON document per line,
 * joined with "\n" and with no trailing newline.
 *
 * @param lines Records to serialize, in output order.
 * @returns The JSONL text; an empty string when `lines` is empty.
 */
function makeJsonlContent(lines: Record<string, unknown>[]): string {
  const serialized: string[] = [];
  for (const record of lines) {
    serialized.push(JSON.stringify(record));
  }
  return serialized.join("\n");
}
/**
 * Builds the text of a sessions-index.json fixture.
 *
 * @param entries Index entries to embed verbatim under the `entries` key.
 * @returns JSON text of an object with `version: 1` and the given entries.
 */
function makeIndex(entries: Record<string, unknown>[]): string {
  const indexDocument = { version: 1, entries };
  return JSON.stringify(indexDocument);
}
/**
 * Creates an isolated temporary directory tree for a single test.
 *
 * Layout: `<tmpDir>/test-project/` is created on disk; `cachePath` points at
 * `<tmpDir>/.cache/metadata.json` but neither that file nor its parent
 * directory is created here.
 *
 * @param suffix Label embedded in the directory name to identify the test.
 * @returns The root directory, the project directory inside it, the cache
 *   file path, and a `cleanup` function that removes the whole tree.
 */
async function makeTmpProject(
  suffix: string
): Promise<{ tmpDir: string; projectDir: string; cachePath: string; cleanup: () => Promise<void> }> {
  // mkdtemp appends random characters to the prefix, so two calls with the
  // same suffix never share a directory (a Date.now()-based name can collide
  // when both calls land in the same millisecond).
  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), `sv-test-${suffix}-`));
  const projectDir = path.join(tmpDir, "test-project");
  const cachePath = path.join(tmpDir, ".cache", "metadata.json");
  await fs.mkdir(projectDir, { recursive: true });
  return {
    tmpDir,
    projectDir,
    cachePath,
    // force: true keeps cleanup from throwing if the tree is already gone.
    cleanup: () => fs.rm(tmpDir, { recursive: true, force: true }),
  };
}
describe("session-discovery", () => {
beforeEach(() => {
  // Install a fresh MetadataCache before every test so that entries cached by
  // one test cannot leak into the next. The backing file lives in the OS temp
  // directory and is named after the current timestamp.
  const cacheFile = path.join(os.tmpdir(), `sv-cache-${Date.now()}.json`);
  const freshCache = new MetadataCache(cacheFile);
  setCache(freshCache);
});
// With no sessions-index.json present, a .jsonl file must still be discovered
// and its metadata (id, project, message count, first prompt, path) filled in.
it("discovers sessions from .jsonl files without index", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("no-index");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Hello world" },
        uuid: "u-1",
        timestamp: "2025-10-15T10:00:00Z",
      },
      {
        type: "assistant",
        message: {
          role: "assistant",
          content: [{ type: "text", text: "Hi there" }],
        },
        uuid: "a-1",
        timestamp: "2025-10-15T10:01:00Z",
      },
    ]);
    const sessionPath = path.join(projectDir, "sess-001.jsonl");
    await fs.writeFile(sessionPath, content);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].id).toBe("sess-001");
    expect(sessions[0].project).toBe("test-project");
    expect(sessions[0].messageCount).toBe(2);
    expect(sessions[0].firstPrompt).toBe("Hello world");
    expect(sessions[0].path).toBe(sessionPath);
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// The JSONL content carries a 2020 timestamp; the reported `created` value
// must instead reflect the freshly written file, i.e. be within the last minute.
it("timestamps come from stat, not JSONL content", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("stat-times");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Hello" },
        uuid: "u-1",
        timestamp: "2020-01-01T00:00:00Z",
      },
    ]);
    const filePath = path.join(projectDir, "sess-stat.jsonl");
    await fs.writeFile(filePath, content);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // A value derived from the 2020 content timestamp would be years old.
    const createdDate = new Date(sessions[0].created);
    const now = new Date();
    const diffMs = now.getTime() - createdDate.getTime();
    expect(diffMs).toBeLessThan(60_000); // within last minute
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// A file that no longer exists must not show up in the results.
// NOTE(review): "ghost.jsonl" is unlinked before discoverSessions() is called,
// so this verifies that deleted files are not reported; it does not force the
// window between readdir and stat that the test title describes.
it("silently skips files deleted between readdir and stat", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("toctou");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Survives" },
        uuid: "u-1",
      },
    ]);
    // This session stays on disk and must be discovered.
    await fs.writeFile(path.join(projectDir, "survivor.jsonl"), content);
    // This one is written and immediately removed again.
    const ghostPath = path.join(projectDir, "ghost.jsonl");
    await fs.writeFile(ghostPath, content);
    await fs.unlink(ghostPath);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].id).toBe("survivor");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// A projects root that does not exist must produce an empty result list.
it("handles missing projects directory gracefully", async () => {
  const missingRoot = "/nonexistent/path";
  const discovered = await discoverSessions(missingRoot);
  expect(discovered).toEqual([]);
});
// Sessions from every project directory under the root must be returned together.
it("aggregates across multiple project directories", async () => {
  // mkdtemp yields a unique root; a Date.now()-based name could collide.
  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "sv-test-multi-"));
  try {
    const proj1 = path.join(tmpDir, "project-a");
    const proj2 = path.join(tmpDir, "project-b");
    await fs.mkdir(proj1, { recursive: true });
    await fs.mkdir(proj2, { recursive: true });
    const contentA = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Project A" },
        uuid: "u-a",
        timestamp: "2025-01-01T00:00:00Z",
      },
    ]);
    const contentB = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Project B" },
        uuid: "u-b",
        timestamp: "2025-01-02T00:00:00Z",
      },
    ]);
    await fs.writeFile(path.join(proj1, "a-001.jsonl"), contentA);
    await fs.writeFile(path.join(proj2, "b-001.jsonl"), contentB);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(2);
    // Order is not asserted here, only that both projects contributed.
    const ids = sessions.map((s) => s.id);
    expect(ids).toContain("a-001");
    expect(ids).toContain("b-001");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await fs.rm(tmpDir, { recursive: true, force: true });
  }
});
// Only *.jsonl files are sessions; the index file and unrelated files sitting
// in the same project directory must not be reported.
it("ignores non-.jsonl files in project directories", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("filter-ext");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Hello" },
        uuid: "u-1",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "session.jsonl"), content);
    // An empty but valid index, built with the shared fixture helper.
    await fs.writeFile(path.join(projectDir, "sessions-index.json"), makeIndex([]));
    await fs.writeFile(path.join(projectDir, "notes.txt"), "notes");

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].id).toBe("session");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// Duration is the span between the first and last message timestamps in the
// JSONL content: 10:00 to 10:30 here, i.e. 30 minutes.
it("duration computed from JSONL timestamps", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("duration");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Start" },
        uuid: "u-1",
        timestamp: "2025-10-15T10:00:00Z",
      },
      {
        type: "assistant",
        message: {
          role: "assistant",
          content: [{ type: "text", text: "End" }],
        },
        uuid: "a-1",
        timestamp: "2025-10-15T10:30:00Z",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "sess-dur.jsonl"), content);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // 30 minutes = 1800000 ms
    expect(sessions[0].duration).toBe(1_800_000);
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// A zero-byte session file is still listed, with zero messages and an empty
// first prompt.
it("handles empty .jsonl files", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("empty");
  try {
    await fs.writeFile(path.join(projectDir, "empty.jsonl"), "");

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].id).toBe("empty");
    expect(sessions[0].messageCount).toBe(0);
    expect(sessions[0].firstPrompt).toBe("");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// The most recently modified session must come first in the result list.
it("sorts by modified descending", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("sort");
  try {
    const content1 = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "First" },
        uuid: "u-1",
      },
    ]);
    const content2 = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Second" },
        uuid: "u-2",
      },
    ]);
    const olderPath = path.join(projectDir, "older.jsonl");
    const newerPath = path.join(projectDir, "newer.jsonl");
    await fs.writeFile(olderPath, content1);
    await fs.writeFile(newerPath, content2);
    // Set the mtimes explicitly instead of sleeping between writes: a short
    // sleep is not enough on filesystems with coarse timestamp granularity,
    // whereas a one-minute gap is unambiguous everywhere.
    const newerTime = new Date();
    const olderTime = new Date(newerTime.getTime() - 60_000);
    await fs.utimes(olderPath, olderTime, olderTime);
    await fs.utimes(newerPath, newerTime, newerTime);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(2);
    expect(sessions[0].id).toBe("newer");
    expect(sessions[1].id).toBe("older");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
describe("Tier 1 index validation", () => {
// When the index entry's `modified` matches the file's mtime, the content
// fields (messageCount, summary, firstPrompt) come from the index. The index
// values deliberately differ from what parsing the file would produce.
it("uses index data when modified matches stat mtime within 1s", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-hit");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Hello" },
        uuid: "u-1",
        timestamp: "2025-10-15T10:00:00Z",
      },
    ]);
    const filePath = path.join(projectDir, "sess-idx.jsonl");
    await fs.writeFile(filePath, content);
    // Read back the real mtime so the index entry can match it.
    const stat = await fs.stat(filePath);
    const mtimeIso = new Date(stat.mtimeMs).toISOString();
    await fs.writeFile(
      path.join(projectDir, "sessions-index.json"),
      makeIndex([
        {
          sessionId: "sess-idx",
          summary: "Index summary",
          firstPrompt: "Index prompt",
          messageCount: 99,
          modified: mtimeIso,
          created: "2025-10-15T09:00:00Z",
        },
      ])
    );

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // Should use index data (Tier 1 hit)
    expect(sessions[0].messageCount).toBe(99);
    expect(sessions[0].summary).toBe("Index summary");
    expect(sessions[0].firstPrompt).toBe("Index prompt");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// An index entry whose `modified` is far from the file's mtime is stale; the
// reported metadata must come from parsing the file instead.
it("rejects index data when mtime mismatch > 1s", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-miss");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Real content" },
        uuid: "u-1",
        timestamp: "2025-10-15T10:00:00Z",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "sess-stale.jsonl"), content);
    // Write an index with a very old modified timestamp (stale)
    await fs.writeFile(
      path.join(projectDir, "sessions-index.json"),
      makeIndex([
        {
          sessionId: "sess-stale",
          summary: "Stale index summary",
          firstPrompt: "Stale prompt",
          messageCount: 99,
          modified: "2020-01-01T00:00:00Z",
          created: "2020-01-01T00:00:00Z",
        },
      ])
    );

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // Should NOT use index data (Tier 1 miss) — falls through to Tier 3
    expect(sessions[0].messageCount).toBe(1); // Actual parse count
    expect(sessions[0].firstPrompt).toBe("Real content");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// Without a `modified` field the index entry cannot be validated against the
// file, so the message count must come from parsing the file (1, not 99).
it("skips Tier 1 when entry has no modified field", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-no-mod");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Real content" },
        uuid: "u-1",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "sess-nomod.jsonl"), content);
    await fs.writeFile(
      path.join(projectDir, "sessions-index.json"),
      makeIndex([
        {
          sessionId: "sess-nomod",
          summary: "Index summary",
          messageCount: 99,
          // No modified field
        },
      ])
    );

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // Falls through to Tier 3 parse
    expect(sessions[0].messageCount).toBe(1);
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// A project directory with no sessions-index.json at all must still yield its
// session, with the first prompt taken from the file content.
it("handles missing sessions-index.json", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-missing");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "No index" },
        uuid: "u-1",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "sess-noindex.jsonl"), content);

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].firstPrompt).toBe("No index");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// An index file that is not valid JSON must not prevent discovery; the session
// is still returned with the first prompt taken from the file content.
it("handles corrupt sessions-index.json", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-corrupt");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Corrupt index" },
        uuid: "u-1",
      },
    ]);
    await fs.writeFile(path.join(projectDir, "sess-corrupt.jsonl"), content);
    await fs.writeFile(
      path.join(projectDir, "sessions-index.json"),
      "not valid json {"
    );

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    expect(sessions[0].firstPrompt).toBe("Corrupt index");
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
// Even when the index entry is accepted, `created` must reflect the file on
// disk and not the index's 1990 date.
it("timestamps always from stat even on Tier 1 hit", async () => {
  const { tmpDir, projectDir, cleanup } = await makeTmpProject("tier1-stat-ts");
  try {
    const content = makeJsonlContent([
      {
        type: "user",
        message: { role: "user", content: "Hello" },
        uuid: "u-1",
      },
    ]);
    const filePath = path.join(projectDir, "sess-ts.jsonl");
    await fs.writeFile(filePath, content);
    // Read back the real mtime so the index entry can match it.
    const stat = await fs.stat(filePath);
    const mtimeIso = new Date(stat.mtimeMs).toISOString();
    await fs.writeFile(
      path.join(projectDir, "sessions-index.json"),
      makeIndex([
        {
          sessionId: "sess-ts",
          // Content fields differ from what parsing the one-message file
          // would give, so the assertion below can prove the index was used.
          summary: "Index summary",
          firstPrompt: "Index prompt",
          messageCount: 99,
          modified: mtimeIso,
          created: "1990-01-01T00:00:00Z",
        },
      ])
    );

    const sessions = await discoverSessions(tmpDir);

    expect(sessions).toHaveLength(1);
    // Guard: confirm this really was a Tier 1 hit. A parse of the file would
    // report 1 message, which would make the timestamp check below vacuous.
    expect(sessions[0].messageCount).toBe(99);
    // created should be from stat (recent), not from index's 1990 date
    const createdDate = new Date(sessions[0].created);
    const now = new Date();
    expect(now.getTime() - createdDate.getTime()).toBeLessThan(60_000);
  } finally {
    // Remove the fixture even when an assertion above throws.
    await cleanup();
  }
});
});
});