From c4e15bf0826474e3bc5e9f02bd9976c25d225fb6 Mon Sep 17 00:00:00 2001 From: teernisse Date: Thu, 29 Jan 2026 22:55:48 -0500 Subject: [PATCH] Add shared type definitions and sensitive content redactor Shared module consumed by both the Express server and the React client: types.ts: - ParsedMessage: the normalized message unit (uuid, category, content, toolName, toolInput, timestamp, rawIndex) that the parser emits and every downstream consumer (viewer, filter, export) operates on - MessageCategory: 9-value union covering user_message, assistant_text, thinking, tool_call, tool_result, system_message, hook_progress, file_snapshot, and summary - SessionEntry / SessionListResponse / SessionDetailResponse / ExportRequest: API contract types for the sessions list, session detail, and HTML export endpoints - ALL_CATEGORIES, CATEGORY_LABELS, DEFAULT_HIDDEN_CATEGORIES: constants for the filter panel UI and presets (thinking + hook_progress hidden by default) sensitive-redactor.ts: - 34 regex patterns derived from gitleaks production config, organized into Tier 1 (known secret formats: AWS, GitHub, GitLab, OpenAI, Anthropic, HuggingFace, Perplexity, Stripe, Slack, SendGrid, Twilio, GCP, Azure AD, Heroku, npm, PyPI, Sentry, JWT, PEM private keys, generic API key assignments) and Tier 2 (PII/system info: home directory paths, connection strings, URLs with credentials, email addresses, IPv4 addresses, Bearer tokens, env var secret assignments) - Keyword pre-filtering: each pattern declares keywords that must appear in the text before the expensive regex is evaluated, following the gitleaks performance optimization approach - False-positive allowlists: example/test email domains, localhost/documentation IPs (RFC 5737), noreply@anthropic.com - Pure functions: redactSensitiveContent returns {sanitized, redactionCount, categories}, redactString returns just the string, redactMessage returns a new ParsedMessage with content and toolInput redacted Co-Authored-By: Claude Opus 4.5 --- 
// Patch metadata retained verbatim from the original diff (not part of the module):
//   src/shared/sensitive-redactor.ts | 437 +++++++++++++++++++++++++++++++
//   src/shared/types.ts              |  77 ++++++
//   2 files changed, 514 insertions(+)
//   create mode 100644 src/shared/sensitive-redactor.ts
//   create mode 100644 src/shared/types.ts
//   diff --git a/src/shared/sensitive-redactor.ts b/src/shared/sensitive-redactor.ts
//   new file mode 100644
//   index 0000000..4db528f
//   --- /dev/null
//   +++ b/src/shared/sensitive-redactor.ts
//   @@ -0,0 +1,437 @@
import type { ParsedMessage } from "./types.js";

/**
 * Sensitive information detection and redaction module.
 *
 * Uses a curated set of 34 regex patterns derived from gitleaks' production
 * config, focused on patterns relevant to Claude Code session logs.
 * Employs keyword pre-filtering (gitleaks technique) to skip expensive regex
 * evaluation for messages that contain no potential secrets.
 *
 * Shared between client (display redaction) and server (export redaction).
 */

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** One detection rule: what to look for and what to replace it with. */
export interface SensitivePattern {
  /** Stable identifier for the rule (reported in `RedactionResult.categories`). */
  id: string;
  /** Placeholder text substituted for a match, e.g. "[AWS_KEY]". */
  label: string;
  /** Pattern to match; every rule in this file carries the `g` flag. */
  regex: RegExp;
  /** At least one keyword must appear in the text before the regex is run. */
  keywords: string[];
  /** Optional veto: return `true` to leave the matched text untouched. */
  falsePositiveCheck?: (match: string) => boolean;
}

/** Outcome of a redaction pass over one string. */
export interface RedactionResult {
  sanitized: string;
  redactionCount: number;
  categories: string[];
}

// ---------------------------------------------------------------------------
// False-positive helpers
// ---------------------------------------------------------------------------

// Entries starting with "@" are domain suffixes; any other entry is a complete
// address that must match exactly.
const ALLOWLISTED_EMAILS = [
  "@example.com",
  "@example.org",
  "@test.com",
  "@test.org",
  "@localhost",
  "noreply@anthropic.com",
];

const ALLOWLISTED_IPS = ["127.0.0.1", "0.0.0.0"];

const DOCUMENTATION_IP_PREFIXES = ["192.0.2.", "198.51.100.", "203.0.113."];

/**
 * Returns true when a matched e-mail address should NOT be redacted.
 *
 * Comparison is case-insensitive. Domain entries ("@example.com") match any
 * address at that domain; full-address entries ("noreply@anthropic.com") match
 * only that exact address, so "xnoreply@anthropic.com" is still redacted.
 */
function isAllowlistedEmail(match: string): boolean {
  const lower = match.toLowerCase();
  return ALLOWLISTED_EMAILS.some((entry) =>
    entry.startsWith("@") ? lower.endsWith(entry) : lower === entry
  );
}

/**
 * Returns true when a matched IPv4 address should NOT be redacted: either one
 * of the exact addresses in ALLOWLISTED_IPS or any address starting with one
 * of DOCUMENTATION_IP_PREFIXES.
 */
function isAllowlistedIp(match: string): boolean {
  if (ALLOWLISTED_IPS.includes(match)) return true;
  return DOCUMENTATION_IP_PREFIXES.some((prefix) => match.startsWith(prefix));
}

// ---------------------------------------------------------------------------
// Pattern Definitions
// ---------------------------------------------------------------------------

export const SENSITIVE_PATTERNS: SensitivePattern[] = [
  // ---- Tier 1: Known Secret Formats ----

  // #1 AWS Access Key
  {
    id: "aws_access_key",
    label: "[AWS_KEY]",
    regex: /\b(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16}\b/g,
    keywords: ["AKIA", "ASIA", "ABIA", "ACCA", "A3T"],
  },

  // #2 AWS Bedrock
  {
    id: "aws_bedrock",
    label: "[AWS_BEDROCK_KEY]",
    regex: /\bABSK[A-Za-z0-9+/]{109,269}={0,2}\b/g,
    keywords: ["ABSK"],
  },

  // #3 GitHub PAT
  {
    id: "github_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bghp_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghp_"],
  },

  // #4 GitHub Fine-Grained PAT
  {
    id: "github_fine_grained_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bgithub_pat_\w{82}\b/g,
    keywords: ["github_pat_"],
  },

  // #5 GitHub App Token
  {
    id: "github_app_token",
    label: "[GITHUB_TOKEN]",
    regex: /\b(?:ghu|ghs)_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghu_", "ghs_"],
  },

  // #6 GitLab PAT
  {
    id: "gitlab_pat",
    label: "[GITLAB_TOKEN]",
    regex: /\bglpat-[\w-]{20}\b/g,
    keywords: ["glpat-"],
  },

  // #7 GitLab Runner Token
  {
    id: "gitlab_runner",
    label: "[GITLAB_TOKEN]",
    regex: /\bglrt-[0-9a-zA-Z_-]{20}\b/g,
    keywords: ["glrt-"],
  },

  // #8 OpenAI Key
  {
    id: "openai_key",
    label: "[OPENAI_KEY]",
    regex:
      /\b(?:sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{58,74}T3BlbkFJ[A-Za-z0-9_-]{58,74}|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})\b/g,
    keywords: ["sk-proj-", "sk-svcacct-", "sk-admin-", "T3BlbkFJ"],
  },

  // #9 Anthropic Key
  {
    id: "anthropic_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-api03-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-api"],
  },

  // #10 Anthropic Admin Key
  {
    id: "anthropic_admin_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-admin01-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-admin"],
  },

  // #11 HuggingFace Token
  {
    id: "huggingface_token",
    label: "[HF_TOKEN]",
    regex: /\bhf_[a-zA-Z]{34}\b/g,
    keywords: ["hf_"],
  },

  // #12 Perplexity Key
  {
    id: "perplexity_key",
    label: "[PERPLEXITY_KEY]",
    regex: /\bpplx-[a-zA-Z0-9]{48}\b/g,
    keywords: ["pplx-"],
  },

  // #13 Stripe Key
  {
    id: "stripe_key",
    label: "[STRIPE_KEY]",
    regex: /\b(?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99}\b/g,
    keywords: ["sk_live_", "sk_test_", "sk_prod_", "rk_live_", "rk_test_", "rk_prod_"],
  },

  // #14 Slack Bot Token
  {
    id: "slack_bot_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*\b/g,
    keywords: ["xoxb-"],
  },

  // #15 Slack User Token
  {
    id: "slack_user_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}\b/g,
    keywords: ["xoxp-", "xoxe-"],
  },

  // #16 Slack Webhook
  {
    id: "slack_webhook",
    label: "[SLACK_WEBHOOK]",
    regex:
      /(?:https?:\/\/)?hooks\.slack\.com\/(?:services|workflows|triggers)\/[A-Za-z0-9+/]{43,56}/g,
    keywords: ["hooks.slack.com"],
  },

  // #17 SendGrid Token
  {
    id: "sendgrid_token",
    label: "[SENDGRID_TOKEN]",
    regex: /\bSG\.[a-z0-9=_\-.]{66}\b/gi,
    keywords: ["SG."],
  },

  // #18 Twilio Key
  {
    id: "twilio_key",
    label: "[TWILIO_KEY]",
    regex: /\bSK[0-9a-fA-F]{32}\b/g,
    keywords: ["SK"],
  },

  // #19 GCP API Key
  {
    id: "gcp_api_key",
    label: "[GCP_KEY]",
    regex: /\bAIza[\w-]{35}\b/g,
    keywords: ["AIza"],
  },

  // #20 Azure AD Client Secret
  {
    id: "azure_ad_secret",
    label: "[AZURE_SECRET]",
    regex: /[a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34}/g,
    keywords: ["Q~"],
  },

  // #21 Heroku Key
  {
    id: "heroku_key",
    label: "[HEROKU_KEY]",
    regex: /\bHRKU-AA[0-9a-zA-Z_-]{58}\b/g,
    keywords: ["HRKU-"],
  },

  // #22 npm Token
  {
    id: "npm_token",
    label: "[NPM_TOKEN]",
    regex: /\bnpm_[a-z0-9]{36}\b/gi,
    keywords: ["npm_"],
  },

  // #23 PyPI Token
  {
    id: "pypi_token",
    label: "[PYPI_TOKEN]",
    regex: /\bpypi-AgEIcHlwaS5vcmc[\w-]{50,1000}\b/g,
    keywords: ["pypi-"],
  },

  // #24 Sentry Token
  {
    id: "sentry_token",
    label: "[SENTRY_TOKEN]",
    regex: /\b(?:sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}|sntryu_[a-f0-9]{64})\b/g,
    keywords: ["sntrys_", "sntryu_"],
  },

  // #25 JWT
  {
    id: "jwt",
    label: "[JWT]",
    regex:
      /\bey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/\\_-]{17,}\.(?:[a-zA-Z0-9/\\_-]{10,}={0,2})\b/g,
    keywords: ["eyJ"],
  },

  // #26 Private Key (PEM)
  {
    id: "private_key",
    label: "[PRIVATE_KEY]",
    regex:
      /-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----[\s\S]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----/g,
    keywords: ["PRIVATE KEY"],
  },

  // #27 Generic API Key (contextual: secret-like variable name + value)
  {
    id: "generic_api_key",
    label: "[API_KEY]",
    regex:
      /(?:access|auth|api|credential|creds|key|passw(?:or)?d|secret|token)(?:[\t \w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=\-/+]{10,150})/gi,
    // One keyword per alternative at the head of the regex, so the pre-filter
    // can never skip text the regex would match (e.g. "x-api-key: ...").
    keywords: [
      "access",
      "auth",
      "api",
      "credential",
      "creds",
      "key",
      "passwd",
      "password",
      "secret",
      "token",
    ],
  },

  // ---- Tier 2: PII / System Info ----

  // #28 Home directory paths
  {
    id: "home_directory",
    label: "[HOME_PATH]",
    regex:
      /(?:\/home\/[a-zA-Z0-9_.-]+|\/Users\/[a-zA-Z0-9_.-]+|C:\\Users\\[a-zA-Z0-9_.-]+)(?:[/\\][^\s"'`<>)}\]]*)?/g,
    keywords: ["/home/", "/Users/", "C:\\Users\\"],
  },

  // #29 Connection strings
  {
    id: "connection_string",
    label: "[CONNECTION_STRING]",
    regex:
      /\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql):\/\/[^\s"'`]+/gi,
    keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"],
  },

  // #30 URLs with credentials
  {
    id: "url_with_creds",
    label: "[URL_WITH_CREDS]",
    regex: /https?:\/\/[^\s:@]+:[^\s:@]+@[^\s"'`]+/g,
    keywords: ["://"],
  },

  // #31 Email addresses
  {
    id: "email",
    label: "[EMAIL]",
    // TLD class is letters only; "|" inside [...] is a literal, not alternation.
    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    keywords: ["@"],
    falsePositiveCheck: isAllowlistedEmail,
  },

  // #32 IPv4 addresses
  {
    id: "ipv4",
    label: "[IP_ADDR]",
    regex:
      /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g,
    keywords: ["."],
    falsePositiveCheck: isAllowlistedIp,
  },

  // #33 Bearer tokens
  {
    id: "bearer_token",
    label: "[BEARER_TOKEN]",
    regex: /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
    keywords: ["Bearer"],
  },

  // #34 Env var secret assignments
  {
    id: "env_var_secret",
    label: "[ENV_SECRET]",
    regex:
      /\b[A-Z_]*(?:SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE)[A-Z_]*\s*[=:]\s*["']?[^\s"']{4,}["']?\b/g,
    keywords: ["SECRET", "TOKEN", "PASSWORD", "PASSWD", "PRIVATE"],
  },
];

// ---------------------------------------------------------------------------
// Core redaction functions
// ---------------------------------------------------------------------------

/**
 * Cheap pre-filter: reports whether at least one keyword occurs in `content`.
 *
 * @param content    Text about to be scanned.
 * @param keywords   Keywords declared by the pattern.
 * @param ignoreCase When true, both sides are lowercased before comparing.
 *                   Callers pass the regex's own `i` flag so that a
 *                   case-insensitive pattern is never skipped merely because
 *                   the text is cased differently from the keyword list.
 * @returns `true` if any keyword is found, `false` otherwise (including for an
 *          empty keyword list).
 */
function hasKeyword(
  content: string,
  keywords: string[],
  ignoreCase = false
): boolean {
  if (!ignoreCase) {
    return keywords.some((kw) => content.includes(kw));
  }
  const lowered = content.toLowerCase();
  return keywords.some((kw) => lowered.includes(kw.toLowerCase()));
}

/**
 * Replaces sensitive content in the input string with placeholder labels.
 * Pure function — no side effects.
 *
 * Patterns are applied in the order they appear in SENSITIVE_PATTERNS, each one
 * operating on the output of the previous one.
 *
 * @param input Text to sanitize; an empty string yields an empty result.
 * @returns The sanitized text, the number of replacements made, and the ids of
 *          the patterns that produced at least one replacement.
 */
export function redactSensitiveContent(input: string): RedactionResult {
  if (!input) {
    return { sanitized: "", redactionCount: 0, categories: [] };
  }

  let result = input;
  let count = 0;
  // Explicit element type: a bare `new Set()` is Set<unknown>, which cannot be
  // spread into the `string[]` that RedactionResult.categories requires.
  const matchedCategories = new Set<string>();

  for (const pattern of SENSITIVE_PATTERNS) {
    // Keyword pre-filter: skip expensive regex if no keyword found. The check
    // follows the regex's own case sensitivity.
    if (!hasKeyword(result, pattern.keywords, pattern.regex.ignoreCase)) {
      continue;
    }

    // Use a fresh regex each time to avoid lastIndex issues with /g flag
    const regex = new RegExp(pattern.regex.source, pattern.regex.flags);

    result = result.replace(regex, (match: string) => {
      // A pattern may veto individual matches (allowlisted e-mails, IPs, ...).
      if (pattern.falsePositiveCheck && pattern.falsePositiveCheck(match)) {
        return match;
      }
      count++;
      matchedCategories.add(pattern.id);
      return pattern.label;
    });
  }

  return {
    sanitized: result,
    redactionCount: count,
    categories: [...matchedCategories],
  };
}

/**
 * Convenience wrapper returning just the sanitized string.
 */
export function redactString(input: string): string {
  return redactSensitiveContent(input).sanitized;
}

/**
 * Returns a new ParsedMessage with sensitive content redacted from
 * content and toolInput fields. Does NOT mutate the original.
 *
 * A missing or empty toolInput is passed through unchanged.
 */
export function redactMessage(msg: ParsedMessage): ParsedMessage {
  return {
    ...msg,
    content: redactString(msg.content),
    toolInput: msg.toolInput ? redactString(msg.toolInput) : msg.toolInput,
    // toolName is typically safe (e.g. "Bash", "Read") — pass through unchanged
  };
}

// ---------------------------------------------------------------------------
// Patch metadata retained verbatim from the original diff; everything below
// belongs to the second new file, src/shared/types.ts:
//   diff --git a/src/shared/types.ts b/src/shared/types.ts
//   new file mode 100644
//   index 0000000..f27a92a
//   --- /dev/null
//   +++ b/src/shared/types.ts
//   @@ -0,0 +1,77 @@
// ---------------------------------------------------------------------------

/** Classification assigned to every parsed message. */
export type MessageCategory =
  | "user_message"
  | "assistant_text"
  | "thinking"
  | "tool_call"
  | "tool_result"
  | "system_message"
  | "hook_progress"
  | "file_snapshot"
  | "summary";

/** A single normalized message. */
export interface ParsedMessage {
  uuid: string;
  category: MessageCategory;
  content: string;
  toolName?: string;
  toolInput?: string;
  timestamp?: string;
  rawIndex: number;
}

/** Summary information about one session. */
export interface SessionEntry {
  id: string;
  summary: string;
  firstPrompt: string;
  project: string;
  created: string;
  modified: string;
  messageCount: number;
  path: string;
}

export interface SessionListResponse {
  sessions: SessionEntry[];
}

export interface SessionDetailResponse {
  id: string;
  project: string;
  messages: ParsedMessage[];
}

export interface ExportRequest {
  session: SessionDetailResponse;
  visibleMessageUuids: string[];
  redactedMessageUuids: string[];
  autoRedactEnabled?: boolean;
}

/** Every MessageCategory value, in declaration order. */
export const ALL_CATEGORIES: MessageCategory[] = [
  "user_message",
  "assistant_text",
  "thinking",
  "tool_call",
  "tool_result",
  "system_message",
  "hook_progress",
  "file_snapshot",
  "summary",
];

/** Human-readable label for each category; keyed by every MessageCategory. */
export const CATEGORY_LABELS: Record<MessageCategory, string> = {
  user_message: "User Messages",
  assistant_text: "Assistant Text",
  thinking: "Thinking Blocks",
  tool_call: "Tool Calls",
  tool_result: "Tool Results",
  system_message: "System Messages",
  hook_progress: "Hook/Progress",
  file_snapshot: "File Snapshots",
  summary: "Summaries",
};

/** Categories hidden by default. */
export const DEFAULT_HIDDEN_CATEGORIES: MessageCategory[] = [
  "thinking",
  "hook_progress",
];