Add shared type definitions and sensitive content redactor

Shared module consumed by both the Express server and the React client:

types.ts:
- ParsedMessage: the normalized message unit (uuid, category, content,
  toolName, toolInput, timestamp, rawIndex) that the parser emits and
  every downstream consumer (viewer, filter, export) operates on
- MessageCategory: 9-value union covering user_message, assistant_text,
  thinking, tool_call, tool_result, system_message, hook_progress,
  file_snapshot, and summary
- SessionEntry / SessionListResponse / SessionDetailResponse / ExportRequest:
  API contract types for the sessions list, session detail, and HTML
  export endpoints
- ALL_CATEGORIES, CATEGORY_LABELS, DEFAULT_HIDDEN_CATEGORIES: constants
  for the filter panel UI and presets (thinking + hook_progress hidden
  by default)

sensitive-redactor.ts:
- 34 regex patterns derived from gitleaks production config, organized
  into Tier 1 (known secret formats: AWS, GitHub, GitLab, OpenAI,
  Anthropic, HuggingFace, Perplexity, Stripe, Slack, SendGrid, Twilio,
  GCP, Azure AD, Heroku, npm, PyPI, Sentry, JWT, PEM private keys,
  generic API key assignments) and Tier 2 (PII/system info: home
  directory paths, connection strings, URLs with credentials, email
  addresses, IPv4 addresses, Bearer tokens, env var secret assignments)
- Keyword pre-filtering: each pattern declares keywords that must appear
  in the text before the expensive regex is evaluated, following the
  gitleaks performance optimization approach
- False-positive allowlists: example/test email domains, localhost/
  documentation IPs (RFC 5737), noreply@anthropic.com
- Pure functions: redactSensitiveContent returns {sanitized,
  redactionCount, categories}, redactString returns just the sanitized
  string, redactMessage returns a new ParsedMessage with content and
  toolInput redacted

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-29 22:55:48 -05:00
parent 7e15c36e2f
commit c4e15bf082
2 changed files with 514 additions and 0 deletions

View File

@@ -0,0 +1,437 @@
import type { ParsedMessage } from "./types.js";
/**
* Sensitive information detection and redaction module.
*
* Uses a curated set of 34 regex patterns derived from gitleaks' production
* config, focused on patterns relevant to Claude Code session logs.
* Employs keyword pre-filtering (gitleaks technique) to skip expensive regex
* evaluation for messages that contain no potential secrets.
*
* Shared between client (display redaction) and server (export redaction).
*/
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
* One detection rule: a regex, the placeholder that replaces its matches, and
* the keywords used to decide whether the regex needs to run at all.
*/
export interface SensitivePattern {
// Rule identifier; reported in RedactionResult.categories when the rule redacts something.
id: string;
label: string; // Placeholder text, e.g. "[AWS_KEY]"
// Matcher for the sensitive value; every match is replaced by `label`.
regex: RegExp;
keywords: string[]; // At least one keyword must appear before running regex
// Optional veto: when it returns true for a match, the match is left unredacted.
falsePositiveCheck?: (match: string) => boolean;
}
/**
* Outcome of a redaction pass over one string.
*/
export interface RedactionResult {
// The input text with every accepted match replaced by its pattern's label.
sanitized: string;
// Number of replacements performed across all patterns.
redactionCount: number;
// De-duplicated ids of the patterns that redacted at least one match.
categories: string[];
}
// ---------------------------------------------------------------------------
// False-positive helpers
// ---------------------------------------------------------------------------
/**
 * Email allowlist: matches listed here are never redacted.
 *
 * Entries that start with "@" are domain suffixes (any mailbox at that domain
 * is allowed). Every other entry is a complete address and must match exactly.
 * Entries are lowercase because candidates are lowercased before comparison.
 */
const ALLOWLISTED_EMAILS = [
  "@example.com",
  "@example.org",
  "@test.com",
  "@test.org",
  "@localhost",
  "noreply@anthropic.com",
];

/** Exact IPv4 addresses that are never redacted. */
const ALLOWLISTED_IPS = ["127.0.0.1", "0.0.0.0"];

/** Dotted prefixes of the documentation-only IPv4 ranges (RFC 5737). */
const DOCUMENTATION_IP_PREFIXES = ["192.0.2.", "198.51.100.", "203.0.113."];

/**
 * Tells whether an email match is on the allowlist and should be kept as-is.
 *
 * @param match - The text matched by the email pattern.
 * @returns true when the address belongs to an allowlisted domain or equals
 *   an allowlisted full address (case-insensitive).
 */
function isAllowlistedEmail(match: string): boolean {
  const lower = match.toLowerCase();
  return ALLOWLISTED_EMAILS.some((entry) => {
    if (entry.startsWith("@")) {
      // Domain entry: accept any mailbox at the domain (and the bare domain,
      // which earlier versions of this check also accepted).
      return lower.endsWith(entry) || lower === entry.slice(1);
    }
    // Full-address entry: require equality so that e.g.
    // "jane.noreply@anthropic.com" is not mistaken for "noreply@anthropic.com".
    return lower === entry;
  });
}

/**
 * Tells whether an IPv4 match is on the allowlist and should be kept as-is.
 *
 * @param match - The text matched by the IPv4 pattern.
 * @returns true for the exact allowlisted addresses and for any address that
 *   starts with one of the documentation-range prefixes.
 */
function isAllowlistedIp(match: string): boolean {
  if (ALLOWLISTED_IPS.includes(match)) return true;
  return DOCUMENTATION_IP_PREFIXES.some((prefix) => match.startsWith(prefix));
}
// ---------------------------------------------------------------------------
// Pattern Definitions
// ---------------------------------------------------------------------------
/**
 * The 34 detection rules, applied in array order by the redactor.
 *
 * Tier 1 (#1-#27) targets known secret formats; Tier 2 (#28-#34) targets
 * PII and system information. Each rule lists the keywords that gate its
 * regex. The email and IPv4 rules also carry a false-positive check so that
 * allowlisted values are left untouched.
 */
export const SENSITIVE_PATTERNS: SensitivePattern[] = [
  // ---- Tier 1: Known Secret Formats ----
  // #1 AWS Access Key
  {
    id: "aws_access_key",
    label: "[AWS_KEY]",
    regex: /\b(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16}\b/g,
    keywords: ["AKIA", "ASIA", "ABIA", "ACCA", "A3T"],
  },
  // #2 AWS Bedrock
  {
    id: "aws_bedrock",
    label: "[AWS_BEDROCK_KEY]",
    regex: /\bABSK[A-Za-z0-9+/]{109,269}={0,2}\b/g,
    keywords: ["ABSK"],
  },
  // #3 GitHub PAT
  {
    id: "github_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bghp_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghp_"],
  },
  // #4 GitHub Fine-Grained PAT
  {
    id: "github_fine_grained_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bgithub_pat_\w{82}\b/g,
    keywords: ["github_pat_"],
  },
  // #5 GitHub App Token
  {
    id: "github_app_token",
    label: "[GITHUB_TOKEN]",
    regex: /\b(?:ghu|ghs)_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghu_", "ghs_"],
  },
  // #6 GitLab PAT
  {
    id: "gitlab_pat",
    label: "[GITLAB_TOKEN]",
    regex: /\bglpat-[\w-]{20}\b/g,
    keywords: ["glpat-"],
  },
  // #7 GitLab Runner Token
  {
    id: "gitlab_runner",
    label: "[GITLAB_TOKEN]",
    regex: /\bglrt-[0-9a-zA-Z_-]{20}\b/g,
    keywords: ["glrt-"],
  },
  // #8 OpenAI Key
  {
    id: "openai_key",
    label: "[OPENAI_KEY]",
    regex:
      /\b(?:sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{58,74}T3BlbkFJ[A-Za-z0-9_-]{58,74}|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})\b/g,
    keywords: ["sk-proj-", "sk-svcacct-", "sk-admin-", "T3BlbkFJ"],
  },
  // #9 Anthropic Key
  {
    id: "anthropic_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-api03-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-api"],
  },
  // #10 Anthropic Admin Key
  {
    id: "anthropic_admin_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-admin01-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-admin"],
  },
  // #11 HuggingFace Token
  {
    id: "huggingface_token",
    label: "[HF_TOKEN]",
    regex: /\bhf_[a-zA-Z]{34}\b/g,
    keywords: ["hf_"],
  },
  // #12 Perplexity Key
  {
    id: "perplexity_key",
    label: "[PERPLEXITY_KEY]",
    regex: /\bpplx-[a-zA-Z0-9]{48}\b/g,
    keywords: ["pplx-"],
  },
  // #13 Stripe Key
  {
    id: "stripe_key",
    label: "[STRIPE_KEY]",
    regex: /\b(?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99}\b/g,
    keywords: ["sk_live_", "sk_test_", "sk_prod_", "rk_live_", "rk_test_", "rk_prod_"],
  },
  // #14 Slack Bot Token
  {
    id: "slack_bot_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*\b/g,
    keywords: ["xoxb-"],
  },
  // #15 Slack User Token
  {
    id: "slack_user_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}\b/g,
    keywords: ["xoxp-", "xoxe-"],
  },
  // #16 Slack Webhook
  {
    id: "slack_webhook",
    label: "[SLACK_WEBHOOK]",
    regex:
      /(?:https?:\/\/)?hooks\.slack\.com\/(?:services|workflows|triggers)\/[A-Za-z0-9+/]{43,56}/g,
    keywords: ["hooks.slack.com"],
  },
  // #17 SendGrid Token
  {
    id: "sendgrid_token",
    label: "[SENDGRID_TOKEN]",
    regex: /\bSG\.[a-z0-9=_\-.]{66}\b/gi,
    keywords: ["SG."],
  },
  // #18 Twilio Key
  {
    id: "twilio_key",
    label: "[TWILIO_KEY]",
    regex: /\bSK[0-9a-fA-F]{32}\b/g,
    keywords: ["SK"],
  },
  // #19 GCP API Key
  {
    id: "gcp_api_key",
    label: "[GCP_KEY]",
    regex: /\bAIza[\w-]{35}\b/g,
    keywords: ["AIza"],
  },
  // #20 Azure AD Client Secret
  {
    id: "azure_ad_secret",
    label: "[AZURE_SECRET]",
    regex: /[a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34}/g,
    keywords: ["Q~"],
  },
  // #21 Heroku Key
  {
    id: "heroku_key",
    label: "[HEROKU_KEY]",
    regex: /\bHRKU-AA[0-9a-zA-Z_-]{58}\b/g,
    keywords: ["HRKU-"],
  },
  // #22 npm Token
  {
    id: "npm_token",
    label: "[NPM_TOKEN]",
    regex: /\bnpm_[a-z0-9]{36}\b/gi,
    keywords: ["npm_"],
  },
  // #23 PyPI Token
  {
    id: "pypi_token",
    label: "[PYPI_TOKEN]",
    regex: /\bpypi-AgEIcHlwaS5vcmc[\w-]{50,1000}\b/g,
    keywords: ["pypi-"],
  },
  // #24 Sentry Token
  {
    id: "sentry_token",
    label: "[SENTRY_TOKEN]",
    regex: /\b(?:sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}|sntryu_[a-f0-9]{64})\b/g,
    keywords: ["sntrys_", "sntryu_"],
  },
  // #25 JWT
  {
    id: "jwt",
    label: "[JWT]",
    regex:
      /\bey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/\\_-]{17,}\.(?:[a-zA-Z0-9/\\_-]{10,}={0,2})\b/g,
    keywords: ["eyJ"],
  },
  // #26 Private Key (PEM)
  {
    id: "private_key",
    label: "[PRIVATE_KEY]",
    regex:
      /-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----[\s\S]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----/g,
    keywords: ["PRIVATE KEY"],
  },
  // #27 Generic API Key (contextual: secret-like variable name + value)
  {
    id: "generic_api_key",
    label: "[API_KEY]",
    regex:
      /(?:access|auth|api|credential|creds|key|passw(?:or)?d|secret|token)(?:[\t \w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=\-/+]{10,150})/gi,
    keywords: [
      "secret",
      "token",
      "password",
      "passwd",
      "api_key",
      "apikey",
      "access_key",
      "auth",
      "credential",
    ],
  },
  // ---- Tier 2: PII / System Info ----
  // #28 Home directory paths
  {
    id: "home_directory",
    label: "[HOME_PATH]",
    regex:
      /(?:\/home\/[a-zA-Z0-9_.-]+|\/Users\/[a-zA-Z0-9_.-]+|C:\\Users\\[a-zA-Z0-9_.-]+)(?:[/\\][^\s"'`<>)}\]]*)?/g,
    keywords: ["/home/", "/Users/", "C:\\Users\\"],
  },
  // #29 Connection strings
  {
    id: "connection_string",
    label: "[CONNECTION_STRING]",
    regex:
      /\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql):\/\/[^\s"'`]+/gi,
    keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"],
  },
  // #30 URLs with credentials
  {
    id: "url_with_creds",
    label: "[URL_WITH_CREDS]",
    regex: /https?:\/\/[^\s:@]+:[^\s:@]+@[^\s"'`]+/g,
    keywords: ["://"],
  },
  // #31 Email addresses
  {
    id: "email",
    label: "[EMAIL]",
    // The TLD class is [A-Za-z]. A "|" inside a character class is a literal
    // pipe, not alternation, and would let "a@b.com|next" match as one address.
    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    keywords: ["@"],
    falsePositiveCheck: isAllowlistedEmail,
  },
  // #32 IPv4 addresses
  {
    id: "ipv4",
    label: "[IP_ADDR]",
    regex:
      /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g,
    keywords: ["."],
    falsePositiveCheck: isAllowlistedIp,
  },
  // #33 Bearer tokens
  {
    id: "bearer_token",
    label: "[BEARER_TOKEN]",
    regex: /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
    keywords: ["Bearer"],
  },
  // #34 Env var secret assignments
  // NOTE(review): the trailing \b makes the match end on a word character, so
  // a closing quote or trailing punctuation in the value (e.g. the "!!" in
  // PASSWORD=abcd!!) appears to stay in the output — confirm whether intended.
  {
    id: "env_var_secret",
    label: "[ENV_SECRET]",
    regex:
      /\b[A-Z_]*(?:SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE)[A-Z_]*\s*[=:]\s*["']?[^\s"']{4,}["']?\b/g,
    keywords: ["SECRET", "TOKEN", "PASSWORD", "PASSWD", "PRIVATE"],
  },
];
// ---------------------------------------------------------------------------
// Core redaction functions
// ---------------------------------------------------------------------------
/**
 * Cheap pre-filter that decides whether a pattern's regex needs to run.
 *
 * The comparison is case-insensitive. Several patterns use case-insensitive
 * regexes with lowercase keywords, so a case-sensitive check would skip text
 * such as "API_KEY=..." or "Password: ..." before the regex ever saw it.
 * Matching more loosely here only costs extra regex evaluations; the regex
 * itself still decides what is redacted.
 *
 * @param content - Text to scan.
 * @param keywords - Candidate substrings; an empty list never matches.
 * @returns true when at least one keyword occurs in `content`, ignoring case.
 */
function hasKeyword(content: string, keywords: string[]): boolean {
  if (keywords.length === 0) return false;
  // Lowercase the haystack once rather than once per keyword.
  const haystack = content.toLowerCase();
  return keywords.some((kw) => haystack.includes(kw.toLowerCase()));
}
/**
 * Runs every entry of SENSITIVE_PATTERNS over `input`, in array order, and
 * substitutes each accepted match with that pattern's placeholder label.
 *
 * A pattern is skipped when none of its keywords is found in the text as
 * rewritten so far. A match is left untouched when the pattern's
 * falsePositiveCheck returns a truthy value for it.
 *
 * @param input - Text to sanitize; a falsy input yields an empty result.
 * @returns The rewritten text, the number of substitutions made, and the ids
 *   of the patterns that made at least one substitution.
 */
export function redactSensitiveContent(input: string): RedactionResult {
  if (!input) {
    return { sanitized: "", redactionCount: 0, categories: [] };
  }

  const triggeredIds = new Set<string>();
  let substitutions = 0;
  let text = input;

  for (const pattern of SENSITIVE_PATTERNS) {
    if (hasKeyword(text, pattern.keywords)) {
      // Work on a private copy so the shared regex object's lastIndex is never involved.
      const matcher = new RegExp(pattern.regex.source, pattern.regex.flags);
      text = text.replace(matcher, (hit: string) => {
        const vetoed = pattern.falsePositiveCheck
          ? pattern.falsePositiveCheck(hit)
          : false;
        if (vetoed) {
          return hit;
        }
        substitutions += 1;
        triggeredIds.add(pattern.id);
        return pattern.label;
      });
    }
  }

  return {
    sanitized: text,
    redactionCount: substitutions,
    categories: Array.from(triggeredIds),
  };
}
/**
 * Shorthand for callers that only need the redacted text and not the
 * redaction statistics.
 *
 * @param input - Text to sanitize.
 * @returns The `sanitized` field of redactSensitiveContent's result.
 */
export function redactString(input: string): string {
  const { sanitized } = redactSensitiveContent(input);
  return sanitized;
}
/**
 * Builds a copy of `msg` whose `content` and `toolInput` have been passed
 * through redactString. The input object is left unmodified.
 *
 * @param msg - Message to sanitize.
 * @returns A new ParsedMessage; all other fields are copied as-is.
 */
export function redactMessage(msg: ParsedMessage): ParsedMessage {
  const safeContent = redactString(msg.content);
  // A falsy toolInput (missing or empty) is carried over unchanged.
  const safeToolInput = msg.toolInput
    ? redactString(msg.toolInput)
    : msg.toolInput;
  // toolName is copied verbatim; it is typically a tool identifier such as "Bash" or "Read".
  return { ...msg, content: safeContent, toolInput: safeToolInput };
}

77
src/shared/types.ts Normal file
View File

@@ -0,0 +1,77 @@
/**
* The nine kinds of message a parsed session can contain.
* Also the key type of CATEGORY_LABELS and the element type of the
* category constant arrays in this module.
*/
export type MessageCategory =
| "user_message"
| "assistant_text"
| "thinking"
| "tool_call"
| "tool_result"
| "system_message"
| "hook_progress"
| "file_snapshot"
| "summary";
/**
* Normalized message unit carried in SessionDetailResponse.messages and
* accepted by the redactor's redactMessage.
*/
export interface ParsedMessage {
// Message identifier; ExportRequest refers to messages by this value.
uuid: string;
// Which of the nine message kinds this is.
category: MessageCategory;
// Message text; redacted by redactMessage.
content: string;
// Optional tool name (e.g. "Bash", "Read"); passed through unredacted by redactMessage.
toolName?: string;
// Optional tool input as a string; redacted by redactMessage when non-empty.
toolInput?: string;
// Optional timestamp string. NOTE(review): format is not visible here — confirm against the parser.
timestamp?: string;
// NOTE(review): presumably the position of the source record in the raw session log — confirm against the parser.
rawIndex: number;
}
/**
* One session as listed in SessionListResponse.sessions.
* NOTE(review): field meanings below are inferred from names only — confirm
* against the server code that builds these entries.
*/
export interface SessionEntry {
id: string;
summary: string;
firstPrompt: string;
project: string;
// Creation / modification markers, carried as strings (format not visible here).
created: string;
modified: string;
messageCount: number;
path: string;
}
/**
* Response shape for the sessions list: a wrapper around the session entries.
*/
export interface SessionListResponse {
sessions: SessionEntry[];
}
/**
* Response shape for a single session: its identity plus its parsed messages.
* Also embedded in ExportRequest as the session to export.
*/
export interface SessionDetailResponse {
id: string;
project: string;
messages: ParsedMessage[];
}
/**
* Request shape for exporting a session.
* NOTE(review): how the server interprets the uuid lists and the flag is not
* visible here — the notes below are inferred from names; confirm against the
* export handler.
*/
export interface ExportRequest {
// The full session being exported.
session: SessionDetailResponse;
// Presumably the ParsedMessage.uuid values to include in the export.
visibleMessageUuids: string[];
// Presumably the ParsedMessage.uuid values the user chose to redact.
redactedMessageUuids: string[];
// Optional flag; presumably turns on automatic sensitive-content redaction.
autoRedactEnabled?: boolean;
}
/**
 * Every message category, in the same order the MessageCategory union
 * declares them.
 */
export const ALL_CATEGORIES: MessageCategory[] = [
  "user_message",
  "assistant_text",
  "thinking",
  "tool_call",
  "tool_result",
  "system_message",
  "hook_progress",
  "file_snapshot",
  "summary",
];
/**
 * Human-readable display label for each message category.
 * Typed as a Record over MessageCategory, so every category must have one.
 */
export const CATEGORY_LABELS: Record<MessageCategory, string> = {
  user_message: "User Messages",
  assistant_text: "Assistant Text",
  thinking: "Thinking Blocks",
  tool_call: "Tool Calls",
  tool_result: "Tool Results",
  system_message: "System Messages",
  hook_progress: "Hook/Progress",
  file_snapshot: "File Snapshots",
  summary: "Summaries",
};
/**
 * Categories that start out hidden: thinking blocks and hook/progress
 * messages.
 */
export const DEFAULT_HIDDEN_CATEGORIES: MessageCategory[] = [
  "thinking",
  "hook_progress",
];