import type { ParsedMessage } from "./types.js";

/**
 * Sensitive information detection and redaction module.
 *
 * Uses a curated set of 34 regex patterns derived from gitleaks' production
 * config, focused on patterns relevant to Claude Code session logs.
 * Employs keyword pre-filtering (gitleaks technique) to skip expensive regex
 * evaluation for messages that contain no potential secrets.
 *
 * Shared between client (display redaction) and server (export redaction).
 */

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** One detection rule: a regex plus the metadata needed to apply it. */
export interface SensitivePattern {
  /** Stable identifier reported in `RedactionResult.categories`. */
  id: string;
  /** Placeholder text substituted for each match, e.g. "[AWS_KEY]". */
  label: string;
  /** Expression whose full match is replaced by `label`. */
  regex: RegExp;
  /** At least one keyword must appear in the text before the regex is run. */
  keywords: string[];
  /** When present and returning true for a match, the match is left as-is. */
  falsePositiveCheck?: (match: string) => boolean;
}

/** Outcome of a redaction pass over a single string. */
export interface RedactionResult {
  /** Input with every accepted match replaced by its pattern label. */
  sanitized: string;
  /** Number of replacements performed. */
  redactionCount: number;
  /** Ids of the patterns that produced at least one replacement. */
  categories: string[];
}

// ---------------------------------------------------------------------------
// False-positive helpers
// ---------------------------------------------------------------------------

// Entries starting with "@" are domain suffixes; any other entry is a full
// address that must match exactly.
const ALLOWLISTED_EMAILS = [
  "@example.com",
  "@example.org",
  "@test.com",
  "@test.org",
  "@localhost",
  "noreply@anthropic.com",
];

const ALLOWLISTED_IPS = ["127.0.0.1", "0.0.0.0"];

const DOCUMENTATION_IP_PREFIXES = ["192.0.2.", "198.51.100.", "203.0.113."];

/**
 * Returns true when an email match should be left unredacted.
 *
 * Comparison is case-insensitive. Domain entries ("@example.com") allowlist
 * every address on that domain (and the bare domain itself); full-address
 * entries only allowlist that exact address, so an unrelated mailbox whose
 * name merely ends with an allowlisted address is still redacted.
 */
function isAllowlistedEmail(match: string): boolean {
  const lower = match.toLowerCase();
  return ALLOWLISTED_EMAILS.some((entry) => {
    if (entry.startsWith("@")) {
      return lower.endsWith(entry) || lower === entry.slice(1);
    }
    return lower === entry;
  });
}

/**
 * Returns true when an IPv4 match is one of the explicitly allowlisted
 * addresses or starts with one of the documentation prefixes.
 */
function isAllowlistedIp(match: string): boolean {
  return (
    ALLOWLISTED_IPS.includes(match) ||
    DOCUMENTATION_IP_PREFIXES.some((prefix) => match.startsWith(prefix))
  );
}

// ---------------------------------------------------------------------------
// Pattern Definitions
// ---------------------------------------------------------------------------

/**
 * Ordered rule table. Patterns are applied sequentially, each one operating
 * on the output of the previous one, so order matters.
 */
export const SENSITIVE_PATTERNS: SensitivePattern[] = [
  // ---- Tier 1: Known Secret Formats ----

  // #1 AWS Access Key
  {
    id: "aws_access_key",
    label: "[AWS_KEY]",
    regex: /\b(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16}\b/g,
    keywords: ["AKIA", "ASIA", "ABIA", "ACCA", "A3T"],
  },
  // #2 AWS Bedrock
  {
    id: "aws_bedrock",
    label: "[AWS_BEDROCK_KEY]",
    regex: /\bABSK[A-Za-z0-9+/]{109,269}={0,2}\b/g,
    keywords: ["ABSK"],
  },
  // #3 GitHub PAT
  {
    id: "github_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bghp_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghp_"],
  },
  // #4 GitHub Fine-Grained PAT
  {
    id: "github_fine_grained_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bgithub_pat_\w{82}\b/g,
    keywords: ["github_pat_"],
  },
  // #5 GitHub App Token
  {
    id: "github_app_token",
    label: "[GITHUB_TOKEN]",
    regex: /\b(?:ghu|ghs)_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghu_", "ghs_"],
  },
  // #6 GitLab PAT
  {
    id: "gitlab_pat",
    label: "[GITLAB_TOKEN]",
    regex: /\bglpat-[\w-]{20}\b/g,
    keywords: ["glpat-"],
  },
  // #7 GitLab Runner Token
  {
    id: "gitlab_runner",
    label: "[GITLAB_TOKEN]",
    regex: /\bglrt-[0-9a-zA-Z_-]{20}\b/g,
    keywords: ["glrt-"],
  },
  // #8 OpenAI Key
  {
    id: "openai_key",
    label: "[OPENAI_KEY]",
    regex: /\b(?:sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{58,74}T3BlbkFJ[A-Za-z0-9_-]{58,74}|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})\b/g,
    keywords: ["sk-proj-", "sk-svcacct-", "sk-admin-", "T3BlbkFJ"],
  },
  // #9 Anthropic Key
  {
    id: "anthropic_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-api03-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-api"],
  },
  // #10 Anthropic Admin Key
  {
    id: "anthropic_admin_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-admin01-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-admin"],
  },
  // #11 HuggingFace Token
  {
    id: "huggingface_token",
    label: "[HF_TOKEN]",
    regex: /\bhf_[a-zA-Z]{34}\b/g,
    keywords: ["hf_"],
  },
  // #12 Perplexity Key
  {
    id: "perplexity_key",
    label: "[PERPLEXITY_KEY]",
    regex: /\bpplx-[a-zA-Z0-9]{48}\b/g,
    keywords: ["pplx-"],
  },
  // #13 Stripe Key
  {
    id: "stripe_key",
    label: "[STRIPE_KEY]",
    regex: /\b(?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99}\b/g,
    keywords: [
      "sk_live_",
      "sk_test_",
      "sk_prod_",
      "rk_live_",
      "rk_test_",
      "rk_prod_",
    ],
  },
  // #14 Slack Bot Token
  {
    id: "slack_bot_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*\b/g,
    keywords: ["xoxb-"],
  },
  // #15 Slack User Token
  {
    id: "slack_user_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}\b/g,
    keywords: ["xoxp-", "xoxe-"],
  },
  // #16 Slack Webhook
  {
    id: "slack_webhook",
    label: "[SLACK_WEBHOOK]",
    regex: /(?:https?:\/\/)?hooks\.slack\.com\/(?:services|workflows|triggers)\/[A-Za-z0-9+/]{43,56}/g,
    keywords: ["hooks.slack.com"],
  },
  // #17 SendGrid Token
  {
    id: "sendgrid_token",
    label: "[SENDGRID_TOKEN]",
    regex: /\bSG\.[a-z0-9=_\-.]{66}\b/gi,
    keywords: ["SG."],
  },
  // #18 Twilio Key
  {
    id: "twilio_key",
    label: "[TWILIO_KEY]",
    regex: /\bSK[0-9a-fA-F]{32}\b/g,
    keywords: ["SK"],
  },
  // #19 GCP API Key
  {
    id: "gcp_api_key",
    label: "[GCP_KEY]",
    regex: /\bAIza[\w-]{35}\b/g,
    keywords: ["AIza"],
  },
  // #20 Azure AD Client Secret
  {
    id: "azure_ad_secret",
    label: "[AZURE_SECRET]",
    regex: /[a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34}/g,
    keywords: ["Q~"],
  },
  // #21 Heroku Key
  {
    id: "heroku_key",
    label: "[HEROKU_KEY]",
    regex: /\bHRKU-AA[0-9a-zA-Z_-]{58}\b/g,
    keywords: ["HRKU-"],
  },
  // #22 npm Token
  {
    id: "npm_token",
    label: "[NPM_TOKEN]",
    regex: /\bnpm_[a-z0-9]{36}\b/gi,
    keywords: ["npm_"],
  },
  // #23 PyPI Token
  {
    id: "pypi_token",
    label: "[PYPI_TOKEN]",
    regex: /\bpypi-AgEIcHlwaS5vcmc[\w-]{50,1000}\b/g,
    keywords: ["pypi-"],
  },
  // #24 Sentry Token
  {
    id: "sentry_token",
    label: "[SENTRY_TOKEN]",
    regex: /\b(?:sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}|sntryu_[a-f0-9]{64})\b/g,
    keywords: ["sntrys_", "sntryu_"],
  },
  // #25 JWT
  {
    id: "jwt",
    label: "[JWT]",
    regex: /\bey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/\\_-]{17,}\.(?:[a-zA-Z0-9/\\_-]{10,}={0,2})\b/g,
    keywords: ["eyJ"],
  },
  // #26 Private Key (PEM)
  {
    id: "private_key",
    label: "[PRIVATE_KEY]",
    regex: /-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----[\s\S]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----/g,
    keywords: ["PRIVATE KEY"],
  },
  // #27 Generic API Key (contextual: secret-like variable name + value)
  {
    id: "generic_api_key",
    label: "[API_KEY]",
    regex: /(?:access|auth|api|credential|creds|key|passw(?:or)?d|secret|token)(?:[\t \w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=\-/+]{10,150})/gi,
    keywords: [
      "secret",
      "token",
      "password",
      "passwd",
      "api_key",
      "apikey",
      "access_key",
      "auth",
      "credential",
    ],
  },

  // ---- Tier 2: PII / System Info ----

  // #28 Home directory paths
  {
    id: "home_directory",
    label: "[HOME_PATH]",
    regex: /(?:\/home\/[a-zA-Z0-9_.-]+|\/Users\/[a-zA-Z0-9_.-]+|C:\\Users\\[a-zA-Z0-9_.-]+)(?:[/\\][^\s"'`<>)}\]]*)?/g,
    keywords: ["/home/", "/Users/", "C:\\Users\\"],
  },
  // #29 Connection strings
  {
    id: "connection_string",
    label: "[CONNECTION_STRING]",
    regex: /\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql):\/\/[^\s"'`]+/gi,
    keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"],
  },
  // #30 URLs with credentials (user:pass@host pattern)
  {
    id: "url_with_creds",
    label: "[URL_WITH_CREDS]",
    regex: /https?:\/\/[^\s:@]+:[^\s:@]+@[^\s"'`]+/g,
    keywords: ["://"],
  },
  // #31 Email addresses
  {
    id: "email",
    label: "[EMAIL]",
    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    keywords: ["@"],
    falsePositiveCheck: isAllowlistedEmail,
  },
  // #32 IPv4 addresses
  {
    id: "ipv4",
    label: "[IP_ADDR]",
    regex: /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g,
    keywords: ["0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."],
    falsePositiveCheck: isAllowlistedIp,
  },
  // #33 Bearer tokens
  {
    id: "bearer_token",
    label: "[BEARER_TOKEN]",
    regex: /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
    keywords: ["Bearer"],
  },
  // #34 Env var secret assignments
  {
    id: "env_var_secret",
    label: "[ENV_SECRET]",
    regex: /\b[A-Z_]*(?:SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE)[A-Z_]*\s*[=:]\s*["']?[^\s"']{4,}["']?\b/g,
    keywords: ["SECRET", "TOKEN", "PASSWORD", "PASSWD", "PRIVATE"],
  },
];

// ---------------------------------------------------------------------------
// Core redaction functions
// ---------------------------------------------------------------------------

/**
 * Check if any keyword from the pattern appears in the content.
 * Case-sensitive by default; pass caseInsensitive=true for patterns
 * with case-insensitive regexes.
 */
function hasKeyword(
  content: string,
  keywords: string[],
  caseInsensitive = false
): boolean {
  if (!caseInsensitive) {
    return keywords.some((kw) => content.includes(kw));
  }
  // Lowercase the haystack once, then compare against lowercased keywords.
  const haystack = content.toLowerCase();
  return keywords.some((kw) => haystack.includes(kw.toLowerCase()));
}

/**
 * Replaces sensitive content in the input string with placeholder labels.
 *
 * Patterns from `SENSITIVE_PATTERNS` are applied in order, each to the output
 * of the previous one. The input string is never modified.
 *
 * @param input - Text to sanitize; an empty string yields an empty result.
 * @returns The sanitized text, the number of replacements made, and the ids
 *   of the patterns that fired (in first-fired order, without duplicates).
 */
export function redactSensitiveContent(input: string): RedactionResult {
  if (!input) {
    return { sanitized: "", redactionCount: 0, categories: [] };
  }

  let result = input;
  let count = 0;
  // Explicit element type so the spread below is string[], not unknown[].
  const matchedCategories = new Set<string>();

  for (const pattern of SENSITIVE_PATTERNS) {
    // Keyword pre-filter: skip expensive regex if no keyword found.
    // Use case-insensitive matching when the regex has the /i flag.
    const isCaseInsensitive = pattern.regex.flags.includes("i");
    if (!hasKeyword(result, pattern.keywords, isCaseInsensitive)) {
      continue;
    }

    // String.prototype.replace resets lastIndex to 0 for global regexes
    // before matching, so the shared pattern regex can be used directly
    // without cloning it on every call.
    result = result.replace(pattern.regex, (match: string) => {
      // Leave known false positives (allowlisted values) untouched.
      if (pattern.falsePositiveCheck?.(match)) {
        return match;
      }
      count++;
      matchedCategories.add(pattern.id);
      return pattern.label;
    });
  }

  return {
    sanitized: result,
    redactionCount: count,
    categories: [...matchedCategories],
  };
}

/**
 * Convenience wrapper returning just the sanitized string.
 */
export function redactString(input: string): string {
  return redactSensitiveContent(input).sanitized;
}

/**
 * Returns a new ParsedMessage with sensitive content redacted from
 * content and toolInput fields. Does NOT mutate the original.
 */
export function redactMessage(msg: ParsedMessage): ParsedMessage {
  return {
    ...msg,
    content: redactString(msg.content),
    // A falsy toolInput (missing or empty) is passed through unchanged.
    toolInput: msg.toolInput ? redactString(msg.toolInput) : msg.toolInput,
    // toolName is typically safe (e.g. "Bash", "Read") — pass through unchanged
  };
}