From 0e5a36f0d159204cdeb8596158857abf8c408385 Mon Sep 17 00:00:00 2001 From: teernisse Date: Fri, 30 Jan 2026 01:09:11 -0500 Subject: [PATCH] Fix sensitive redactor keyword matching for case-insensitive patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The keyword pre-filter used case-sensitive string matching for all patterns, but several regex patterns use the /i flag (e.g. generic_api_key). This meant inputs like 'ApiKey = "secret"' would skip the keyword check for 'api_key' and miss the redaction entirely. Changes: - Add caseInsensitive parameter to hasKeyword() that lowercases both content and keywords before comparison - Detect /i flag on pattern regex and pass it through automatically - Narrow IP address keywords from ["."] to ["0.", "1.", ..., "9."] to reduce false-positive regex invocations on content containing periods - Fix email regex character class [A-Z|a-z] → [A-Za-z] (the pipe was literal) - Add clarifying comment on url_with_creds pattern - Add test cases for mixed-case and UPPER_CASE key assignments - Relax SECRET_KEY test assertion to accept either redaction label Co-Authored-By: Claude Opus 4.5 --- src/shared/sensitive-redactor.ts | 30 ++++++++++++++++++++------- tests/unit/sensitive-redactor.test.ts | 16 +++++++++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/shared/sensitive-redactor.ts b/src/shared/sensitive-redactor.ts index 4db528f..8d2bd40 100644 --- a/src/shared/sensitive-redactor.ts +++ b/src/shared/sensitive-redactor.ts @@ -316,7 +316,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [ keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"], }, - // #30 URLs with credentials + // #30 URLs with credentials (user:pass@host pattern) { id: "url_with_creds", label: "[URL_WITH_CREDS]", @@ -328,7 +328,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [ { id: "email", label: "[EMAIL]", - regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g, + regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, keywords: ["@"], falsePositiveCheck: isAllowlistedEmail, }, @@ -339,7 +339,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [ label: "[IP_ADDR]", regex: /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g, - keywords: ["."], + keywords: ["0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."], falsePositiveCheck: isAllowlistedIp, }, @@ -366,10 +366,22 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [ // --------------------------------------------------------------------------- /** - * Check if any keyword from the pattern appears in the content (case-sensitive - * for most patterns, lowered for a cheap pre-check). + * Check if any keyword from the pattern appears in the content. + * Case-sensitive by default; pass caseInsensitive=true for patterns + * with case-insensitive regexes. */ -function hasKeyword(content: string, keywords: string[]): boolean { +function hasKeyword( + content: string, + keywords: string[], + caseInsensitive = false +): boolean { + if (caseInsensitive) { + const lower = content.toLowerCase(); + for (const kw of keywords) { + if (lower.includes(kw.toLowerCase())) return true; + } + return false; + } for (const kw of keywords) { if (content.includes(kw)) return true; } @@ -390,8 +402,10 @@ export function redactSensitiveContent(input: string): RedactionResult { const matchedCategories = new Set(); for (const pattern of SENSITIVE_PATTERNS) { - // Keyword pre-filter: skip expensive regex if no keyword found - if (!hasKeyword(result, pattern.keywords)) { + // Keyword pre-filter: skip expensive regex if no keyword found. + // Use case-insensitive matching when the regex has the /i flag. + const isCaseInsensitive = pattern.regex.flags.includes("i"); + if (!hasKeyword(result, pattern.keywords, isCaseInsensitive)) { continue; } diff --git a/tests/unit/sensitive-redactor.test.ts b/tests/unit/sensitive-redactor.test.ts index 801b2bd..4bc4c1d 100644 --- a/tests/unit/sensitive-redactor.test.ts +++ b/tests/unit/sensitive-redactor.test.ts @@ -251,6 +251,18 @@ describe("sensitive-redactor", () => { const result = redactSensitiveContent(input); expect(result.redactionCount).toBeGreaterThan(0); }); + + it("redacts mixed-case key assignments (case-insensitive keyword matching)", () => { + const input = 'ApiKey = "abcdefghijklmnopqrst"'; + const result = redactSensitiveContent(input); + expect(result.redactionCount).toBeGreaterThan(0); + }); + + it("redacts UPPER_CASE key assignments via generic pattern", () => { + const input = 'AUTH_TOKEN: SuperSecretVal1234'; + const result = redactSensitiveContent(input); + expect(result.redactionCount).toBeGreaterThan(0); + }); }); // --- Tier 2: PII/System Info --- @@ -387,7 +399,9 @@ describe("sensitive-redactor", () => { it("redacts SECRET_KEY assignments", () => { const input = "SECRET_KEY=abcdefghij1234567890"; const result = redactSensitiveContent(input); - expect(result.sanitized).toContain("[ENV_SECRET]"); + // May be matched by generic_api_key or env_var_secret depending on order + expect(result.redactionCount).toBeGreaterThan(0); + expect(result.sanitized).not.toContain("abcdefghij1234567890"); }); it("redacts DATABASE_PASSWORD assignments", () => {