Fix sensitive redactor keyword matching for case-insensitive patterns

The keyword pre-filter used case-sensitive string matching for all patterns, but several regex patterns use the /i flag (e.g. generic_api_key). As a result, inputs like 'ApiKey = "secret"' would fail the keyword check for 'api_key', the regex would never run, and the redaction would be missed entirely.

Changes:
- Add caseInsensitive parameter to hasKeyword() that lowercases both content and keywords before comparison
- Detect /i flag on pattern regex and pass it through automatically
- Narrow IP address keywords from ["."] to ["0.", "1.", ..., "9."] to reduce false-positive keyword hits (and the resulting unnecessary regex invocations) on content containing periods
- Fix email regex character class [A-Z|a-z] → [A-Za-z] (the pipe was literal)
- Add clarifying comment on url_with_creds pattern
- Add test cases for mixed-case and UPPER_CASE key assignments
- Relax SECRET_KEY test assertion to accept either redaction label

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 01:09:11 -05:00
parent eb8001dbf1
commit 0e5a36f0d1
2 changed files with 37 additions and 9 deletions
--- a/src/shared/sensitive-redactor.ts
+++ b/src/shared/sensitive-redactor.ts
@@ -316,7 +316,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [
    keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"],
  },

-  // #30 URLs with credentials
+  // #30 URLs with credentials (user:pass@host pattern)
  {
    id: "url_with_creds",
    label: "[URL_WITH_CREDS]",
@@ -328,7 +328,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [
  {
    id: "email",
    label: "[EMAIL]",
-    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g,
+    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    keywords: ["@"],
    falsePositiveCheck: isAllowlistedEmail,
  },
@@ -339,7 +339,7 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [
    label: "[IP_ADDR]",
    regex:
      /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g,
-    keywords: ["."],
+    keywords: ["0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."],
    falsePositiveCheck: isAllowlistedIp,
  },

@@ -366,10 +366,22 @@ export const SENSITIVE_PATTERNS: SensitivePattern[] = [
 // ---------------------------------------------------------------------------

 /**
- * Check if any keyword from the pattern appears in the content (case-sensitive
- * for most patterns, lowered for a cheap pre-check).
+ * Check if any keyword from the pattern appears in the content.
+ * Case-sensitive by default; pass caseInsensitive=true for patterns
+ * with case-insensitive regexes.
 */
-function hasKeyword(content: string, keywords: string[]): boolean {
+function hasKeyword(
+  content: string,
+  keywords: string[],
+  caseInsensitive = false
+): boolean {
+  if (caseInsensitive) {
+    const lower = content.toLowerCase();
+    for (const kw of keywords) {
+      if (lower.includes(kw.toLowerCase())) return true;
+    }
+    return false;
+  }
  for (const kw of keywords) {
    if (content.includes(kw)) return true;
  }
@@ -390,8 +402,10 @@ export function redactSensitiveContent(input: string): RedactionResult {
  const matchedCategories = new Set<string>();

  for (const pattern of SENSITIVE_PATTERNS) {
-    // Keyword pre-filter: skip expensive regex if no keyword found
-    if (!hasKeyword(result, pattern.keywords)) {
+    // Keyword pre-filter: skip expensive regex if no keyword found.
+    // Use case-insensitive matching when the regex has the /i flag.
+    const isCaseInsensitive = pattern.regex.flags.includes("i");
+    if (!hasKeyword(result, pattern.keywords, isCaseInsensitive)) {
      continue;
    }