import type { ParsedMessage } from "./types.js";

/**
 * Sensitive information detection and redaction module.
 *
 * Uses a curated set of 34 regex patterns derived from gitleaks' production
 * config, focused on patterns relevant to Claude Code session logs.
 * Employs keyword pre-filtering (gitleaks technique) to skip expensive regex
 * evaluation for messages that contain no potential secrets.
 *
 * Shared between client (display redaction) and server (export redaction).
 */

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/**
 * A single detector: the regex that finds a secret plus the metadata the
 * redactor needs to run it and to report what it replaced.
 */
export interface SensitivePattern {
  /** Identifier reported in `RedactionResult.categories` when this pattern redacts something. */
  id: string;
  /** Placeholder text substituted for each match, e.g. "[AWS_KEY]". */
  label: string;
  /** Expression applied to the text; the whole match (not a capture group) is replaced by `label`. */
  regex: RegExp;
  /**
   * Cheap pre-filter: the regex is only evaluated when at least one of these
   * substrings occurs in the text. Compared case-insensitively when `regex`
   * carries the `i` flag, case-sensitively otherwise.
   */
  keywords: string[];
  /** Optional veto: when it returns true for a match, that match is left untouched and not counted. */
  falsePositiveCheck?: (match: string) => boolean;
}

/** Outcome of running the redactor over one string. */
export interface RedactionResult {
  /** The input with every accepted match replaced by its pattern's label. */
  sanitized: string;
  /** Total number of replacements performed across all patterns. */
  redactionCount: number;
  /** Ids of the patterns that replaced at least one match, without duplicates. */
  categories: string[];
}

// ---------------------------------------------------------------------------
// False-positive helpers
// ---------------------------------------------------------------------------

// Email addresses that should be left unredacted (consulted by
// isAllowlistedEmail, which lowercases the match first, so entries must be
// lowercase). Entries beginning with "@" name a whole domain; the last entry
// is one specific address.
const ALLOWLISTED_EMAILS = [
  "@example.com",
  "@example.org",
  "@test.com",
  "@test.org",
  "@localhost",
  "noreply@anthropic.com",
];

// IPv4 addresses left unredacted when the match equals one of them exactly
// (loopback and the unspecified address).
const ALLOWLISTED_IPS = ["127.0.0.1", "0.0.0.0"];

// Leading octets of the three /24 blocks reserved for documentation by
// RFC 5737 (TEST-NET-1, TEST-NET-2, TEST-NET-3). Any match starting with one
// of these prefixes is left unredacted.
const DOCUMENTATION_IP_PREFIXES = ["192.0.2.", "198.51.100.", "203.0.113."];

/**
 * Decides whether an email match should be left unredacted.
 *
 * Entries of ALLOWLISTED_EMAILS are interpreted by their shape:
 * - an entry starting with "@" is a domain: any address ending in that
 *   "@domain" (or the bare domain text itself) is allowlisted;
 * - any other entry is a complete address and must equal the match exactly.
 *
 * Treating complete addresses as suffixes would also allowlist unrelated
 * mailboxes such as "xnoreply@anthropic.com", leaking them unredacted.
 *
 * @param match - Text matched by the email pattern; compared case-insensitively.
 * @returns true when the match is allowlisted and must not be redacted.
 */
function isAllowlistedEmail(match: string): boolean {
  const lower = match.toLowerCase();
  return ALLOWLISTED_EMAILS.some((entry) => {
    if (entry.startsWith("@")) {
      // Domain entry: accept every mailbox at the domain, or the bare domain.
      return lower.endsWith(entry) || lower === entry.slice(1);
    }
    // Full-address entry: only the exact address is allowlisted.
    return lower === entry;
  });
}

/**
 * Decides whether an IPv4 match should be left unredacted.
 *
 * @param match - Text matched by the IPv4 pattern.
 * @returns true when the match is one of ALLOWLISTED_IPS exactly, or begins
 *   with one of DOCUMENTATION_IP_PREFIXES.
 */
function isAllowlistedIp(match: string): boolean {
  const isExactEntry = ALLOWLISTED_IPS.includes(match);
  if (isExactEntry) {
    return true;
  }
  for (const prefix of DOCUMENTATION_IP_PREFIXES) {
    if (match.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}

// ---------------------------------------------------------------------------
// Pattern Definitions
// ---------------------------------------------------------------------------

/**
 * Ordered table of detectors used by the redactor.
 *
 * Order matters: patterns are applied one after another, each to the output
 * of the previous one, so specific token formats (Tier 1) are replaced before
 * the broader contextual and PII patterns get a chance to match them.
 *
 * Every regex carries the `g` flag so that all occurrences are replaced.
 * `keywords` must cover every way the regex can start matching; otherwise the
 * pre-filter silently suppresses real matches.
 */
export const SENSITIVE_PATTERNS: SensitivePattern[] = [
  // ---- Tier 1: Known Secret Formats ----

  // #1 AWS Access Key
  {
    id: "aws_access_key",
    label: "[AWS_KEY]",
    regex: /\b(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16}\b/g,
    keywords: ["AKIA", "ASIA", "ABIA", "ACCA", "A3T"],
  },

  // #2 AWS Bedrock
  {
    id: "aws_bedrock",
    label: "[AWS_BEDROCK_KEY]",
    regex: /\bABSK[A-Za-z0-9+/]{109,269}={0,2}\b/g,
    keywords: ["ABSK"],
  },

  // #3 GitHub PAT
  {
    id: "github_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bghp_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghp_"],
  },

  // #4 GitHub Fine-Grained PAT
  {
    id: "github_fine_grained_pat",
    label: "[GITHUB_TOKEN]",
    regex: /\bgithub_pat_\w{82}\b/g,
    keywords: ["github_pat_"],
  },

  // #5 GitHub App Token
  {
    id: "github_app_token",
    label: "[GITHUB_TOKEN]",
    regex: /\b(?:ghu|ghs)_[0-9a-zA-Z]{36}\b/g,
    keywords: ["ghu_", "ghs_"],
  },

  // #6 GitLab PAT
  {
    id: "gitlab_pat",
    label: "[GITLAB_TOKEN]",
    regex: /\bglpat-[\w-]{20}\b/g,
    keywords: ["glpat-"],
  },

  // #7 GitLab Runner Token
  {
    id: "gitlab_runner",
    label: "[GITLAB_TOKEN]",
    regex: /\bglrt-[0-9a-zA-Z_-]{20}\b/g,
    keywords: ["glrt-"],
  },

  // #8 OpenAI Key
  {
    id: "openai_key",
    label: "[OPENAI_KEY]",
    regex:
      /\b(?:sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{58,74}T3BlbkFJ[A-Za-z0-9_-]{58,74}|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})\b/g,
    keywords: ["sk-proj-", "sk-svcacct-", "sk-admin-", "T3BlbkFJ"],
  },

  // #9 Anthropic Key
  {
    id: "anthropic_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-api03-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-api"],
  },

  // #10 Anthropic Admin Key
  {
    id: "anthropic_admin_key",
    label: "[ANTHROPIC_KEY]",
    regex: /\bsk-ant-admin01-[a-zA-Z0-9_-]{93}AA\b/g,
    keywords: ["sk-ant-admin"],
  },

  // #11 HuggingFace Token
  {
    id: "huggingface_token",
    label: "[HF_TOKEN]",
    regex: /\bhf_[a-zA-Z]{34}\b/g,
    keywords: ["hf_"],
  },

  // #12 Perplexity Key
  {
    id: "perplexity_key",
    label: "[PERPLEXITY_KEY]",
    regex: /\bpplx-[a-zA-Z0-9]{48}\b/g,
    keywords: ["pplx-"],
  },

  // #13 Stripe Key
  {
    id: "stripe_key",
    label: "[STRIPE_KEY]",
    regex: /\b(?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99}\b/g,
    keywords: ["sk_live_", "sk_test_", "sk_prod_", "rk_live_", "rk_test_", "rk_prod_"],
  },

  // #14 Slack Bot Token
  {
    id: "slack_bot_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*\b/g,
    keywords: ["xoxb-"],
  },

  // #15 Slack User Token
  {
    id: "slack_user_token",
    label: "[SLACK_TOKEN]",
    regex: /\bxox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}\b/g,
    keywords: ["xoxp-", "xoxe-"],
  },

  // #16 Slack Webhook
  {
    id: "slack_webhook",
    label: "[SLACK_WEBHOOK]",
    regex:
      /(?:https?:\/\/)?hooks\.slack\.com\/(?:services|workflows|triggers)\/[A-Za-z0-9+/]{43,56}/g,
    keywords: ["hooks.slack.com"],
  },

  // #17 SendGrid Token
  {
    id: "sendgrid_token",
    label: "[SENDGRID_TOKEN]",
    regex: /\bSG\.[a-z0-9=_\-.]{66}\b/gi,
    keywords: ["SG."],
  },

  // #18 Twilio Key
  {
    id: "twilio_key",
    label: "[TWILIO_KEY]",
    regex: /\bSK[0-9a-fA-F]{32}\b/g,
    keywords: ["SK"],
  },

  // #19 GCP API Key
  {
    id: "gcp_api_key",
    label: "[GCP_KEY]",
    regex: /\bAIza[\w-]{35}\b/g,
    keywords: ["AIza"],
  },

  // #20 Azure AD Client Secret
  {
    id: "azure_ad_secret",
    label: "[AZURE_SECRET]",
    regex: /[a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34}/g,
    keywords: ["Q~"],
  },

  // #21 Heroku Key
  {
    id: "heroku_key",
    label: "[HEROKU_KEY]",
    regex: /\bHRKU-AA[0-9a-zA-Z_-]{58}\b/g,
    keywords: ["HRKU-"],
  },

  // #22 npm Token
  {
    id: "npm_token",
    label: "[NPM_TOKEN]",
    regex: /\bnpm_[a-z0-9]{36}\b/gi,
    keywords: ["npm_"],
  },

  // #23 PyPI Token
  {
    id: "pypi_token",
    label: "[PYPI_TOKEN]",
    regex: /\bpypi-AgEIcHlwaS5vcmc[\w-]{50,1000}\b/g,
    keywords: ["pypi-"],
  },

  // #24 Sentry Token
  {
    id: "sentry_token",
    label: "[SENTRY_TOKEN]",
    regex: /\b(?:sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}|sntryu_[a-f0-9]{64})\b/g,
    keywords: ["sntrys_", "sntryu_"],
  },

  // #25 JWT
  {
    id: "jwt",
    label: "[JWT]",
    regex:
      /\bey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/\\_-]{17,}\.(?:[a-zA-Z0-9/\\_-]{10,}={0,2})\b/g,
    keywords: ["eyJ"],
  },

  // #26 Private Key (PEM)
  {
    id: "private_key",
    label: "[PRIVATE_KEY]",
    regex:
      /-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----[\s\S]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\s+BLOCK)?-----/g,
    keywords: ["PRIVATE KEY"],
  },

  // #27 Generic API Key (contextual: secret-like variable name + value)
  {
    id: "generic_api_key",
    label: "[API_KEY]",
    regex:
      /(?:access|auth|api|credential|creds|key|passw(?:or)?d|secret|token)(?:[\t \w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=\-/+]{10,150})/gi,
    // The keyword list must cover every alternative the regex can start with.
    // Without "access", "api", "creds" and "key", an assignment such as
    // `key = <value>` or `api-key: <value>` was only redacted when some
    // unrelated keyword happened to occur elsewhere in the same text.
    keywords: [
      "secret",
      "token",
      "password",
      "passwd",
      "api_key",
      "apikey",
      "access_key",
      "auth",
      "credential",
      "access",
      "api",
      "creds",
      "key",
    ],
  },

  // ---- Tier 2: PII / System Info ----

  // #28 Home directory paths
  {
    id: "home_directory",
    label: "[HOME_PATH]",
    regex:
      /(?:\/home\/[a-zA-Z0-9_.-]+|\/Users\/[a-zA-Z0-9_.-]+|C:\\Users\\[a-zA-Z0-9_.-]+)(?:[/\\][^\s"'`<>)}\]]*)?/g,
    keywords: ["/home/", "/Users/", "C:\\Users\\"],
  },

  // #29 Connection strings
  {
    id: "connection_string",
    label: "[CONNECTION_STRING]",
    regex:
      /\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql):\/\/[^\s"'`]+/gi,
    keywords: ["postgres", "mysql", "mongodb", "redis", "amqp", "mssql"],
  },

  // #30 URLs with credentials (user:pass@host pattern)
  {
    id: "url_with_creds",
    label: "[URL_WITH_CREDS]",
    regex: /https?:\/\/[^\s:@]+:[^\s:@]+@[^\s"'`]+/g,
    keywords: ["://"],
  },

  // #31 Email addresses
  {
    id: "email",
    label: "[EMAIL]",
    regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    keywords: ["@"],
    falsePositiveCheck: isAllowlistedEmail,
  },

  // #32 IPv4 addresses
  {
    id: "ipv4",
    label: "[IP_ADDR]",
    regex:
      /\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b/g,
    keywords: ["0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."],
    falsePositiveCheck: isAllowlistedIp,
  },

  // #33 Bearer tokens
  {
    id: "bearer_token",
    label: "[BEARER_TOKEN]",
    regex: /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
    keywords: ["Bearer"],
  },

  // #34 Env var secret assignments
  {
    id: "env_var_secret",
    label: "[ENV_SECRET]",
    // No trailing \b: a word boundary after the optional closing quote can
    // only succeed when a word character follows, so it prevented the closing
    // quote from being consumed and truncated (or entirely missed) values
    // ending in punctuation, e.g. SECRET="ab$#" or PASSWORD=hunter2!!.
    regex:
      /\b[A-Z_]*(?:SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE)[A-Z_]*\s*[=:]\s*["']?[^\s"']{4,}["']?/g,
    keywords: ["SECRET", "TOKEN", "PASSWORD", "PASSWD", "PRIVATE"],
  },
];

// ---------------------------------------------------------------------------
// Core redaction functions
// ---------------------------------------------------------------------------

/**
 * Keyword pre-filter: reports whether `content` contains at least one of
 * `keywords` as a substring.
 *
 * @param content - Text to search.
 * @param keywords - Candidate substrings; an empty list never matches.
 * @param caseInsensitive - When true, both sides are lowercased before
 *   comparing (used for patterns whose regex has the `i` flag). Defaults to
 *   an exact, case-sensitive comparison.
 * @returns true as soon as one keyword is found, false otherwise.
 */
function hasKeyword(
  content: string,
  keywords: string[],
  caseInsensitive = false
): boolean {
  if (!caseInsensitive) {
    return keywords.some((keyword) => content.includes(keyword));
  }
  // Lowercase the haystack once rather than once per keyword.
  const haystack = content.toLowerCase();
  return keywords.some((keyword) => haystack.includes(keyword.toLowerCase()));
}

/**
 * Runs every entry of SENSITIVE_PATTERNS over `input`, in table order, and
 * substitutes each accepted match with the pattern's label. Each pattern sees
 * the text as already rewritten by the patterns before it.
 *
 * The input string is not modified; a new string is returned.
 *
 * @param input - Text to sanitize. A falsy value yields an empty result.
 * @returns The sanitized text, the number of substitutions made, and the ids
 *   of the patterns that made at least one substitution.
 */
export function redactSensitiveContent(input: string): RedactionResult {
  if (!input) {
    return { sanitized: "", redactionCount: 0, categories: [] };
  }

  const triggeredIds = new Set<string>();
  let replacements = 0;
  let text = input;

  for (const pattern of SENSITIVE_PATTERNS) {
    // Cheap substring pre-filter; its case sensitivity mirrors the regex's
    // own `i` flag so the filter never disagrees with the regex on casing.
    const ignoreCase = pattern.regex.flags.includes("i");
    if (hasKeyword(text, pattern.keywords, ignoreCase)) {
      // Work on a private copy so the shared /g regex never carries
      // lastIndex state between calls.
      const matcher = new RegExp(pattern.regex.source, pattern.regex.flags);

      text = text.replace(matcher, (hit: string) => {
        const vetoed =
          pattern.falsePositiveCheck && pattern.falsePositiveCheck(hit);
        if (vetoed) {
          // Allowlisted match: keep the original text and do not count it.
          return hit;
        }
        replacements += 1;
        triggeredIds.add(pattern.id);
        return pattern.label;
      });
    }
  }

  return {
    sanitized: text,
    redactionCount: replacements,
    categories: Array.from(triggeredIds),
  };
}

/**
 * Shorthand for callers that only need the redacted text and not the
 * redaction statistics.
 *
 * @param input - Text to sanitize.
 * @returns The `sanitized` field of `redactSensitiveContent(input)`.
 */
export function redactString(input: string): string {
  const { sanitized } = redactSensitiveContent(input);
  return sanitized;
}

/**
 * Builds a redacted copy of a message. Only `content` and `toolInput` are
 * passed through the redactor; every other field is copied as-is. The
 * original message object is left untouched.
 *
 * @param msg - Message to sanitize.
 * @returns A shallow copy of `msg` with `content` redacted and `toolInput`
 *   redacted when it is truthy (a falsy `toolInput` is carried over as-is).
 */
export function redactMessage(msg: ParsedMessage): ParsedMessage {
  const safeContent = redactString(msg.content);
  const safeToolInput = msg.toolInput
    ? redactString(msg.toolInput)
    : msg.toolInput;

  // toolName is typically safe (e.g. "Bash", "Read"), so it is copied by the
  // spread without redaction.
  return {
    ...msg,
    content: safeContent,
    toolInput: safeToolInput,
  };
}