openai · gabor-openai · Nov 6, 2025 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025
diff --git a/docs/ref/checks/pii.md b/docs/ref/checks/pii.md
@@ -1,26 +1,37 @@
 # Contains PII
 
-Detects personally identifiable information (PII) such as SSNs, phone numbers, credit card numbers, and email addresses using Microsoft's [Presidio library](https://microsoft.github.io/presidio/). Will automatically mask detected PII or block content based on configuration.
+Detects personally identifiable information (PII) such as SSNs, phone numbers, credit card numbers, and email addresses using Guardrails' built-in TypeScript regex engine. The check can automatically mask detected spans or block the request based on configuration.
+
+**Advanced Security Features:**
+
+- **Unicode normalization**: Prevents bypasses using fullwidth characters (＠) or zero-width spaces
+- **Encoded PII detection**: Optionally detects PII hidden in Base64, URL-encoded, or hex strings
+- **URL context awareness**: Detects emails in query parameters (e.g., `GET /api?user=john@example.com`)
+- **Custom patterns**: Extends the default entity list with CVV/CVC codes, BIC/SWIFT identifiers, and other global formats
 
 ## Configuration
 
 ```json
 {
     "name": "Contains PII",
     "config": {
-        "entities": ["EMAIL_ADDRESS", "US_SSN", "CREDIT_CARD", "PHONE_NUMBER"],
-        "block": false
+        "entities": ["EMAIL_ADDRESS", "US_SSN", "CREDIT_CARD", "PHONE_NUMBER", "CVV", "BIC_SWIFT"],
+        "block": false,
+        "detect_encoded_pii": false
     }
 }
 ```
 
 ### Parameters
 
-- **`entities`** (required): List of PII entity types to detect. See the full list of [supported entities](https://microsoft.github.io/presidio/supported_entities/).
+- **`entities`** (required): List of PII entity types to detect. See the `PIIEntity` enum in `src/checks/pii.ts` for the full list, including custom entities such as `CVV` (credit card security codes) and `BIC_SWIFT` (bank identification codes).
 - **`block`** (optional): Whether to block content or just mask PII (default: `false`)
+- **`detect_encoded_pii`** (optional): If `true`, detects PII in Base64/URL-encoded/hex strings (default: `false`)
 
 ## Implementation Notes
 
+Under the hood the TypeScript guardrail normalizes text (Unicode NFKC), strips zero-width characters, and runs curated regex patterns for each configured entity. When `detect_encoded_pii` is enabled the check also decodes Base64, URL-encoded, and hexadecimal substrings before rescanning them for matches, remapping any findings back to the original encoded content.
+
 **Stage-specific behavior is critical:**
 
 - **Pre-flight stage**: Use `block=false` (default) for automatic PII masking of user input
@@ -30,7 +41,7 @@ Detects personally identifiable information (PII) such as SSNs, phone numbers, c
 **PII masking mode** (default, `block=false`):
 
 - Automatically replaces detected PII with placeholder tokens like `<EMAIL_ADDRESS>`, `<US_SSN>`
-- Does not trigger tripwire - allows content through with PII removed
+- Does not trigger tripwire - allows content through with PII masked
 
 **Blocking mode** (`block=true`):
 
@@ -41,6 +52,8 @@ Detects personally identifiable information (PII) such as SSNs, phone numbers, c
 
 Returns a `GuardrailResult` with the following `info` dictionary:
 
+### Basic Example (Plain PII)
+
 ```json
 {
     "guardrail_name": "Contains PII",
@@ -49,14 +62,37 @@ Returns a `GuardrailResult` with the following `info` dictionary:
         "US_SSN": ["123-45-6789"]
     },
     "entity_types_checked": ["EMAIL_ADDRESS", "US_SSN", "CREDIT_CARD"],
-    "checked_text": "Contact me at <EMAIL_ADDRESS>, SSN: <US_SSN>",
     "block_mode": false,
     "pii_detected": true
 }
 ```
 
-- **`detected_entities`**: Detected entities and their values
+### With Encoded PII Detection Enabled
+
+When `detect_encoded_pii: true`, the guardrail also detects and masks encoded PII:
+
+```json
+{
+    "guardrail_name": "Contains PII",
+    "detected_entities": {
+        "EMAIL_ADDRESS": [
+            "john@example.com",
+            "am9obkBleGFtcGxlLmNvbQ==",
+            "%6a%6f%65%40domain.com",
+            "6a6f686e406578616d706c652e636f6d"
+        ]
+    },
+    "entity_types_checked": ["EMAIL_ADDRESS"],
+    "block_mode": false,
+    "pii_detected": true
+}
+```
+
+Note: Encoded PII is masked with `<ENTITY_TYPE_ENCODED>` to distinguish it from plain text PII.
+
+### Field Descriptions
+
+- **`detected_entities`**: Detected entities and their values (includes both plain and encoded forms when `detect_encoded_pii` is enabled)
 - **`entity_types_checked`**: List of entity types that were configured for detection
-- **`checked_text`**: Text with PII masked (if PII was found) or original text (if no PII was found)
 - **`block_mode`**: Whether the check was configured to block or mask
-- **`pii_detected`**: Boolean indicating if any PII was found
+- **`pii_detected`**: Boolean indicating if any PII was found (plain or encoded)
diff --git a/examples/basic/pii_mask_example.ts b/examples/basic/pii_mask_example.ts
@@ -0,0 +1,213 @@
+#!/usr/bin/env node
+/**
+ * PII Masking Example: Interactive chat with GuardrailsOpenAI.
+ *
+ * Demonstrates how to mask PII in the pre-flight stage (block=false) so that
+ * user inputs are sanitized before reaching the model, while also blocking
+ * PII that appears in the model's output (block=true).
+ *
+ * Highlights:
+ * - Pre-flight PII guardrail automatically replaces detected entities with tokens like <EMAIL_ADDRESS>
+ * - Encoded PII detection (Base64/URL/hex) is enabled via detect_encoded_pii
+ * - Output stage blocks responses when PII is detected in the model reply
+ * - Console output shows what was masked and which entities were found
+ *
+ * Run with: npx tsx pii_mask_example.ts
+ *
+ * Prerequisites:
+ * - Set OPENAI_API_KEY in your environment
+ */
+
+import * as readline from 'readline';
+import {
+  GuardrailResult,
+  GuardrailTripwireTriggered,
+  GuardrailsOpenAI,
+  GuardrailsResponse,
+} from '../../src';
+
+/** One chat turn: who spoke and what was said. */
+type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };
+
+/**
+ * Builds a "Contains PII" guardrail entry for one pipeline stage.
+ *
+ * Both stages that use this check scan for the same entities and have
+ * encoded-PII detection switched on; they differ only in `block`.
+ * A fresh object (and fresh `entities` array) is returned on every call.
+ */
+const containsPiiGuardrail = (block: boolean) => ({
+  name: 'Contains PII',
+  config: {
+    entities: ['EMAIL_ADDRESS', 'PHONE_NUMBER', 'US_SSN'],
+    block,
+    detect_encoded_pii: true,
+  },
+});
+
+/**
+ * Guardrail pipeline used by this example:
+ * - pre_flight: "Contains PII" with block=false
+ * - input: "Moderation" for the `hate` and `violence` categories
+ * - output: "Contains PII" with block=true
+ */
+const PIPELINE_CONFIG = {
+  version: 1,
+  pre_flight: {
+    version: 1,
+    guardrails: [containsPiiGuardrail(false)],
+  },
+  input: {
+    version: 1,
+    guardrails: [{ name: 'Moderation', config: { categories: ['hate', 'violence'] } }],
+  },
+  output: {
+    version: 1,
+    guardrails: [containsPiiGuardrail(true)],
+  },
+};
+
+/**
+ * Creates the readline interface for the chat loop, wired to the process's
+ * stdin/stdout and preconfigured with the example's prompt text.
+ */
+function createInterface(): readline.Interface {
+  const options = {
+    input: process.stdin,
+    output: process.stdout,
+    prompt: '\nEnter a message (or type "exit"): ',
+  };
+  return readline.createInterface(options);
+}
+
+/**
+ * Renders detected entities as a one-line summary such as
+ * `EMAIL_ADDRESS (2), US_SSN (1)`.
+ *
+ * @param entities - Map from entity type to the matched strings, if any.
+ * @returns The comma-separated summary, or `'None'` when the map is missing
+ *   or has no keys.
+ */
+function formatEntitySummary(entities: Record<string, string[]> | undefined): string {
+  const summary = Object.entries(entities ?? {})
+    .map(([entity, matches]) => `${entity} (${matches.length})`)
+    .join(', ');
+  // Every rendered part is non-empty, so an empty string means "no entries".
+  return summary || 'None';
+}
+
+/**
+ * Prints a before/after report for a PII guardrail result: the original
+ * input, the sanitized text, and a per-entity match count.
+ *
+ * @param result - Guardrail result whose `info` is inspected.
+ * @param originalInput - The raw user text; also used as the "Sanitized"
+ *   value when `info.checked_text` is not a string.
+ */
+function logPiiMasking(result: GuardrailResult, originalInput: string): void {
+  const details = result.info ?? {};
+  const stageLabel = details.stage_name ?? 'pre_flight';
+  const entities = details.detected_entities as Record<string, string[]> | undefined;
+  // NOTE(review): the documented `info` fields for this check no longer list
+  // `checked_text`; if it is absent, "Sanitized" echoes the original input —
+  // confirm which field carries the masked text.
+  const sanitized =
+    typeof details.checked_text === 'string' ? details.checked_text : originalInput;
+
+  console.log(`\n🪪  PII detected and masked (${stageLabel} stage)`);
+  console.log('Original :', originalInput);
+  console.log('Sanitized:', sanitized);
+  console.log('Entities :', formatEntitySummary(entities));
+}
+
+/**
+ * Prints a notice that a response was blocked for containing PII, along with
+ * a per-entity match count. The stage label falls back to `'output'` when
+ * `info.stage_name` is not set.
+ */
+function logPiiInOutput(result: GuardrailResult): void {
+  const details = result.info ?? {};
+  const stageLabel = details.stage_name ?? 'output';
+  const entities = details.detected_entities as Record<string, string[]> | undefined;
+
+  console.log(`\n⚠️  PII detected – response blocked (${stageLabel} stage).`);
+  console.log('Entities :', formatEntitySummary(entities));
+}
+
+/**
+ * Walks the guardrail results attached to a response and reports the ones
+ * produced by the "Contains PII" check.
+ *
+ * - Pre-flight results are reported when `info.pii_detected` is truthy.
+ * - Output results are reported when the result's tripwire was triggered.
+ *
+ * @param response - Response carrying `guardrail_results`.
+ * @param originalInput - Raw user text, passed through to the masking report.
+ */
+function inspectGuardrailResults(
+  response: GuardrailsResponse,
+  originalInput: string
+): void {
+  const { preflight, output } = response.guardrail_results;
+
+  for (const entry of preflight) {
+    const details = entry.info ?? {};
+    if (details.guardrail_name !== 'Contains PII' || !details.pii_detected) {
+      continue;
+    }
+    logPiiMasking(entry, originalInput);
+  }
+
+  for (const entry of output) {
+    const details = entry.info ?? {};
+    if (details.guardrail_name !== 'Contains PII' || !entry.tripwireTriggered) {
+      continue;
+    }
+    logPiiInOutput(entry);
+  }
+}
+
+/**
+ * Runs one chat turn: sends the history plus the new user message through
+ * the guarded client, reports PII guardrail findings, prints the reply, and
+ * appends both turns to `conversation`.
+ *
+ * The history is only extended after the request resolves, so a rejected
+ * request leaves `conversation` untouched.
+ *
+ * @param client - Guardrails-wrapped OpenAI client.
+ * @param userInput - Raw text the user typed.
+ * @param conversation - Running history; mutated in place.
+ */
+async function processInput(
+  client: GuardrailsOpenAI,
+  userInput: string,
+  conversation: ChatMessage[]
+): Promise<void> {
+  const response = await client.chat.completions.create({
+    model: 'gpt-4.1-mini',
+    // The request gets its own message object, distinct from the one stored
+    // in the history below.
+    messages: [...conversation, { role: 'user' as const, content: userInput }],
+  });
+
+  inspectGuardrailResults(response, userInput);
+
+  const reply = response.choices[0]?.message?.content ?? '';
+  console.log('\n🤖 Assistant:', reply.trim());
+
+  conversation.push(
+    { role: 'user', content: userInput },
+    { role: 'assistant', content: reply }
+  );
+}
+
+/**
+ * Entry point for the interactive example.
+ *
+ * Creates the guarded client from `PIPELINE_CONFIG`, seeds the conversation
+ * with a system message, then reads lines from stdin one at a time until the
+ * user types "exit" or the input stream closes. Guardrail tripwires and other
+ * request errors are reported and the loop continues.
+ */
+async function main(): Promise<void> {
+  console.log('🔐 Guardrails PII Masking Example');
+  console.log(' - Pre-flight guardrail masks PII before it hits the model');
+  console.log(' - Output guardrail blocks replies that contain PII');
+
+  const client = await GuardrailsOpenAI.create(PIPELINE_CONFIG);
+  const conversation: ChatMessage[] = [
+    {
+      role: 'system',
+      content: 'You are a helpful assistant. Keep responses concise.',
+    },
+  ];
+
+  const rl = createInterface();
+
+  // Track closure so we never re-prompt on an interface that has already
+  // been closed (e.g. stdin hit EOF while a request was in flight).
+  let closed = false;
+  rl.once('close', () => {
+    closed = true;
+  });
+
+  rl.prompt();
+
+  // Consume lines through the async iterator instead of an async 'line'
+  // listener: event listeners are not awaited, so overlapping lines would run
+  // concurrently against the shared `conversation`, and an EOF on piped input
+  // would exit the process before the pending reply was printed.
+  for await (const line of rl) {
+    const input = line.trim();
+
+    if (!input) {
+      if (!closed) rl.prompt();
+      continue;
+    }
+
+    if (input.toLowerCase() === 'exit') {
+      // Leaving the loop early closes the interface.
+      break;
+    }
+
+    try {
+      await processInput(client, input, conversation);
+    } catch (error) {
+      if (error instanceof GuardrailTripwireTriggered) {
+        const info = error.guardrailResult.info ?? {};
+        const stage = info.stage_name ?? 'unknown';
+        console.log(
+          `\n🛑 Guardrail triggered in ${stage} stage: ${info.guardrail_name ?? 'Unknown guardrail'}`
+        );
+        console.log(JSON.stringify(error.guardrailResult, null, 2));
+      } else {
+        console.error('\n❌ Error processing request:', error instanceof Error ? error.message : error);
+      }
+    }
+
+    if (!closed) rl.prompt();
+  }
+
+  console.log('\n👋 Exiting the program.');
+  process.exit(0);
+}
+
+// Last-resort handler: anything that escapes main() is logged and the process
+// exits with a non-zero status.
+const reportFatal = (error: unknown): never => {
+  console.error('Fatal error:', error);
+  process.exit(1);
+};
+
+main().catch(reportFatal);