vllm-project · Xunzhuo · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025
@@ -0,0 +1,176 @@
+# Category-Level PII Detection Example
+# This example demonstrates how to configure PII detection at the category level
+# Different categories can have different PII detection settings and thresholds based on their sensitivity
+
+# Global PII detection configuration (can be overridden per category)
+classifier:
+  pii_model:
+    model_id: "models/pii_classifier_modernbert-base_model"
+    threshold: 0.7  # Global default threshold - can be overridden per category
+    use_cpu: true
+    pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json"
+
+  category_model:
+    model_id: "models/category_classifier_modernbert-base_model"
+    use_modernbert: true
+    threshold: 0.6
+    use_cpu: true
+    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
+
+# Categories with different PII detection settings
+categories:
+  # High-security category: Strict PII detection with high threshold
+  - name: healthcare
+    description: "Healthcare and medical queries"
+    pii_enabled: true  # Explicitly enable (inherits from global by default)
+    pii_threshold: 0.9  # Higher threshold: only high-confidence matches are treated as PII (fewer false positives, may miss subtle PII)
+    system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care."
+    model_scores:
+      - model: secure-llm
+        score: 0.9
+        use_reasoning: false
+
+  # Financial category: Very strict PII detection
+  - name: finance
+    description: "Financial and banking queries"
+    pii_enabled: true
+    pii_threshold: 0.95  # Very high threshold for critical PII like SSN, credit cards
+    system_prompt: "You are a financial advisor. Never store or log any PII information."
+    model_scores:
+      - model: secure-llm
+        score: 0.9
+        use_reasoning: false
+
+  # Customer support: Balanced threshold
+  - name: customer_support
+    description: "Customer support and general inquiries"
+    pii_enabled: true
+    pii_threshold: 0.8  # Slightly higher than global for customer-facing content
+    system_prompt: "You are a friendly customer support agent. Be cautious with customer information."
+    model_scores:
+      - model: general-llm
+        score: 0.8
+        use_reasoning: false
+
+  # Internal tools: Relaxed threshold (trusted environment)
+  - name: code_generation
+    description: "Internal code generation and development tools"
+    pii_enabled: true
+    pii_threshold: 0.5  # Lower threshold: more sensitive detection, so expect more false positives on code/technical content
+    system_prompt: "You are a code generation assistant for internal developers."
+    model_scores:
+      - model: general-llm
+        score: 0.9
+        use_reasoning: true
+
+  # Public documentation: Lower threshold for broader detection
+  - name: documentation
+    description: "Public documentation and help articles"
+    pii_enabled: true
+    pii_threshold: 0.6  # Lower threshold to catch more potential PII in public content
+    system_prompt: "You are a documentation assistant. Help create clear public documentation."
+    model_scores:
+      - model: general-llm
+        score: 0.7
+        use_reasoning: false
+
+  # Testing category: Disable PII detection
+  - name: testing
+    description: "Testing and quality assurance queries"
+    pii_enabled: false  # Disable PII detection for testing purposes
+    system_prompt: "You are a QA assistant helping with test scenarios."
+    model_scores:
+      - model: general-llm
+        score: 0.6
+        use_reasoning: false
+
+  # Default category: Uses global setting
+  - name: general
+    description: "General queries that don't fit into specific categories"
+    # pii_enabled not specified - will inherit from global (enabled if pii_model is configured)
+    # pii_threshold not specified - will inherit from global threshold (0.7)
+    system_prompt: "You are a helpful assistant."
+    model_scores:
+      - model: general-llm
+        score: 0.5
+        use_reasoning: false
+
+# Model configuration
+model_config:
+  "secure-llm":
+    preferred_endpoints: ["secure-endpoint"]
+    pii_policy:
+      allow_by_default: false  # Deny all PII by default for secure model
+      pii_types_allowed:
+        - "GPE"  # Geopolitical entities (cities, countries) are OK
+        - "ORGANIZATION"  # Organization names are OK
+
+  "general-llm":
+    preferred_endpoints: ["general-endpoint"]
+    pii_policy:
+      allow_by_default: true  # Allow all PII for general model
+
+# Default model for fallback
+default_model: general-llm
+
+# vLLM endpoints configuration
+vllm_endpoints:
+  - name: "secure-endpoint"
+    address: "127.0.0.1"
+    port: 8000
+    weight: 1
+
+  - name: "general-endpoint"
+    address: "127.0.0.1"
+    port: 8001
+    weight: 1
+
+# Usage Notes:
+# =============
+# 1. Global Settings:
+#    - classifier.pii_model: Configures the PII detection model and default threshold
+#    - threshold: Sets the default detection threshold (0.0-1.0) for all categories
+# 2. Category Overrides:
+#    - pii_enabled: Override global enabled/disabled setting per category
+#    - pii_threshold: Override global threshold per category
+# 3. Inheritance:
+#    - If pii_enabled is not specified, inherits from global (enabled if pii_model is configured)
+#    - If pii_threshold is not specified, inherits from global classifier.pii_model.threshold
+# 4. Threshold Tuning Guidelines:
+#    - Higher threshold (0.85-0.95): Stricter detection, fewer false positives, may miss subtle PII
+#      * Use for: Healthcare, Finance, Legal categories where precision is critical
+#      * Risk: May miss some PII entities with lower confidence
+#    - Medium threshold (0.65-0.85): Balanced detection, good for most use cases
+#      * Use for: Customer support, HR, General business queries
+#      * Risk: Moderate false positive/negative rate
+#    - Lower threshold (0.4-0.65): More sensitive detection, catches more PII, higher false positive rate
+#      * Use for: Public content, Documentation, Code generation (when missing PII is costlier than a false positive)
+#      * Risk: Higher false positive rate, especially with technical content
+# 5. PII Type Considerations:
+#    - Different PII types have different consequences:
+#      * Critical (SSN, Credit Card, Passwords): Use threshold 0.9+
+#      * Sensitive (Email, Phone, Address): Use threshold 0.75-0.9
+#      * General (Names, Organizations, Dates): Use threshold 0.6-0.75
+#    - Consider using different thresholds per category based on expected PII types
+# 6. Use Cases by Category:
+#    - Healthcare: High threshold (0.9+) to avoid false positives on medical terms
+#    - Finance: Very high threshold (0.95+) for critical financial PII
+#    - Customer Support: Medium-high threshold (0.8) for balanced protection
+#    - Code/Technical: Lower threshold (0.5-0.6) for broader detection, accepting more false positives on code artifacts
+#    - Public Content: Lower threshold (0.6) to catch more potential PII before publication
+#    - Testing: Disabled to avoid interference with test data
+# 7. Security Best Practices:
+#    - Enable PII detection by default (configure classifier.pii_model)
+#    - Only disable detection or use very high thresholds (which can miss PII) for specific categories where risk is managed
+#    - Consider the consequences of PII exposure on a per-category basis
+#    - Monitor false positive and false negative rates to tune thresholds appropriately
+#    - Combine with model-level PII policies (pii_policy) for defense in depth
+#    - Use different thresholds for different sensitivity levels:
+#      * Customer-facing categories: Higher thresholds to reduce false positives
+#      * Internal categories: Lower thresholds for broader detection
+#      * Critical categories: Highest thresholds for precision
+# 8. Testing and Tuning:
+#    - Start with conservative (higher) thresholds and adjust based on false positive rate
+#    - Monitor PII detection metrics to understand category-specific patterns
+#    - Test with representative data for each category to validate threshold settings
+#    - Consider A/B testing different thresholds to find optimal values
@@ -376,6 +376,12 @@ type Category struct {
 	// JailbreakThreshold defines the confidence threshold for jailbreak detection (0.0-1.0)
 	// If nil, uses the global threshold from PromptGuard.Threshold
 	JailbreakThreshold *float32 `yaml:"jailbreak_threshold,omitempty"`
+	// PIIEnabled controls whether PII detection is enabled for this category
+	// If nil, inherits from global PII detection enabled setting (based on classifier.pii_model configuration)
+	PIIEnabled *bool `yaml:"pii_enabled,omitempty"`
+	// PIIThreshold defines the confidence threshold for PII detection (0.0-1.0)
+	// If nil, uses the global threshold from Classifier.PIIModel.Threshold
+	PIIThreshold *float32 `yaml:"pii_threshold,omitempty"`
 }
 
 // GetModelReasoningFamily returns the reasoning family configuration for a given model name
@@ -843,3 +849,25 @@ func (c *RouterConfig) GetJailbreakThresholdForCategory(categoryName string) flo
 	// Fall back to global threshold
 	return c.PromptGuard.Threshold
 }
+
+// IsPIIEnabledForCategory reports whether PII detection applies to the named category.
+// A non-nil Category.PIIEnabled is returned as-is; otherwise the result is c.IsPIIClassifierEnabled().
+func (c *RouterConfig) IsPIIEnabledForCategory(categoryName string) bool {
+	category := c.GetCategoryByName(categoryName)
+	if category != nil && category.PIIEnabled != nil {
+		return *category.PIIEnabled
+	}
+	// No category found or no per-category override: defer to the global setting.
+	return c.IsPIIClassifierEnabled()
+}
+
+// GetPIIThresholdForCategory returns the PII detection threshold in effect for the named category.
+// A non-nil Category.PIIThreshold takes precedence over c.Classifier.PIIModel.Threshold; the value is not range-checked here.
+func (c *RouterConfig) GetPIIThresholdForCategory(categoryName string) float32 {
+	category := c.GetCategoryByName(categoryName)
+	if category != nil && category.PIIThreshold != nil {
+		return *category.PIIThreshold
+	}
+	// No category found or no per-category override: use the global threshold.
+	return c.Classifier.PIIModel.Threshold
+}
@@ -2113,4 +2113,150 @@ categories:
 			})
 		})
 	})
+
+	// Specs for GetPIIThresholdForCategory: a category-level PIIThreshold overrides
+	// Classifier.PIIModel.Threshold, and every other case returns the global value.
+	Describe("GetPIIThresholdForCategory", func() {
+		Context("when global threshold is set", func() {
+			It("should return global threshold for category without explicit setting", func() {
+				category := config.Category{
+					Name:        "test",
+					ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					// Zero-value Classifier; the global threshold is assigned just below.
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				cfg.Classifier.PIIModel.Threshold = 0.7
+
+				Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.7)))
+			})
+
+			It("should return category-specific threshold when set", func() {
+				category := config.Category{
+					Name:         "test",
+					PIIThreshold: config.Float32Ptr(0.9),
+					ModelScores:  []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				cfg.Classifier.PIIModel.Threshold = 0.7
+
+				Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.9)))
+			})
+
+			// The override may be below the global value...
+			It("should allow lower threshold override", func() {
+				category := config.Category{
+					Name:         "test",
+					PIIThreshold: config.Float32Ptr(0.5),
+					ModelScores:  []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				cfg.Classifier.PIIModel.Threshold = 0.7
+
+				Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.5)))
+			})
+
+			// ...or above it; the override is returned unchanged in both directions.
+			It("should allow higher threshold override", func() {
+				category := config.Category{
+					Name:         "test",
+					PIIThreshold: config.Float32Ptr(0.95),
+					ModelScores:  []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				cfg.Classifier.PIIModel.Threshold = 0.7
+
+				Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.95)))
+			})
+		})
+
+		Context("when category does not exist", func() {
+			// An unknown category name must not fail; the global threshold is returned.
+			It("should fall back to global threshold", func() {
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{},
+				}
+				cfg.Classifier.PIIModel.Threshold = 0.8
+
+				Expect(cfg.GetPIIThresholdForCategory("nonexistent")).To(Equal(float32(0.8)))
+			})
+		})
+	})
+
+	// Specs for IsPIIEnabledForCategory: an explicit per-category PIIEnabled wins,
+	// and every other case falls back to the global PII classifier setting.
+	Describe("IsPIIEnabledForCategory", func() {
+		Context("when global PII is enabled", func() {
+			It("should return true for category without explicit setting", func() {
+				category := config.Category{
+					Name:        "test",
+					ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				// presumably both fields must be set for the global check to report enabled — confirm against IsPIIClassifierEnabled
+				cfg.Classifier.PIIModel.ModelID = "test-model"
+				cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
+
+				Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
+			})
+
+			It("should return category-specific setting when set to false", func() {
+				category := config.Category{
+					Name:        "test",
+					PIIEnabled:  config.BoolPtr(false),
+					ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				cfg.Classifier.PIIModel.ModelID = "test-model"
+				cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
+
+				Expect(cfg.IsPIIEnabledForCategory("test")).To(BeFalse())
+			})
+
+			// NOTE(review): this case sits under the "global PII is enabled" Context but
+			// deliberately clears the model ID; it checks that an explicit true is returned as-is.
+			It("should return category-specific setting when set to true", func() {
+				category := config.Category{
+					Name:        "test",
+					PIIEnabled:  config.BoolPtr(true),
+					ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
+				}
+
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{category},
+				}
+				// Global is disabled (no model ID)
+				cfg.Classifier.PIIModel.ModelID = ""
+
+				Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
+			})
+		})
+
+		Context("when category does not exist", func() {
+			// An unknown category name defers to the global setting, enabled here.
+			It("should fall back to global setting", func() {
+				cfg := &config.RouterConfig{
+					Classifier: config.RouterConfig{}.Classifier,
+					Categories: []config.Category{},
+				}
+				cfg.Classifier.PIIModel.ModelID = "test-model"
+				cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
+
+				Expect(cfg.IsPIIEnabledForCategory("nonexistent")).To(BeTrue())
+			})
+		})
+	})
 })