diff --git a/config/examples/pii_category_example.yaml b/config/examples/pii_category_example.yaml new file mode 100644 index 00000000..6b8d9aeb --- /dev/null +++ b/config/examples/pii_category_example.yaml @@ -0,0 +1,176 @@ +# Category-Level PII Detection Example +# This example demonstrates how to configure PII detection at the category level +# Different categories can have different PII detection settings and thresholds based on their sensitivity + +# Global PII detection configuration (can be overridden per category) +classifier: + pii_model: + model_id: "models/pii_classifier_modernbert-base_model" + threshold: 0.7 # Global default threshold - can be overridden per category + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json" + + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + +# Categories with different PII detection settings +categories: + # High-security category: Strict PII detection with high threshold + - name: healthcare + description: "Healthcare and medical queries" + pii_enabled: true # Explicitly enable (inherits from global by default) + pii_threshold: 0.9 # Higher threshold for stricter detection (fewer false positives) + system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care." + model_scores: + - model: secure-llm + score: 0.9 + use_reasoning: false + + # Financial category: Very strict PII detection + - name: finance + description: "Financial and banking queries" + pii_enabled: true + pii_threshold: 0.95 # Very high threshold for critical PII like SSN, credit cards + system_prompt: "You are a financial advisor. Never store or log any PII information." 
+ model_scores: + - model: secure-llm + score: 0.9 + use_reasoning: false + + # Customer support: Balanced threshold + - name: customer_support + description: "Customer support and general inquiries" + pii_enabled: true + pii_threshold: 0.8 # Slightly higher than global for customer-facing content + system_prompt: "You are a friendly customer support agent. Be cautious with customer information." + model_scores: + - model: general-llm + score: 0.8 + use_reasoning: false + + # Internal tools: Lower threshold, broader detection (trusted environment) + - name: code_generation + description: "Internal code generation and development tools" + pii_enabled: true + pii_threshold: 0.5 # Lower threshold flags more entities (expect more false positives on code artifacts); raise it if they become noisy + system_prompt: "You are a code generation assistant for internal developers." + model_scores: + - model: general-llm + score: 0.9 + use_reasoning: true + + # Public documentation: Lower threshold for broader detection + - name: documentation + description: "Public documentation and help articles" + pii_enabled: true + pii_threshold: 0.6 # Lower threshold to catch more potential PII in public content + system_prompt: "You are a documentation assistant. Help create clear public documentation." + model_scores: + - model: general-llm + score: 0.7 + use_reasoning: false + + # Testing category: Disable PII detection + - name: testing + description: "Testing and quality assurance queries" + pii_enabled: false # Disable PII detection for testing purposes + system_prompt: "You are a QA assistant helping with test scenarios." + model_scores: + - model: general-llm + score: 0.6 + use_reasoning: false + + # Default category: Uses global setting + - name: general + description: "General queries that don't fit into specific categories" + # pii_enabled not specified - will inherit from global (enabled if pii_model is configured) + # pii_threshold not specified - will inherit from global threshold (0.7) + system_prompt: "You are a helpful assistant." 
+ model_scores: + - model: general-llm + score: 0.5 + use_reasoning: false + +# Model configuration +model_config: + "secure-llm": + preferred_endpoints: ["secure-endpoint"] + pii_policy: + allow_by_default: false # Deny all PII by default for secure model + pii_types_allowed: + - "GPE" # Geopolitical entities (cities, countries) are OK + - "ORGANIZATION" # Organization names are OK + + "general-llm": + preferred_endpoints: ["general-endpoint"] + pii_policy: + allow_by_default: true # Allow all PII for general model + +# Default model for fallback +default_model: general-llm + +# vLLM endpoints configuration +vllm_endpoints: + - name: "secure-endpoint" + address: "127.0.0.1" + port: 8000 + weight: 1 + + - name: "general-endpoint" + address: "127.0.0.1" + port: 8001 + weight: 1 + +# Usage Notes: +# ============= +# 1. Global Settings: +# - classifier.pii_model: Configures the PII detection model and default threshold +# - threshold: Sets the default detection threshold (0.0-1.0) for all categories +# 2. Category Overrides: +# - pii_enabled: Override global enabled/disabled setting per category +# - pii_threshold: Override global threshold per category +# 3. Inheritance: +# - If pii_enabled is not specified, inherits from global (enabled if pii_model is configured) +# - If pii_threshold is not specified, inherits from global classifier.pii_model.threshold +# 4. 
Threshold Tuning Guidelines: +# - Higher threshold (0.85-0.95): Only high-confidence entities are flagged; fewer false positives, may miss subtle PII +# * Use for: Healthcare, Finance, Legal categories where precision is critical +# * Risk: May miss some PII entities with lower confidence +# - Medium threshold (0.65-0.85): Balanced detection, good for most use cases +# * Use for: Customer support, HR, General business queries +# * Risk: Moderate false positive/negative rate +# - Lower threshold (0.4-0.65): More sensitive detection, catches more PII, higher false positive rate +# * Use for: Public content, Documentation, Internal code generation (where broad detection is acceptable) +# * Risk: Higher false positive rate, especially with technical content +# 5. PII Type Considerations: +# - Different PII types have different consequences: +# * Critical (SSN, Credit Card, Passwords): Use threshold 0.9+ +# * Sensitive (Email, Phone, Address): Use threshold 0.75-0.9 +# * General (Names, Organizations, Dates): Use threshold 0.6-0.75 +# - Consider using different thresholds per category based on expected PII types +# 6. Use Cases by Category: +# - Healthcare: High threshold (0.9+) to avoid false positives on medical terms +# - Finance: Very high threshold (0.95+) for critical financial PII +# - Customer Support: Medium-high threshold (0.8) for balanced protection +# - Code/Technical: Lower threshold (0.5-0.6) for broad detection in a trusted environment; expect more false positives on code artifacts +# - Public Content: Lower threshold (0.6) to catch more potential PII before publication +# - Testing: Disabled to avoid interference with test data +# 7. 
Security Best Practices: +# - Enable PII detection by default (configure classifier.pii_model) +# - Only disable or use very low thresholds for specific categories where risk is managed +# - Consider the consequences of PII exposure on a per-category basis +# - Monitor false positive and false negative rates to tune thresholds appropriately +# - Combine with model-level PII policies (pii_policy) for defense in depth +# - Use different thresholds for different sensitivity levels: +# * Public-facing categories: Higher thresholds to reduce false positives +# * Internal categories: Lower thresholds for broader detection +# * Critical categories: Highest thresholds for precision +# 8. Testing and Tuning: +# - Start with conservative (higher) thresholds and adjust based on false positive rate +# - Monitor PII detection metrics to understand category-specific patterns +# - Test with representative data for each category to validate threshold settings +# - Consider A/B testing different thresholds to find optimal values diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index 8e5d34aa..983fd6e7 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -376,6 +376,12 @@ type Category struct { // JailbreakThreshold defines the confidence threshold for jailbreak detection (0.0-1.0) // If nil, uses the global threshold from PromptGuard.Threshold JailbreakThreshold *float32 `yaml:"jailbreak_threshold,omitempty"` + // PIIEnabled controls whether PII detection is enabled for this category + // If nil, inherits from global PII detection enabled setting (based on classifier.pii_model configuration) + PIIEnabled *bool `yaml:"pii_enabled,omitempty"` + // PIIThreshold defines the confidence threshold for PII detection (0.0-1.0) + // If nil, uses the global threshold from Classifier.PIIModel.Threshold + PIIThreshold *float32 `yaml:"pii_threshold,omitempty"` } // GetModelReasoningFamily returns 
the reasoning family configuration for a given model name @@ -843,3 +849,25 @@ func (c *RouterConfig) GetJailbreakThresholdForCategory(categoryName string) flo // Fall back to global threshold return c.PromptGuard.Threshold } + +// IsPIIEnabledForCategory returns whether PII detection is enabled for a specific category +// An explicit category pii_enabled value takes precedence, even when the global PII classifier is not configured; otherwise (or if the category is not found) the global setting is used +func (c *RouterConfig) IsPIIEnabledForCategory(categoryName string) bool { + category := c.GetCategoryByName(categoryName) + if category != nil && category.PIIEnabled != nil { + return *category.PIIEnabled + } + // Fall back to global setting + return c.IsPIIClassifierEnabled() +} + +// GetPIIThresholdForCategory returns the effective PII detection threshold for a category +// Priority: category-specific pii_threshold > global classifier.pii_model threshold (also used when the category is not found) +func (c *RouterConfig) GetPIIThresholdForCategory(categoryName string) float32 { + category := c.GetCategoryByName(categoryName) + if category != nil && category.PIIThreshold != nil { + return *category.PIIThreshold + } + // Fall back to global threshold + return c.Classifier.PIIModel.Threshold +} diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go index ff027be3..c851b8b3 100644 --- a/src/semantic-router/pkg/config/config_test.go +++ b/src/semantic-router/pkg/config/config_test.go @@ -2113,4 +2113,150 @@ categories: }) }) }) + + Describe("GetPIIThresholdForCategory", func() { + Context("when global threshold is set", func() { + It("should return global threshold for category without explicit setting", func() { + category := config.Category{ + Name: "test", + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.Threshold = 0.7 + + 
Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.7))) + }) + + It("should return category-specific threshold when set", func() { + category := config.Category{ + Name: "test", + PIIThreshold: config.Float32Ptr(0.9), + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.Threshold = 0.7 + + Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.9))) + }) + + It("should allow lower threshold override", func() { + category := config.Category{ + Name: "test", + PIIThreshold: config.Float32Ptr(0.5), + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.Threshold = 0.7 + + Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.5))) + }) + + It("should allow higher threshold override", func() { + category := config.Category{ + Name: "test", + PIIThreshold: config.Float32Ptr(0.95), + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.Threshold = 0.7 + + Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.95))) + }) + }) + + Context("when category does not exist", func() { + It("should fall back to global threshold", func() { + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{}, + } + cfg.Classifier.PIIModel.Threshold = 0.8 + + Expect(cfg.GetPIIThresholdForCategory("nonexistent")).To(Equal(float32(0.8))) + }) + }) + }) + + 
Describe("IsPIIEnabledForCategory", func() { + Context("when global PII is enabled", func() { + It("should return true for category without explicit setting", func() { + category := config.Category{ + Name: "test", + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.ModelID = "test-model" + cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json" + + Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue()) + }) + + It("should return category-specific setting when set to false", func() { + category := config.Category{ + Name: "test", + PIIEnabled: config.BoolPtr(false), + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + cfg.Classifier.PIIModel.ModelID = "test-model" + cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json" + + Expect(cfg.IsPIIEnabledForCategory("test")).To(BeFalse()) + }) + + It("should return category-specific setting when set to true", func() { + category := config.Category{ + Name: "test", + PIIEnabled: config.BoolPtr(true), + ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}}, + } + + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{category}, + } + // Global is disabled (no model ID) + cfg.Classifier.PIIModel.ModelID = "" + + Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue()) + }) + }) + + Context("when category does not exist", func() { + It("should fall back to global setting", func() { + cfg := &config.RouterConfig{ + Classifier: config.RouterConfig{}.Classifier, + Categories: []config.Category{}, + } + 
cfg.Classifier.PIIModel.ModelID = "test-model" + cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json" + + Expect(cfg.IsPIIEnabledForCategory("nonexistent")).To(BeTrue()) + }) + }) + }) }) diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go index 3dc820f1..de7e02ef 100644 --- a/src/semantic-router/pkg/utils/classification/classifier.go +++ b/src/semantic-router/pkg/utils/classification/classifier.go @@ -688,6 +688,11 @@ func (c *Classifier) classifyCategoryWithEntropyInTree(text string) (string, flo // ClassifyPII performs PII token classification on the given text and returns detected PII types func (c *Classifier) ClassifyPII(text string) ([]string, error) { + return c.ClassifyPIIWithThreshold(text, c.Config.Classifier.PIIModel.Threshold) +} + +// ClassifyPIIWithThreshold performs PII token classification with a custom threshold +func (c *Classifier) ClassifyPIIWithThreshold(text string, threshold float32) ([]string, error) { if !c.IsPIIEnabled() { return []string{}, fmt.Errorf("PII detection is not properly configured") } @@ -712,7 +717,7 @@ func (c *Classifier) ClassifyPII(text string) ([]string, error) { // Extract unique PII types from detected entities piiTypes := make(map[string]bool) for _, entity := range tokenResult.Entities { - if entity.Confidence >= c.Config.Classifier.PIIModel.Threshold { + if entity.Confidence >= threshold { piiTypes[entity.EntityType] = true observability.Infof("Detected PII entity: %s ('%s') at [%d-%d] with confidence %.3f", entity.EntityType, entity.Text, entity.Start, entity.End, entity.Confidence) @@ -762,6 +767,11 @@ func (c *Classifier) DetectPIIInContent(allContent []string) []string { // AnalyzeContentForPII performs detailed PII analysis on multiple content pieces func (c *Classifier) AnalyzeContentForPII(contentList []string) (bool, []PIIAnalysisResult, error) { + return c.AnalyzeContentForPIIWithThreshold(contentList, 
c.Config.Classifier.PIIModel.Threshold) +} + +// AnalyzeContentForPIIWithThreshold performs detailed PII analysis with a custom threshold +func (c *Classifier) AnalyzeContentForPIIWithThreshold(contentList []string, threshold float32) (bool, []PIIAnalysisResult, error) { if !c.IsPIIEnabled() { return false, nil, fmt.Errorf("PII detection is not properly configured") } @@ -790,7 +800,7 @@ func (c *Classifier) AnalyzeContentForPII(contentList []string) (bool, []PIIAnal // Convert token entities to PII detections for _, entity := range tokenResult.Entities { - if entity.Confidence >= c.Config.Classifier.PIIModel.Threshold { + if entity.Confidence >= threshold { detection := PIIDetection{ EntityType: entity.EntityType, Start: entity.Start, diff --git a/website/docs/overview/categories/configuration.md b/website/docs/overview/categories/configuration.md index 9a274ec9..6ed2ad3c 100644 --- a/website/docs/overview/categories/configuration.md +++ b/website/docs/overview/categories/configuration.md @@ -143,6 +143,7 @@ categories: ``` **Threshold Guidelines**: + - **0.8-0.95**: High-security categories (customer support, business) - **0.6-0.8**: Standard categories (general queries) - **0.4-0.6**: Technical categories (code generation, development tools) diff --git a/website/docs/tutorials/content-safety/pii-detection.md b/website/docs/tutorials/content-safety/pii-detection.md index cd58ec60..665d9210 100644 --- a/website/docs/tutorials/content-safety/pii-detection.md +++ b/website/docs/tutorials/content-safety/pii-detection.md @@ -46,11 +46,87 @@ Enable PII detection in your configuration: classifier: pii_model: model_id: "models/pii_classifier_modernbert-base_model" - threshold: 0.7 # Detection sensitivity (0.0-1.0) + threshold: 0.7 # Global detection threshold (0.0-1.0) use_cpu: true # Run on CPU pii_mapping_path: "config/pii_type_mapping.json" # Path to PII type mapping ``` +### Category-Level PII Detection + +**New in v0.x**: Configure PII detection thresholds at the 
category level for fine-grained control based on category-specific requirements and consequences. + +```yaml +# Global PII configuration - applies to all categories by default +classifier: + pii_model: + model_id: "models/pii_classifier_modernbert-base_model" + threshold: 0.7 # Global default threshold + use_cpu: true + pii_mapping_path: "config/pii_type_mapping.json" + +# Category-specific PII settings +categories: + # Healthcare category: High threshold for critical PII + - name: healthcare + description: "Healthcare and medical queries" + pii_enabled: true # Enable PII detection (default: inherits from global) + pii_threshold: 0.9 # Higher threshold: only high-confidence entities are flagged + model_scores: + - model: secure-llm + score: 0.9 + use_reasoning: false + + # Finance category: Very high threshold for financial PII + - name: finance + description: "Financial queries" + pii_enabled: true + pii_threshold: 0.95 # Very strict for SSN, credit cards, etc. + model_scores: + - model: secure-llm + score: 0.9 + use_reasoning: false + + # Code generation: Lower threshold for broad detection (trusted internal use) + - name: code_generation + description: "Code and technical content" + pii_enabled: true + pii_threshold: 0.5 # Lower threshold flags more entities; expect more false positives on code artifacts + model_scores: + - model: general-llm + score: 0.9 + use_reasoning: true + + # Testing: Disable PII detection + - name: testing + description: "Test scenarios" + pii_enabled: false # Disable for testing + model_scores: + - model: general-llm + score: 0.6 + use_reasoning: false + + # General: Uses global settings + - name: general + description: "General queries" + # pii_enabled and pii_threshold not specified - inherits global settings + model_scores: + - model: general-llm + score: 0.5 + use_reasoning: false +``` + +**Configuration Inheritance:** + +- `pii_enabled`: If not specified, inherits from global PII model configuration (enabled if `pii_model` is configured) +- `pii_threshold`: If not specified, inherits from 
`classifier.pii_model.threshold` + +**Threshold Guidelines by Category:** + +- **Critical categories** (healthcare, finance, legal): 0.9-0.95 - High-confidence matches only, fewer false positives +- **Customer-facing** (support, sales): 0.75-0.85 - Balanced detection +- **Internal tools** (code, testing): 0.5-0.65 - Broad detection; expect more false positives on code artifacts +- **Public content** (docs, marketing): 0.6-0.75 - Broader detection before publication + ### Model-Specific PII Policies Configure different PII policies for different models: @@ -99,6 +175,12 @@ PII detection is automatically integrated into the routing process. When a reque 3. Filters out models that don't allow the detected PII types 4. Routes to an appropriate model that can handle the PII +**Note**: The current implementation uses the global PII threshold during automatic routing. To use category-specific thresholds, you can: + +- Configure thresholds appropriately for each category in your config +- Access category-specific thresholds using `config.GetPIIThresholdForCategory(categoryName)` in your code +- Call `classifier.ClassifyPIIWithThreshold(text, threshold)` with the category-specific threshold when you have category context + ### Classification Endpoint You can also check PII detection directly using the classification API: @@ -133,12 +215,46 @@ pii_requests_masked_total 15 - Start with `threshold: 0.7` for balanced accuracy - Increase to `0.8-0.9` for high-security environments - Decrease to `0.5-0.6` for broader detection +- **Use category-level thresholds** for fine-grained control based on PII type consequences + +#### Category-Specific Threshold Guidelines + +Different categories have different PII sensitivity requirements: + +**Critical Categories (Healthcare, Finance, Legal):** + +- Threshold: `0.9-0.95` +- Rationale: High precision required; false positives on medical/financial terms are costly +- Example PII: SSN, Credit Cards, Medical Records +- Risk if too low: Too many false positives disrupt workflows + 
+**Customer-Facing Categories (Support, Sales):** + +- Threshold: `0.75-0.85` +- Rationale: Balance between catching PII and avoiding false positives +- Example PII: Email, Phone, Names, Addresses +- Risk if too low: Moderate false positive rate + +**Internal Tools (Code Generation, Development):** + +- Threshold: `0.5-0.65` +- Rationale: Trusted internal environment where broad detection is acceptable; a lower threshold flags more low-confidence entities +- Example PII: Variable names, test data that looks like PII (typical false positives) +- Risk if too low: Flags harmless code artifacts as PII; raise the threshold if this becomes noisy + +**Public Content (Documentation, Marketing):** + +- Threshold: `0.6-0.75` +- Rationale: Broader detection before publication; acceptable to review more false positives +- Example PII: Author names, example emails, placeholder data +- Risk if too high: May miss PII that could be published ### 2. Policy Design - Use `allow_by_default: false` for sensitive models - Explicitly list allowed PII types for clarity - Consider different policies for different use cases +- **Combine category-level thresholds with model-level policies** for defense in depth ### 3. Action Selection