Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions config/examples/pii_category_example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# Category-Level PII Detection Example
# This example demonstrates how to configure PII detection at the category level
# Different categories can have different PII detection settings and thresholds based on their sensitivity

# Global PII detection configuration (can be overridden per category)
classifier:
pii_model:
model_id: "models/pii_classifier_modernbert-base_model"
threshold: 0.7 # Global default threshold - can be overridden per category
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json"

category_model:
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

# Categories with different PII detection settings
categories:
# High-security category: Strict PII detection with high threshold
- name: healthcare
description: "Healthcare and medical queries"
pii_enabled: true # Explicitly enable (inherits from global by default)
pii_threshold: 0.9 # Higher threshold for stricter detection (fewer false positives)
system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care."
model_scores:
- model: secure-llm
score: 0.9
use_reasoning: false

# Financial category: Very strict PII detection
- name: finance
description: "Financial and banking queries"
pii_enabled: true
pii_threshold: 0.95 # Very high threshold for critical PII like SSN, credit cards
system_prompt: "You are a financial advisor. Never store or log any PII information."
model_scores:
- model: secure-llm
score: 0.9
use_reasoning: false

# Customer support: Balanced threshold
- name: customer_support
description: "Customer support and general inquiries"
pii_enabled: true
pii_threshold: 0.8 # Slightly higher than global for customer-facing content
system_prompt: "You are a friendly customer support agent. Be cautious with customer information."
model_scores:
- model: general-llm
score: 0.8
use_reasoning: false

# Internal tools: Relaxed threshold (trusted environment)
- name: code_generation
description: "Internal code generation and development tools"
pii_enabled: true
pii_threshold: 0.5 # Lower threshold: more sensitive detection; expect more false positives on code/technical content
system_prompt: "You are a code generation assistant for internal developers."
model_scores:
- model: general-llm
score: 0.9
use_reasoning: true

# Public documentation: Lower threshold for broader detection
- name: documentation
description: "Public documentation and help articles"
pii_enabled: true
pii_threshold: 0.6 # Lower threshold to catch more potential PII in public content
system_prompt: "You are a documentation assistant. Help create clear public documentation."
model_scores:
- model: general-llm
score: 0.7
use_reasoning: false

# Testing category: Disable PII detection
- name: testing
description: "Testing and quality assurance queries"
pii_enabled: false # Disable PII detection for testing purposes
system_prompt: "You are a QA assistant helping with test scenarios."
model_scores:
- model: general-llm
score: 0.6
use_reasoning: false

# Default category: Uses global setting
- name: general
description: "General queries that don't fit into specific categories"
# pii_enabled not specified - will inherit from global (enabled if pii_model is configured)
# pii_threshold not specified - will inherit from global threshold (0.7)
system_prompt: "You are a helpful assistant."
model_scores:
- model: general-llm
score: 0.5
use_reasoning: false

# Model configuration
model_config:
"secure-llm":
preferred_endpoints: ["secure-endpoint"]
pii_policy:
allow_by_default: false # Deny all PII by default for secure model
pii_types_allowed:
- "GPE" # Geopolitical entities (cities, countries) are OK
- "ORGANIZATION" # Organization names are OK

"general-llm":
preferred_endpoints: ["general-endpoint"]
pii_policy:
allow_by_default: true # Allow all PII for general model

# Default model for fallback
default_model: general-llm

# vLLM endpoints configuration
vllm_endpoints:
- name: "secure-endpoint"
address: "127.0.0.1"
port: 8000
weight: 1

- name: "general-endpoint"
address: "127.0.0.1"
port: 8001
weight: 1

# Usage Notes:
# =============
# 1. Global Settings:
# - classifier.pii_model: Configures the PII detection model and default threshold
# - threshold: Sets the default detection threshold (0.0-1.0) for all categories
# 2. Category Overrides:
# - pii_enabled: Override global enabled/disabled setting per category
# - pii_threshold: Override global threshold per category
# 3. Inheritance:
# - If pii_enabled is not specified, inherits from global (enabled if pii_model is configured)
# - If pii_threshold is not specified, inherits from global classifier.pii_model.threshold
# 4. Threshold Tuning Guidelines:
# - Higher threshold (0.85-0.95): Stricter detection, fewer false positives, may miss subtle PII
# * Use for: Healthcare, Finance, Legal categories where precision is critical
# * Risk: May miss some PII entities with lower confidence
# - Medium threshold (0.65-0.85): Balanced detection, good for most use cases
# * Use for: Customer support, HR, General business queries
# * Risk: Moderate false positive/negative rate
# - Lower threshold (0.4-0.65): More sensitive detection, catches more PII, higher false positive rate
# * Use for: Public content, Documentation, Code generation (where missing PII is costlier than a false positive)
# * Risk: Higher false positive rate, especially with technical content
# 5. PII Type Considerations:
# - Different PII types have different consequences:
# * Critical (SSN, Credit Card, Passwords): Use threshold 0.9+
# * Sensitive (Email, Phone, Address): Use threshold 0.75-0.9
# * General (Names, Organizations, Dates): Use threshold 0.6-0.75
# - Consider using different thresholds per category based on expected PII types
# 6. Use Cases by Category:
# - Healthcare: High threshold (0.9+) to avoid false positives on medical terms
# - Finance: Very high threshold (0.95+) for critical financial PII
# - Customer Support: Medium-high threshold (0.8) for balanced protection
# - Code/Technical: Lower threshold (0.5-0.6) for broader detection; expect more false positives on code artifacts
# - Public Content: Lower threshold (0.6) to catch more potential PII before publication
# - Testing: Disabled to avoid interference with test data
# 7. Security Best Practices:
# - Enable PII detection by default (configure classifier.pii_model)
# - Only disable or use very low thresholds for specific categories where risk is managed
# - Consider the consequences of PII exposure on a per-category basis
# - Monitor false positive and false negative rates to tune thresholds appropriately
# - Combine with model-level PII policies (pii_policy) for defense in depth
# - Use different thresholds for different sensitivity levels:
# * Public-facing categories: Higher thresholds to reduce false positives
# * Internal categories: Lower thresholds for broader detection
# * Critical categories: Highest thresholds for precision
# 8. Testing and Tuning:
# - Start with conservative (higher) thresholds and adjust based on false positive rate
# - Monitor PII detection metrics to understand category-specific patterns
# - Test with representative data for each category to validate threshold settings
# - Consider A/B testing different thresholds to find optimal values
28 changes: 28 additions & 0 deletions src/semantic-router/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,12 @@ type Category struct {
// JailbreakThreshold defines the confidence threshold for jailbreak detection (0.0-1.0)
// If nil, uses the global threshold from PromptGuard.Threshold
JailbreakThreshold *float32 `yaml:"jailbreak_threshold,omitempty"`
// PIIEnabled controls whether PII detection is enabled for this category
// If nil, inherits from global PII detection enabled setting (based on classifier.pii_model configuration)
PIIEnabled *bool `yaml:"pii_enabled,omitempty"`
// PIIThreshold defines the confidence threshold for PII detection (0.0-1.0)
// If nil, uses the global threshold from Classifier.PIIModel.Threshold
PIIThreshold *float32 `yaml:"pii_threshold,omitempty"`
}

// GetModelReasoningFamily returns the reasoning family configuration for a given model name
Expand Down Expand Up @@ -843,3 +849,25 @@ func (c *RouterConfig) GetJailbreakThresholdForCategory(categoryName string) flo
// Fall back to global threshold
return c.PromptGuard.Threshold
}

// IsPIIEnabledForCategory reports whether PII detection should run for the named category.
// An explicit pii_enabled value on the category wins. When the category is unknown or
// leaves pii_enabled unset, the result of IsPIIClassifierEnabled is returned instead.
func (c *RouterConfig) IsPIIEnabledForCategory(categoryName string) bool {
	if cat := c.GetCategoryByName(categoryName); cat != nil {
		if override := cat.PIIEnabled; override != nil {
			return *override
		}
	}
	// No category-level override: defer to the global setting.
	return c.IsPIIClassifierEnabled()
}

// GetPIIThresholdForCategory resolves the PII detection threshold that applies to the
// named category. The category's own pii_threshold is used when present; otherwise
// (including when no such category exists) Classifier.PIIModel.Threshold is returned.
func (c *RouterConfig) GetPIIThresholdForCategory(categoryName string) float32 {
	cat := c.GetCategoryByName(categoryName)
	if cat == nil || cat.PIIThreshold == nil {
		// No category-level override: use the global threshold.
		return c.Classifier.PIIModel.Threshold
	}
	return *cat.PIIThreshold
}
146 changes: 146 additions & 0 deletions src/semantic-router/pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2113,4 +2113,150 @@ categories:
})
})
})

Describe("GetPIIThresholdForCategory", func() {
	// categoryWithThreshold returns the "test" category carrying the given
	// pii_threshold override; nil leaves the override unset.
	categoryWithThreshold := func(threshold *float32) config.Category {
		return config.Category{
			Name:         "test",
			PIIThreshold: threshold,
			ModelScores:  []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
		}
	}

	// configWithGlobalThreshold builds a RouterConfig holding the given categories
	// with the global PII model threshold set to globalThreshold. The Classifier
	// field is otherwise left at its zero value.
	configWithGlobalThreshold := func(globalThreshold float32, categories []config.Category) *config.RouterConfig {
		routerCfg := &config.RouterConfig{Categories: categories}
		routerCfg.Classifier.PIIModel.Threshold = globalThreshold
		return routerCfg
	}

	Context("when global threshold is set", func() {
		It("should return global threshold for category without explicit setting", func() {
			cfg := configWithGlobalThreshold(0.7, []config.Category{categoryWithThreshold(nil)})

			Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.7)))
		})

		It("should return category-specific threshold when set", func() {
			override := config.Float32Ptr(0.9)
			cfg := configWithGlobalThreshold(0.7, []config.Category{categoryWithThreshold(override)})

			Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.9)))
		})

		It("should allow lower threshold override", func() {
			override := config.Float32Ptr(0.5)
			cfg := configWithGlobalThreshold(0.7, []config.Category{categoryWithThreshold(override)})

			Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.5)))
		})

		It("should allow higher threshold override", func() {
			override := config.Float32Ptr(0.95)
			cfg := configWithGlobalThreshold(0.7, []config.Category{categoryWithThreshold(override)})

			Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.95)))
		})
	})

	Context("when category does not exist", func() {
		It("should fall back to global threshold", func() {
			cfg := configWithGlobalThreshold(0.8, []config.Category{})

			Expect(cfg.GetPIIThresholdForCategory("nonexistent")).To(Equal(float32(0.8)))
		})
	})
})

Describe("IsPIIEnabledForCategory", func() {
	// categoryWithPIIEnabled returns the "test" category carrying the given
	// pii_enabled override; nil leaves the override unset.
	categoryWithPIIEnabled := func(enabled *bool) config.Category {
		return config.Category{
			Name:        "test",
			PIIEnabled:  enabled,
			ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
		}
	}

	// configWithCategories builds a RouterConfig holding the given categories and
	// a zero-valued Classifier section.
	configWithCategories := func(categories []config.Category) *config.RouterConfig {
		return &config.RouterConfig{Categories: categories}
	}

	// configureGlobalPIIModel fills in the PII model ID and mapping path on cfg.
	configureGlobalPIIModel := func(cfg *config.RouterConfig) {
		cfg.Classifier.PIIModel.ModelID = "test-model"
		cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
	}

	Context("when global PII is enabled", func() {
		It("should return true for category without explicit setting", func() {
			cfg := configWithCategories([]config.Category{categoryWithPIIEnabled(nil)})
			configureGlobalPIIModel(cfg)

			Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
		})

		It("should return category-specific setting when set to false", func() {
			cfg := configWithCategories([]config.Category{categoryWithPIIEnabled(config.BoolPtr(false))})
			configureGlobalPIIModel(cfg)

			Expect(cfg.IsPIIEnabledForCategory("test")).To(BeFalse())
		})

		It("should return category-specific setting when set to true", func() {
			cfg := configWithCategories([]config.Category{categoryWithPIIEnabled(config.BoolPtr(true))})
			// Global is disabled (no model ID)
			cfg.Classifier.PIIModel.ModelID = ""

			Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
		})
	})

	Context("when category does not exist", func() {
		It("should fall back to global setting", func() {
			cfg := configWithCategories([]config.Category{})
			configureGlobalPIIModel(cfg)

			Expect(cfg.IsPIIEnabledForCategory("nonexistent")).To(BeTrue())
		})
	})
})
})
Loading
Loading