Skip to content

Commit 968b05f

Browse files
Copilot and Xunzhuo authored
Allow PII detection threshold to be set at the category level (#510)
* Initial plan * Add category-level PII threshold support Co-authored-by: Xunzhuo <[email protected]> * Update documentation with API integration notes Co-authored-by: Xunzhuo <[email protected]> * Fix markdown linting issues Co-authored-by: Xunzhuo <[email protected]> --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: Xunzhuo <[email protected]>
1 parent bc4cc98 commit 968b05f

File tree

6 files changed

+480
-3
lines changed

6 files changed

+480
-3
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Category-Level PII Detection Example
2+
# This example demonstrates how to configure PII detection at the category level
3+
# Different categories can have different PII detection settings and thresholds based on their sensitivity
4+
5+
# Global PII detection configuration (can be overridden per category)
6+
classifier:
7+
pii_model:
8+
model_id: "models/pii_classifier_modernbert-base_model"
9+
threshold: 0.7 # Global default threshold - can be overridden per category
10+
use_cpu: true
11+
pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json"
12+
13+
category_model:
14+
model_id: "models/category_classifier_modernbert-base_model"
15+
use_modernbert: true
16+
threshold: 0.6
17+
use_cpu: true
18+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
19+
20+
# Categories with different PII detection settings
21+
categories:
22+
# High-security category: Strict PII detection with high threshold
23+
- name: healthcare
24+
description: "Healthcare and medical queries"
25+
pii_enabled: true # Explicitly enable (inherits from global by default)
26+
pii_threshold: 0.9 # Higher threshold for stricter detection (fewer false positives)
27+
system_prompt: "You are a healthcare assistant. Handle all personal information with utmost care."
28+
model_scores:
29+
- model: secure-llm
30+
score: 0.9
31+
use_reasoning: false
32+
33+
# Financial category: Very strict PII detection
34+
- name: finance
35+
description: "Financial and banking queries"
36+
pii_enabled: true
37+
pii_threshold: 0.95 # Very high threshold for critical PII like SSN, credit cards
38+
system_prompt: "You are a financial advisor. Never store or log any PII information."
39+
model_scores:
40+
- model: secure-llm
41+
score: 0.9
42+
use_reasoning: false
43+
44+
# Customer support: Balanced threshold
45+
- name: customer_support
46+
description: "Customer support and general inquiries"
47+
pii_enabled: true
48+
pii_threshold: 0.8 # Slightly higher than global for customer-facing content
49+
system_prompt: "You are a friendly customer support agent. Be cautious with customer information."
50+
model_scores:
51+
- model: general-llm
52+
score: 0.8
53+
use_reasoning: false
54+
55+
# Internal tools: Relaxed threshold (trusted environment)
56+
- name: code_generation
57+
description: "Internal code generation and development tools"
58+
pii_enabled: true
59+
pii_threshold: 0.5 # Lower threshold: flags more potential PII (expect more false positives on code/technical content)
60+
system_prompt: "You are a code generation assistant for internal developers."
61+
model_scores:
62+
- model: general-llm
63+
score: 0.9
64+
use_reasoning: true
65+
66+
# Public documentation: Lower threshold for broader detection
67+
- name: documentation
68+
description: "Public documentation and help articles"
69+
pii_enabled: true
70+
pii_threshold: 0.6 # Lower threshold to catch more potential PII in public content
71+
system_prompt: "You are a documentation assistant. Help create clear public documentation."
72+
model_scores:
73+
- model: general-llm
74+
score: 0.7
75+
use_reasoning: false
76+
77+
# Testing category: Disable PII detection
78+
- name: testing
79+
description: "Testing and quality assurance queries"
80+
pii_enabled: false # Disable PII detection for testing purposes
81+
system_prompt: "You are a QA assistant helping with test scenarios."
82+
model_scores:
83+
- model: general-llm
84+
score: 0.6
85+
use_reasoning: false
86+
87+
# Default category: Uses global setting
88+
- name: general
89+
description: "General queries that don't fit into specific categories"
90+
# pii_enabled not specified - will inherit from global (enabled if pii_model is configured)
91+
# pii_threshold not specified - will inherit from global threshold (0.7)
92+
system_prompt: "You are a helpful assistant."
93+
model_scores:
94+
- model: general-llm
95+
score: 0.5
96+
use_reasoning: false
97+
98+
# Model configuration
99+
model_config:
100+
"secure-llm":
101+
preferred_endpoints: ["secure-endpoint"]
102+
pii_policy:
103+
allow_by_default: false # Deny all PII by default for secure model
104+
pii_types_allowed:
105+
- "GPE" # Geopolitical entities (cities, countries) are OK
106+
- "ORGANIZATION" # Organization names are OK
107+
108+
"general-llm":
109+
preferred_endpoints: ["general-endpoint"]
110+
pii_policy:
111+
allow_by_default: true # Allow all PII for general model
112+
113+
# Default model for fallback
114+
default_model: general-llm
115+
116+
# vLLM endpoints configuration
117+
vllm_endpoints:
118+
- name: "secure-endpoint"
119+
address: "127.0.0.1"
120+
port: 8000
121+
weight: 1
122+
123+
- name: "general-endpoint"
124+
address: "127.0.0.1"
125+
port: 8001
126+
weight: 1
127+
128+
# Usage Notes:
129+
# =============
130+
# 1. Global Settings:
131+
# - classifier.pii_model: Configures the PII detection model and default threshold
132+
# - threshold: Sets the default detection threshold (0.0-1.0) for all categories
133+
# 2. Category Overrides:
134+
# - pii_enabled: Override global enabled/disabled setting per category
135+
# - pii_threshold: Override global threshold per category
136+
# 3. Inheritance:
137+
# - If pii_enabled is not specified, inherits from global (enabled if pii_model is configured)
138+
# - If pii_threshold is not specified, inherits from global classifier.pii_model.threshold
139+
# 4. Threshold Tuning Guidelines:
140+
# - Higher threshold (0.85-0.95): Stricter detection, fewer false positives, may miss subtle PII
141+
# * Use for: Healthcare, Finance, Legal categories where precision is critical
142+
# * Risk: May miss some PII entities with lower confidence
143+
# - Medium threshold (0.65-0.85): Balanced detection, good for most use cases
144+
# * Use for: Customer support, HR, General business queries
145+
# * Risk: Moderate false positive/negative rate
146+
# - Lower threshold (0.4-0.65): More sensitive detection, catches more PII, higher false positive rate
147+
# * Use for: Public content, Documentation, Code generation (when catching more PII matters more than avoiding false positives)
148+
# * Risk: Higher false positive rate, especially with technical content
149+
# 5. PII Type Considerations:
150+
# - Different PII types have different consequences:
151+
# * Critical (SSN, Credit Card, Passwords): Use threshold 0.9+
152+
# * Sensitive (Email, Phone, Address): Use threshold 0.75-0.9
153+
# * General (Names, Organizations, Dates): Use threshold 0.6-0.75
154+
# - Consider using different thresholds per category based on expected PII types
155+
# 6. Use Cases by Category:
156+
# - Healthcare: High threshold (0.9+) to avoid false positives on medical terms
157+
# - Finance: Very high threshold (0.95+) for critical financial PII
158+
# - Customer Support: Medium-high threshold (0.8) for balanced protection
159+
# - Code/Technical: Lower threshold (0.5-0.6) for broader detection; expect more false positives on code artifacts
160+
# - Public Content: Lower threshold (0.6) to catch more potential PII before publication
161+
# - Testing: Disabled to avoid interference with test data
162+
# 7. Security Best Practices:
163+
# - Enable PII detection by default (configure classifier.pii_model)
164+
# - Only disable detection or use very high thresholds (which can miss PII) for specific categories where risk is managed
165+
# - Consider the consequences of PII exposure on a per-category basis
166+
# - Monitor false positive and false negative rates to tune thresholds appropriately
167+
# - Combine with model-level PII policies (pii_policy) for defense in depth
168+
# - Use different thresholds for different sensitivity levels:
169+
# * Customer-facing categories: Higher thresholds to reduce false positives
170+
# * Internal categories: Lower thresholds for broader detection
171+
# * Critical categories: Highest thresholds for precision
172+
# 8. Testing and Tuning:
173+
# - Start with higher thresholds (fewer false positives) and lower them if real PII is being missed
174+
# - Monitor PII detection metrics to understand category-specific patterns
175+
# - Test with representative data for each category to validate threshold settings
176+
# - Consider A/B testing different thresholds to find optimal values

src/semantic-router/pkg/config/config.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,12 @@ type Category struct {
376376
// JailbreakThreshold defines the confidence threshold for jailbreak detection (0.0-1.0)
377377
// If nil, uses the global threshold from PromptGuard.Threshold
378378
JailbreakThreshold *float32 `yaml:"jailbreak_threshold,omitempty"`
379+
// PIIEnabled controls whether PII detection is enabled for this category
380+
// If nil, inherits from global PII detection enabled setting (based on classifier.pii_model configuration)
381+
PIIEnabled *bool `yaml:"pii_enabled,omitempty"`
382+
// PIIThreshold defines the confidence threshold for PII detection (0.0-1.0)
383+
// If nil, uses the global threshold from Classifier.PIIModel.Threshold
384+
PIIThreshold *float32 `yaml:"pii_threshold,omitempty"`
379385
}
380386

381387
// GetModelReasoningFamily returns the reasoning family configuration for a given model name
@@ -843,3 +849,25 @@ func (c *RouterConfig) GetJailbreakThresholdForCategory(categoryName string) flo
843849
// Fall back to global threshold
844850
return c.PromptGuard.Threshold
845851
}
852+
853+
// IsPIIEnabledForCategory reports whether PII detection is enabled for the named category.
854+
// An explicit category pii_enabled value wins, including true when the global PII classifier is not enabled; otherwise the global setting is used.
855+
func (c *RouterConfig) IsPIIEnabledForCategory(categoryName string) bool {
856+
category := c.GetCategoryByName(categoryName)
857+
if category != nil && category.PIIEnabled != nil {
858+
return *category.PIIEnabled
859+
}
860+
// No category-level override (or no category matched): fall back to the global setting
861+
return c.IsPIIClassifierEnabled()
862+
}
863+
864+
// GetPIIThresholdForCategory returns the effective PII detection threshold for the named category.
865+
// Priority: the category's pii_threshold when set > global classifier.pii_model threshold; the value is returned as configured, with no range check here.
866+
func (c *RouterConfig) GetPIIThresholdForCategory(categoryName string) float32 {
867+
category := c.GetCategoryByName(categoryName)
868+
if category != nil && category.PIIThreshold != nil {
869+
return *category.PIIThreshold
870+
}
871+
// No category-level override (or no category matched): fall back to the global threshold
872+
return c.Classifier.PIIModel.Threshold
873+
}

src/semantic-router/pkg/config/config_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,4 +2113,150 @@ categories:
21132113
})
21142114
})
21152115
})
2116+
2117+
Describe("GetPIIThresholdForCategory", func() {
2118+
Context("when global threshold is set", func() {
2119+
It("should return global threshold for category without explicit setting", func() {
2120+
category := config.Category{
2121+
Name: "test",
2122+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2123+
}
2124+
2125+
cfg := &config.RouterConfig{
2126+
Classifier: config.RouterConfig{}.Classifier,
2127+
Categories: []config.Category{category},
2128+
}
2129+
cfg.Classifier.PIIModel.Threshold = 0.7
2130+
2131+
Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.7)))
2132+
})
2133+
2134+
It("should return category-specific threshold when set", func() {
2135+
category := config.Category{
2136+
Name: "test",
2137+
PIIThreshold: config.Float32Ptr(0.9),
2138+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2139+
}
2140+
2141+
cfg := &config.RouterConfig{
2142+
Classifier: config.RouterConfig{}.Classifier,
2143+
Categories: []config.Category{category},
2144+
}
2145+
cfg.Classifier.PIIModel.Threshold = 0.7
2146+
2147+
Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.9)))
2148+
})
2149+
2150+
It("should allow lower threshold override", func() {
2151+
category := config.Category{
2152+
Name: "test",
2153+
PIIThreshold: config.Float32Ptr(0.5),
2154+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2155+
}
2156+
2157+
cfg := &config.RouterConfig{
2158+
Classifier: config.RouterConfig{}.Classifier,
2159+
Categories: []config.Category{category},
2160+
}
2161+
cfg.Classifier.PIIModel.Threshold = 0.7
2162+
2163+
Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.5)))
2164+
})
2165+
2166+
It("should allow higher threshold override", func() {
2167+
category := config.Category{
2168+
Name: "test",
2169+
PIIThreshold: config.Float32Ptr(0.95),
2170+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2171+
}
2172+
2173+
cfg := &config.RouterConfig{
2174+
Classifier: config.RouterConfig{}.Classifier,
2175+
Categories: []config.Category{category},
2176+
}
2177+
cfg.Classifier.PIIModel.Threshold = 0.7
2178+
2179+
Expect(cfg.GetPIIThresholdForCategory("test")).To(Equal(float32(0.95)))
2180+
})
2181+
})
2182+
2183+
Context("when category does not exist", func() {
2184+
It("should fall back to global threshold", func() {
2185+
cfg := &config.RouterConfig{
2186+
Classifier: config.RouterConfig{}.Classifier,
2187+
Categories: []config.Category{},
2188+
}
2189+
cfg.Classifier.PIIModel.Threshold = 0.8
2190+
2191+
Expect(cfg.GetPIIThresholdForCategory("nonexistent")).To(Equal(float32(0.8)))
2192+
})
2193+
})
2194+
})
2195+
2196+
Describe("IsPIIEnabledForCategory", func() {
2197+
Context("when global PII is enabled", func() {
2198+
It("should return true for category without explicit setting", func() {
2199+
category := config.Category{
2200+
Name: "test",
2201+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2202+
}
2203+
2204+
cfg := &config.RouterConfig{
2205+
Classifier: config.RouterConfig{}.Classifier,
2206+
Categories: []config.Category{category},
2207+
}
2208+
cfg.Classifier.PIIModel.ModelID = "test-model"
2209+
cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
2210+
2211+
Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
2212+
})
2213+
2214+
It("should return category-specific setting when set to false", func() {
2215+
category := config.Category{
2216+
Name: "test",
2217+
PIIEnabled: config.BoolPtr(false),
2218+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2219+
}
2220+
2221+
cfg := &config.RouterConfig{
2222+
Classifier: config.RouterConfig{}.Classifier,
2223+
Categories: []config.Category{category},
2224+
}
2225+
cfg.Classifier.PIIModel.ModelID = "test-model"
2226+
cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
2227+
2228+
Expect(cfg.IsPIIEnabledForCategory("test")).To(BeFalse())
2229+
})
2230+
2231+
It("should return category-specific setting when set to true", func() {
2232+
category := config.Category{
2233+
Name: "test",
2234+
PIIEnabled: config.BoolPtr(true),
2235+
ModelScores: []config.ModelScore{{Model: "test", Score: 1.0, UseReasoning: config.BoolPtr(false)}},
2236+
}
2237+
2238+
cfg := &config.RouterConfig{
2239+
Classifier: config.RouterConfig{}.Classifier,
2240+
Categories: []config.Category{category},
2241+
}
2242+
// Global is disabled (no model ID)
2243+
cfg.Classifier.PIIModel.ModelID = ""
2244+
2245+
Expect(cfg.IsPIIEnabledForCategory("test")).To(BeTrue())
2246+
})
2247+
})
2248+
2249+
Context("when category does not exist", func() {
2250+
It("should fall back to global setting", func() {
2251+
cfg := &config.RouterConfig{
2252+
Classifier: config.RouterConfig{}.Classifier,
2253+
Categories: []config.Category{},
2254+
}
2255+
cfg.Classifier.PIIModel.ModelID = "test-model"
2256+
cfg.Classifier.PIIModel.PIIMappingPath = "/path/to/mapping.json"
2257+
2258+
Expect(cfg.IsPIIEnabledForCategory("nonexistent")).To(BeTrue())
2259+
})
2260+
})
2261+
})
21162262
})

0 commit comments

Comments
 (0)