Skip to content

Commit 7957612

Browse files
authored
fix(api): expose actual PII confidence scores instead of hardcoded 0.9 (#718)
1 parent 8e8cf8d commit 7957612

File tree

3 files changed

+92
-11
lines changed

3 files changed

+92
-11
lines changed

config/testing/config.e2e.yaml

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,11 @@ classifier:
6969
use_cpu: true
7070
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
7171
pii_model:
72-
model_id: "models/pii_classifier_modernbert-base_presidio_token_model" # TODO: Use local model for now before the code can download the entire model from huggingface
73-
use_modernbert: true
72+
model_id: "models/lora_pii_detector_bert-base-uncased_model"
73+
use_modernbert: false # BERT-based LoRA model (this field is ignored; the model type is always auto-detected)
7474
threshold: 0.7
7575
use_cpu: true
76-
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
76+
pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"
7777
categories:
7878
- name: business
7979
description: "Business and management related queries"
@@ -359,6 +359,24 @@ decisions:
359359
enabled: true
360360
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
361361

362+
# Default catch-all decision for unmatched requests (E2E PII test fix)
363+
# This ensures PII detection is always enabled, even when no specific decision matches
364+
- name: "default_decision"
365+
description: "Default catch-all decision - blocks all PII for safety"
366+
priority: 1 # Lowest priority - only matches if nothing else does
367+
rules:
368+
operator: "OR"
369+
conditions:
370+
- type: "always" # Always matches as fallback
371+
modelRefs:
372+
- model: "Model-B"
373+
use_reasoning: false
374+
plugins:
375+
- type: "pii"
376+
configuration:
377+
enabled: true
378+
pii_types_allowed: [] # Block ALL PII - empty list means nothing allowed
379+
362380
default_model: "Model-A"
363381

364382
# API Configuration

src/semantic-router/pkg/classification/classifier.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,67 @@ func (c *Classifier) ClassifyPIIWithThreshold(text string, threshold float32) ([
884884
return result, nil
885885
}
886886

887+
// ClassifyPIIWithDetails performs PII token classification on text and returns
// full entity details, including confidence scores. It delegates to
// ClassifyPIIWithDetailsAndThreshold, passing the threshold configured in
// c.Config.PIIModel.Threshold.
888+
func (c *Classifier) ClassifyPIIWithDetails(text string) ([]PIIDetection, error) {
889+
return c.ClassifyPIIWithDetailsAndThreshold(text, c.Config.PIIModel.Threshold)
890+
}
891+
892+
// ClassifyPIIWithDetailsAndThreshold performs PII token classification on text
// and returns the detected entities whose confidence is at least threshold.
//
// Return values, as implemented below:
//   - PII detection not enabled: an empty non-nil slice and an error.
//   - text == "": an empty non-nil slice and a nil error.
//   - token classification fails: a nil slice and the wrapped error.
//   - otherwise: the matching detections; the slice is nil when no entity
//     meets the threshold.
893+
func (c *Classifier) ClassifyPIIWithDetailsAndThreshold(text string, threshold float32) ([]PIIDetection, error) {
894+
if !c.IsPIIEnabled() {
895+
return []PIIDetection{}, fmt.Errorf("PII detection is not properly configured")
896+
}
897+
898+
if text == "" {
899+
return []PIIDetection{}, nil
900+
}
901+
902+
// Use PII token classifier for entity detection; the config path is
// "<PIIModel.ModelID>/config.json".
903+
configPath := fmt.Sprintf("%s/config.json", c.Config.PIIModel.ModelID)
904+
start := time.Now()
905+
tokenResult, err := c.piiInference.ClassifyTokens(text, configPath)
906+
// Latency is recorded before the error check, so failed calls are measured as well.
metrics.RecordClassifierLatency("pii", time.Since(start).Seconds())
907+
if err != nil {
908+
return nil, fmt.Errorf("PII token classification error: %w", err)
909+
}
910+
911+
if len(tokenResult.Entities) > 0 {
912+
logging.Infof("PII token classification found %d entities", len(tokenResult.Entities))
913+
}
914+
915+
// Convert token entities to PII detections, keeping only entities whose
// confidence is >= threshold.
916+
var detections []PIIDetection
917+
for _, entity := range tokenResult.Entities {
918+
if entity.Confidence >= threshold {
919+
detection := PIIDetection{
920+
EntityType: entity.EntityType,
921+
Start: entity.Start,
922+
End: entity.End,
923+
Text: entity.Text,
924+
Confidence: entity.Confidence,
925+
}
926+
detections = append(detections, detection)
927+
// NOTE(review): this log line includes entity.Text, i.e. the raw matched
// text of the detected PII — confirm that writing it to logs is intended.
logging.Infof("Detected PII entity: %s ('%s') at [%d-%d] with confidence %.3f",
928+
entity.EntityType, entity.Text, entity.Start, entity.End, entity.Confidence)
929+
}
930+
}
931+
932+
if len(detections) > 0 {
933+
// Log unique PII types for compatibility with existing logs.
// Map iteration order is unspecified in Go, so the order of the types in
// this log line can differ between calls.
934+
uniqueTypes := make(map[string]bool)
935+
for _, d := range detections {
936+
uniqueTypes[d.EntityType] = true
937+
}
938+
types := make([]string, 0, len(uniqueTypes))
939+
for t := range uniqueTypes {
940+
types = append(types, t)
941+
}
942+
logging.Infof("Detected PII types: %v", types)
943+
}
944+
945+
return detections, nil
946+
}
947+
887948
// DetectPIIInContent performs PII classification on all provided content
888949
func (c *Classifier) DetectPIIInContent(allContent []string) []string {
889950
var detectedPII []string

src/semantic-router/pkg/services/classification.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,8 @@ func (s *ClassificationService) DetectPII(req PIIRequest) (*PIIResponse, error)
290290
}, nil
291291
}
292292

293-
// Perform PII detection using the existing classifier
294-
piiTypes, err := s.classifier.ClassifyPII(req.Text)
293+
// Perform PII detection using the classifier with full details
294+
detections, err := s.classifier.ClassifyPIIWithDetails(req.Text)
295295
if err != nil {
296296
return nil, fmt.Errorf("PII detection failed: %w", err)
297297
}
@@ -300,17 +300,19 @@ func (s *ClassificationService) DetectPII(req PIIRequest) (*PIIResponse, error)
300300

301301
// Build response
302302
response := &PIIResponse{
303-
HasPII: len(piiTypes) > 0,
303+
HasPII: len(detections) > 0,
304304
Entities: []PIIEntity{},
305305
ProcessingTimeMs: processingTime,
306306
}
307307

308-
// Convert PII types to entities (simplified for now)
309-
for _, piiType := range piiTypes {
308+
// Convert PII detections to API entities with actual confidence scores
309+
for _, detection := range detections {
310310
entity := PIIEntity{
311-
Type: piiType,
312-
Value: "[DETECTED]", // Placeholder - would need actual entity extraction
313-
Confidence: 0.9, // Placeholder - would need actual confidence
311+
Type: detection.EntityType,
312+
Value: "[DETECTED]", // Redacted for security
313+
Confidence: float64(detection.Confidence), // Actual confidence from model
314+
StartPos: detection.Start,
315+
EndPos: detection.End,
314316
}
315317
response.Entities = append(response.Entities, entity)
316318
}

0 commit comments

Comments
 (0)