Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions deploy/helm/semantic-router/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Declare variables to be passed into your templates.

# Global settings
global:

Check warning on line 6 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

6:1 [document-start] missing document start "---"
# -- Namespace for all resources (if not specified, uses Release.Namespace)
namespace: ""

Expand Down Expand Up @@ -47,7 +47,7 @@

# Pod security context
podSecurityContext: {}
# fsGroup: 2000

Check warning on line 50 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

50:3 [comments-indentation] comment not indented like content

# Container security context
securityContext:
Expand Down Expand Up @@ -100,7 +100,7 @@
className: ""
# -- Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx

Check warning on line 103 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

103:5 [comments-indentation] comment not indented like content
# kubernetes.io/tls-acme: "true"
# -- Ingress hosts configuration
hosts:
Expand Down Expand Up @@ -166,7 +166,10 @@
- name: jailbreak_classifier_modernbert-base_model
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model

Check failure on line 169 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

169:81 [line-length] line too long (83 > 80 characters)
# LoRA PII detector (for auto-detection feature)
- name: lora_pii_detector_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model


# Autoscaling configuration
Expand Down Expand Up @@ -229,7 +232,7 @@
size: 10Gi
# -- Annotations for PVC
annotations: {}
# -- Existing claim name (if provided, will use existing PVC instead of creating new one)

Check failure on line 235 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

235:81 [line-length] line too long (91 > 80 characters)
existingClaim: ""

# Application configuration
Expand Down Expand Up @@ -264,7 +267,7 @@
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

Check failure on line 270 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

270:81 [line-length] line too long (107 > 80 characters)

# Classifier configuration
classifier:
Expand All @@ -273,13 +276,13 @@
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

Check failure on line 279 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

279:81 [line-length] line too long (101 > 80 characters)
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

Check failure on line 285 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

285:81 [line-length] line too long (106 > 80 characters)

# Reasoning families
reasoning_families:
Expand Down Expand Up @@ -310,7 +313,7 @@
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]

Check failure on line 316 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

316:81 [line-length] line too long (94 > 80 characters)
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability configuration
Expand Down Expand Up @@ -348,7 +351,7 @@
enum: ["celsius", "fahrenheit"]
description: "Temperature unit"
required: ["location"]
description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"

Check failure on line 354 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

354:81 [line-length] line too long (220 > 80 characters)
category: "weather"
tags: ["weather", "temperature", "forecast", "climate"]
- tool:
Expand All @@ -367,7 +370,7 @@
description: "Number of results to return"
default: 5
required: ["query"]
description: "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate"

Check failure on line 373 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

373:81 [line-length] line too long (158 > 80 characters)
category: "search"
tags: ["search", "web", "internet", "information", "browse"]
- tool:
Expand All @@ -382,7 +385,7 @@
type: "string"
description: "Mathematical expression to evaluate"
required: ["expression"]
description: "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula"

Check failure on line 388 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

388:81 [line-length] line too long (183 > 80 characters)
category: "math"
tags: ["math", "calculation", "arithmetic", "compute", "numbers"]
- tool:
Expand All @@ -403,7 +406,7 @@
type: "string"
description: "Email body content"
required: ["to", "subject", "body"]
description: "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform"

Check failure on line 409 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

409:81 [line-length] line too long (132 > 80 characters)
category: "communication"
tags: ["email", "send", "communication", "message", "contact"]
- tool:
Expand Down
6 changes: 4 additions & 2 deletions deploy/kubernetes/aibrix/semantic-router-values/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,10 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
6 changes: 4 additions & 2 deletions e2e/profiles/ai-gateway/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,10 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
4 changes: 2 additions & 2 deletions e2e/profiles/dynamic-config/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
49 changes: 35 additions & 14 deletions src/semantic-router/pkg/classification/classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,35 +140,55 @@ func createJailbreakInference(useModernBERT bool) JailbreakInference {
}

// PIIInitializer abstracts initialization of the PII token classifier so the
// concrete loading strategy (auto-detecting Candle BERT vs. ModernBERT) can be
// swapped or mocked in tests.
type PIIInitializer interface {
	// Init loads the PII token classification model located at modelID.
	// numClasses is the number of PII entity classes and is required by the
	// auto-detecting initialization path.
	Init(modelID string, useCPU bool, numClasses int) error
}

type ModernBertPIIInitializer struct{}
type PIIInitializerImpl struct {
usedModernBERT bool // Track which init path succeeded for inference routing
}

func (c *PIIInitializerImpl) Init(modelID string, useCPU bool, numClasses int) error {
// Try auto-detecting Candle BERT init first - checks for lora_config.json
// This enables LoRA PII models when available
success := candle_binding.InitCandleBertTokenClassifier(modelID, numClasses, useCPU)
if success {
c.usedModernBERT = false
logging.Infof("Initialized PII token classifier with auto-detection (LoRA or Traditional BERT)")
return nil
}

func (c *ModernBertPIIInitializer) Init(modelID string, useCPU bool) error {
// Fallback to ModernBERT-specific init for backward compatibility
// This handles models with incomplete configs (missing hidden_act, etc.)
logging.Infof("Auto-detection failed, falling back to ModernBERT PII initializer")
err := candle_binding.InitModernBertPIITokenClassifier(modelID, useCPU)
if err != nil {
return err
return fmt.Errorf("failed to initialize PII token classifier (both auto-detect and ModernBERT): %w", err)
}
logging.Infof("Initialized ModernBERT PII token classifier for entity detection")
c.usedModernBERT = true
logging.Infof("Initialized ModernBERT PII token classifier (fallback mode)")
return nil
}

// createPIIInitializer creates the appropriate PII initializer (currently only ModernBERT)
func createPIIInitializer() PIIInitializer { return &ModernBertPIIInitializer{} }
// createPIIInitializer creates the PII initializer (auto-detecting)
func createPIIInitializer() PIIInitializer {
return &PIIInitializerImpl{}
}

// PIIInference abstracts token-level PII classification over input text so
// the concrete inference backend can be swapped or mocked in tests.
type PIIInference interface {
	// ClassifyTokens runs token classification on text and returns the
	// detected entities. configPath identifies the mapping configuration;
	// NOTE(review): some implementations may not use it — confirm per backend.
	ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error)
}

type ModernBertPIIInference struct{}
type PIIInferenceImpl struct{}

func (c *ModernBertPIIInference) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
return candle_binding.ClassifyModernBertPIITokens(text, configPath)
func (c *PIIInferenceImpl) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
// Auto-detecting inference - uses whichever classifier was initialized (LoRA or Traditional)
return candle_binding.ClassifyCandleBertTokens(text)
}

// createPIIInference creates the appropriate PII inference (currently only ModernBERT)
func createPIIInference() PIIInference { return &ModernBertPIIInference{} }
// createPIIInference creates the PII inference (auto-detecting)
func createPIIInference() PIIInference {
return &PIIInferenceImpl{}
}

// JailbreakDetection represents the result of jailbreak analysis for a piece of content
type JailbreakDetection struct {
Expand Down Expand Up @@ -348,7 +368,7 @@ func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, p

// Add in-tree classifier if configured
if cfg.CategoryModel.ModelID != "" {
options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.UseModernBERT), createCategoryInference(cfg.UseModernBERT)))
options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.CategoryModel.UseModernBERT), createCategoryInference(cfg.CategoryModel.UseModernBERT)))
}

// Add MCP classifier if configured
Expand Down Expand Up @@ -509,7 +529,8 @@ func (c *Classifier) initializePIIClassifier() error {
return fmt.Errorf("not enough PII types for classification, need at least 2, got %d", numPIIClasses)
}

return c.piiInitializer.Init(c.Config.PIIModel.ModelID, c.Config.PIIModel.UseCPU)
// Pass numClasses to support auto-detection
return c.piiInitializer.Init(c.Config.PIIModel.ModelID, c.Config.PIIModel.UseCPU, numPIIClasses)
}

// EvaluateAllRules evaluates all rule types and returns matched rule names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ var _ = Describe("jailbreak detection", func() {

type MockPIIInitializer struct{ InitError error }

func (m *MockPIIInitializer) Init(_ string, useCPU bool) error { return m.InitError }
func (m *MockPIIInitializer) Init(_ string, useCPU bool, numClasses int) error { return m.InitError }

type MockPIIInferenceResponse struct {
classifyTokensResult candle_binding.TokenClassificationResult
Expand Down
2 changes: 2 additions & 0 deletions src/semantic-router/pkg/extproc/extproc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,8 @@ var _ = Describe("Caching Functionality", func() {
BeforeEach(func() {
cfg = CreateTestConfig()
cfg.Enabled = true
// Disable PII detection for caching tests (not needed and avoids model loading issues)
cfg.InlineModels.Classifier.PIIModel.ModelID = ""

var err error
router, err = CreateTestRouter(cfg)
Expand Down
Loading