Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions deploy/helm/semantic-router/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Declare variables to be passed into your templates.

# Global settings
global:

Check warning on line 6 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

6:1 [document-start] missing document start "---"
# -- Namespace for all resources (if not specified, uses Release.Namespace)
namespace: ""

Expand Down Expand Up @@ -47,7 +47,7 @@

# Pod security context
podSecurityContext: {}
# fsGroup: 2000

Check warning on line 50 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

50:3 [comments-indentation] comment not indented like content

# Container security context
securityContext:
Expand Down Expand Up @@ -100,7 +100,7 @@
className: ""
# -- Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx

Check warning on line 103 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

103:5 [comments-indentation] comment not indented like content
# kubernetes.io/tls-acme: "true"
# -- Ingress hosts configuration
hosts:
Expand Down Expand Up @@ -166,7 +166,10 @@
- name: jailbreak_classifier_modernbert-base_model
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model

Check failure on line 169 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

169:81 [line-length] line too long (83 > 80 characters)
# LoRA PII detector (for auto-detection feature)
- name: lora_pii_detector_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model


# Autoscaling configuration
Expand Down Expand Up @@ -229,7 +232,7 @@
size: 10Gi
# -- Annotations for PVC
annotations: {}
# -- Existing claim name (if provided, will use existing PVC instead of creating new one)

Check failure on line 235 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

235:81 [line-length] line too long (91 > 80 characters)
existingClaim: ""

# Application configuration
Expand Down Expand Up @@ -264,7 +267,7 @@
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

Check failure on line 270 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

270:81 [line-length] line too long (107 > 80 characters)

# Classifier configuration
classifier:
Expand All @@ -273,13 +276,13 @@
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

Check failure on line 279 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

279:81 [line-length] line too long (101 > 80 characters)
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

Check failure on line 285 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

285:81 [line-length] line too long (106 > 80 characters)

# Reasoning families
reasoning_families:
Expand Down Expand Up @@ -310,7 +313,7 @@
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]

Check failure on line 316 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

316:81 [line-length] line too long (94 > 80 characters)
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability configuration
Expand Down Expand Up @@ -348,7 +351,7 @@
enum: ["celsius", "fahrenheit"]
description: "Temperature unit"
required: ["location"]
description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"

Check failure on line 354 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

354:81 [line-length] line too long (220 > 80 characters)
category: "weather"
tags: ["weather", "temperature", "forecast", "climate"]
- tool:
Expand All @@ -367,7 +370,7 @@
description: "Number of results to return"
default: 5
required: ["query"]
description: "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate"

Check failure on line 373 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

373:81 [line-length] line too long (158 > 80 characters)
category: "search"
tags: ["search", "web", "internet", "information", "browse"]
- tool:
Expand All @@ -382,7 +385,7 @@
type: "string"
description: "Mathematical expression to evaluate"
required: ["expression"]
description: "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula"

Check failure on line 388 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

388:81 [line-length] line too long (183 > 80 characters)
category: "math"
tags: ["math", "calculation", "arithmetic", "compute", "numbers"]
- tool:
Expand All @@ -403,7 +406,7 @@
type: "string"
description: "Email body content"
required: ["to", "subject", "body"]
description: "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform"

Check failure on line 409 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

409:81 [line-length] line too long (132 > 80 characters)
category: "communication"
tags: ["email", "send", "communication", "message", "contact"]
- tool:
Expand Down
6 changes: 4 additions & 2 deletions deploy/kubernetes/aibrix/semantic-router-values/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,10 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
6 changes: 4 additions & 2 deletions e2e/profiles/ai-gateway/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,10 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
4 changes: 2 additions & 2 deletions e2e/profiles/dynamic-config/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
Expand Down
49 changes: 35 additions & 14 deletions src/semantic-router/pkg/classification/classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,35 +140,55 @@ func createJailbreakInference(useModernBERT bool) JailbreakInference {
}

// PIIInitializer abstracts initialization of the PII token classifier so the
// concrete loading strategy (auto-detecting Candle BERT vs. ModernBERT) can be
// swapped or mocked in tests.
type PIIInitializer interface {
	// Init loads the PII token classification model located at modelID.
	// numClasses is the number of PII entity classes and is required by the
	// auto-detecting initialization path.
	Init(modelID string, useCPU bool, numClasses int) error
}

type ModernBertPIIInitializer struct{}
type PIIInitializerImpl struct {
usedModernBERT bool // Track which init path succeeded for inference routing
}

func (c *PIIInitializerImpl) Init(modelID string, useCPU bool, numClasses int) error {
// Try auto-detecting Candle BERT init first - checks for lora_config.json
// This enables LoRA PII models when available
success := candle_binding.InitCandleBertTokenClassifier(modelID, numClasses, useCPU)
if success {
c.usedModernBERT = false
logging.Infof("Initialized PII token classifier with auto-detection (LoRA or Traditional BERT)")
return nil
}

func (c *ModernBertPIIInitializer) Init(modelID string, useCPU bool) error {
// Fallback to ModernBERT-specific init for backward compatibility
// This handles models with incomplete configs (missing hidden_act, etc.)
logging.Infof("Auto-detection failed, falling back to ModernBERT PII initializer")
err := candle_binding.InitModernBertPIITokenClassifier(modelID, useCPU)
if err != nil {
return err
return fmt.Errorf("failed to initialize PII token classifier (both auto-detect and ModernBERT): %w", err)
}
logging.Infof("Initialized ModernBERT PII token classifier for entity detection")
c.usedModernBERT = true
logging.Infof("Initialized ModernBERT PII token classifier (fallback mode)")
return nil
}

// createPIIInitializer creates the appropriate PII initializer (currently only ModernBERT)
func createPIIInitializer() PIIInitializer { return &ModernBertPIIInitializer{} }
// createPIIInitializer creates the PII initializer (auto-detecting)
func createPIIInitializer() PIIInitializer {
return &PIIInitializerImpl{}
}

// PIIInference abstracts token-level PII classification over input text so
// the concrete inference backend can be swapped or mocked in tests.
type PIIInference interface {
	// ClassifyTokens runs token classification on text and returns the
	// detected entities. configPath identifies the mapping configuration;
	// NOTE(review): some implementations may not use it — confirm per backend.
	ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error)
}

type ModernBertPIIInference struct{}
type PIIInferenceImpl struct{}

func (c *ModernBertPIIInference) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
return candle_binding.ClassifyModernBertPIITokens(text, configPath)
func (c *PIIInferenceImpl) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
// Auto-detecting inference - uses whichever classifier was initialized (LoRA or Traditional)
return candle_binding.ClassifyCandleBertTokens(text)
}

// createPIIInference creates the appropriate PII inference (currently only ModernBERT)
func createPIIInference() PIIInference { return &ModernBertPIIInference{} }
// createPIIInference creates the PII inference (auto-detecting)
func createPIIInference() PIIInference {
return &PIIInferenceImpl{}
}

// JailbreakDetection represents the result of jailbreak analysis for a piece of content
type JailbreakDetection struct {
Expand Down Expand Up @@ -348,7 +368,7 @@ func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, p

// Add in-tree classifier if configured
if cfg.CategoryModel.ModelID != "" {
options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.UseModernBERT), createCategoryInference(cfg.UseModernBERT)))
options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.CategoryModel.UseModernBERT), createCategoryInference(cfg.CategoryModel.UseModernBERT)))
}

// Add MCP classifier if configured
Expand Down Expand Up @@ -509,7 +529,8 @@ func (c *Classifier) initializePIIClassifier() error {
return fmt.Errorf("not enough PII types for classification, need at least 2, got %d", numPIIClasses)
}

return c.piiInitializer.Init(c.Config.PIIModel.ModelID, c.Config.PIIModel.UseCPU)
// Pass numClasses to support auto-detection
return c.piiInitializer.Init(c.Config.PIIModel.ModelID, c.Config.PIIModel.UseCPU, numPIIClasses)
}

// EvaluateAllRules evaluates all rule types and returns matched rule names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ var _ = Describe("jailbreak detection", func() {

type MockPIIInitializer struct{ InitError error }

func (m *MockPIIInitializer) Init(_ string, useCPU bool) error { return m.InitError }
func (m *MockPIIInitializer) Init(_ string, useCPU bool, numClasses int) error { return m.InitError }

type MockPIIInferenceResponse struct {
classifyTokensResult candle_binding.TokenClassificationResult
Expand Down
2 changes: 2 additions & 0 deletions src/semantic-router/pkg/extproc/extproc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,8 @@ var _ = Describe("Caching Functionality", func() {
BeforeEach(func() {
cfg = CreateTestConfig()
cfg.Enabled = true
// Disable PII detection for caching tests (not needed and avoids model loading issues)
cfg.InlineModels.Classifier.PIIModel.ModelID = ""

var err error
router, err = CreateTestRouter(cfg)
Expand Down
Loading