vllm-project
diff --git a/candle-binding/src/classifiers/lora/intent_lora.rs b/candle-binding/src/classifiers/lora/intent_lora.rs
Lines changed: 31 additions & 8 deletions
diff --git a/candle-binding/src/classifiers/lora/pii_lora.rs b/candle-binding/src/classifiers/lora/pii_lora.rs
Lines changed: 36 additions & 13 deletions
diff --git a/candle-binding/src/classifiers/lora/security_lora.rs b/candle-binding/src/classifiers/lora/security_lora.rs
Lines changed: 61 additions & 22 deletions
@@ -47,9 +47,15 @@ impl IntentLoRAClassifier {
                 candle_core::Error::from(unified_err)
             })?;
 
+        // Load threshold from global config instead of hardcoding
+        let confidence_threshold = {
+            use crate::core::config_loader::GlobalConfigLoader;
+            GlobalConfigLoader::load_intent_threshold().unwrap_or(0.6) // Default from config.yaml classifier.category_model.threshold
+        };
+
         Ok(Self {
             bert_classifier: classifier,
-            confidence_threshold: 0.7,
+            confidence_threshold,
             intent_labels,
             model_path: model_path.to_string(),
         })
@@ -81,11 +87,21 @@ impl IntentLoRAClassifier {
                 candle_core::Error::from(unified_err)
             })?;
 
-        // Map class index to intent label
+        // Map class index to intent label - fail if class not found
         let intent = if predicted_class < self.intent_labels.len() {
             self.intent_labels[predicted_class].clone()
         } else {
-            format!("UNKNOWN_{}", predicted_class)
+            let unified_err = model_error!(
+                ModelErrorType::LoRA,
+                "intent classification",
+                format!(
+                    "Invalid class index {} not found in labels (max: {})",
+                    predicted_class,
+                    self.intent_labels.len()
+                ),
+                text
+            );
+            return Err(candle_core::Error::from(unified_err));
         };
 
         let processing_time = start_time.elapsed().as_millis() as u64;
@@ -119,16 +135,23 @@ impl IntentLoRAClassifier {
         let processing_time = start_time.elapsed().as_millis() as u64;
 
         let mut results = Vec::new();
-        for (predicted_class, confidence) in batch_results {
-            let intent = if predicted_class < self.intent_labels.len() {
-                self.intent_labels[predicted_class].clone()
+        for (i, (predicted_class, confidence)) in batch_results.iter().enumerate() {
+            let intent = if *predicted_class < self.intent_labels.len() {
+                self.intent_labels[*predicted_class].clone()
             } else {
-                format!("UNKNOWN_{}", predicted_class)
+                let unified_err = model_error!(
+                    ModelErrorType::LoRA,
+                    "batch intent classification",
+                    format!("Invalid class index {} not found in labels (max: {}) for text at position {}",
+                           predicted_class, self.intent_labels.len(), i),
+                    &format!("batch[{}]", i)
+                );
+                return Err(candle_core::Error::from(unified_err));
             };
 
             results.push(IntentResult {
                 intent,
-                confidence,
+                confidence: *confidence,
                 processing_time_ms: processing_time,
             });
         }
 
@@ -20,12 +20,23 @@ pub struct PIILoRAClassifier {
     model_path: String,
 }
 
-/// PII detection result
+/// Individual PII occurrence: one flagged token together with its own confidence
+#[derive(Debug, Clone)]
+pub struct PIIOccurrence {
+    pub pii_type: String, // Label looked up in the classifier's `pii_types` for the predicted class
+    pub confidence: f32, // Confidence reported for this token alone (not aggregated)
+    pub token: String, // Token text as it appears in the token classification results
+    pub start_pos: usize, // Index of the token in the result sequence, not a character offset
+    pub end_pos: usize, // Populated as start_pos + 1 by the detection loop
+}
+
+/// PII detection result with an aggregate confidence plus per-occurrence confidences
 #[derive(Debug, Clone)]
 pub struct PIIResult {
     pub has_pii: bool, // Set when at least one token maps to a non-"O" class within `pii_types`
-    pub pii_types: Vec<String>,
-    pub confidence: f32,
+    pub pii_types: Vec<String>,          // De-duplicated detected labels; kept for backward compatibility
+    pub confidence: f32,                 // Mean confidence of PII tokens; the averaged "O"-class confidence when none found
+    pub occurrences: Vec<PIIOccurrence>, // One entry per flagged token, each with its own confidence
     pub processing_time_ms: u64,
 }
 
@@ -86,12 +97,13 @@ impl PIILoRAClassifier {
                 candle_core::Error::from(unified_err)
             })?;
 
-        // Analyze token results to determine PII presence
+        // Create individual occurrences with their own confidence scores
+        let mut occurrences = Vec::new();
         let mut detected_types = Vec::new();
-        let mut max_confidence = 0.0f32;
+        let mut confidence_scores = Vec::new();
         let mut has_pii = false;
 
-        // Calculate confidence for "O" class before processing
+        // Calculate confidence for "O" class for non-PII tokens
         let o_confidences: Vec<f32> = token_results
             .iter()
             .filter(|(_, class_idx, _)| *class_idx == 0) // "O" class
@@ -103,25 +115,35 @@ impl PIILoRAClassifier {
             o_confidences.iter().sum::<f32>() / o_confidences.len() as f32
         };
 
-        for (_token, class_idx, confidence) in token_results {
+        // Process each token with its individual confidence
+        for (i, (token, class_idx, confidence)) in token_results.iter().enumerate() {
             // Skip "O" (Outside) labels - class 0 typically means no PII
-            if class_idx > 0 && class_idx < self.pii_types.len() {
+            if *class_idx > 0 && *class_idx < self.pii_types.len() {
                 has_pii = true;
-                max_confidence = max_confidence.max(confidence);
+                confidence_scores.push(*confidence);
 
-                let pii_type = &self.pii_types[class_idx];
+                let pii_type = &self.pii_types[*class_idx];
                 if !detected_types.contains(pii_type) {
                     detected_types.push(pii_type.clone());
                 }
+
+                // Create individual occurrence with its own confidence
+                occurrences.push(PIIOccurrence {
+                    pii_type: pii_type.clone(),
+                    confidence: *confidence, // Each occurrence keeps its individual confidence
+                    token: token.clone(),
+                    start_pos: i, // Token position in sequence
+                    end_pos: i + 1,
+                });
             }
         }
 
-        // Use real confidence from model inference - no hardcoded values
+        // Calculate overall confidence without inflating individual confidences
         let final_confidence = if has_pii {
-            max_confidence
+            // Use average confidence instead of max to avoid inflating significance
+            confidence_scores.iter().sum::<f32>() / confidence_scores.len() as f32
         } else {
             // For no PII detected, use the confidence of the "O" (Outside) class
-            // This comes from the actual model's softmax output for class 0
             avg_o_confidence
         };
 
@@ -131,6 +153,7 @@ impl PIILoRAClassifier {
             has_pii,
             pii_types: detected_types,
             confidence: final_confidence,
+            occurrences, // Include individual occurrences with their own confidences
             processing_time_ms: processing_time,
         })
     }
 
@@ -49,9 +49,15 @@ impl SecurityLoRAClassifier {
                 candle_core::Error::from(unified_err)
             })?;
 
+        // Load threshold from global config instead of hardcoding
+        let confidence_threshold = {
+            use crate::core::config_loader::GlobalConfigLoader;
+            GlobalConfigLoader::load_security_threshold().unwrap_or(0.7) // Default from config.yaml prompt_guard.threshold
+        };
+
         Ok(Self {
             bert_classifier,
-            confidence_threshold: 0.5,
+            confidence_threshold,
             threat_types,
             model_path: model_path.to_string(),
         })
@@ -83,22 +89,38 @@ impl SecurityLoRAClassifier {
                 candle_core::Error::from(unified_err)
             })?;
 
-        // Determine if threat is detected based on predicted class
-        let is_threat = predicted_class > 0; // Assuming class 0 is "benign" or "safe"
+        // Map class index to threat type label - fail if class not found
+        let threat_type = if predicted_class < self.threat_types.len() {
+            self.threat_types[predicted_class].clone()
+        } else {
+            let unified_err = model_error!(
+                ModelErrorType::LoRA,
+                "security classification",
+                format!(
+                    "Invalid class index {} not found in labels (max: {})",
+                    predicted_class,
+                    self.threat_types.len()
+                ),
+                text
+            );
+            return Err(candle_core::Error::from(unified_err));
+        };
 
-        // Get detected threat types
-        let mut detected_threats = Vec::new();
-        if is_threat && predicted_class < self.threat_types.len() {
-            detected_threats.push(self.threat_types[predicted_class].clone());
-        }
+        // Determine if threat is detected based on class label (instead of hardcoded index)
+        let is_threat = !threat_type.to_lowercase().contains("safe")
+            && !threat_type.to_lowercase().contains("benign")
+            && !threat_type.to_lowercase().contains("no_threat");
 
-        // Calculate severity score based on confidence and threat type
-        let severity_score = if is_threat {
-            confidence * 0.9 // High severity for detected threats
+        // Get detected threat types
+        let detected_threats = if is_threat {
+            vec![threat_type]
         } else {
-            0.0 // No severity for safe content
+            Vec::new()
         };
 
+        // Use confidence as severity score (no artificial scaling)
+        let severity_score = if is_threat { confidence } else { 0.0 };
+
         let processing_time = start_time.elapsed().as_millis() as u64;
 
         Ok(SecurityResult {
@@ -129,24 +151,41 @@ impl SecurityLoRAClassifier {
         let processing_time = start_time.elapsed().as_millis() as u64;
 
         let mut results = Vec::new();
-        for (predicted_class, confidence) in batch_results {
-            // Determine if threat is detected
-            let is_threat = predicted_class > 0; // Assuming class 0 is "benign"
+        for (i, (predicted_class, confidence)) in batch_results.iter().enumerate() {
+            // Map class index to threat type label - fail if class not found
+            let threat_type = if *predicted_class < self.threat_types.len() {
+                self.threat_types[*predicted_class].clone()
+            } else {
+                let unified_err = model_error!(
+                    ModelErrorType::LoRA,
+                    "batch security classification",
+                    format!("Invalid class index {} not found in labels (max: {}) for text at position {}",
+                           predicted_class, self.threat_types.len(), i),
+                    &format!("batch[{}]", i)
+                );
+                return Err(candle_core::Error::from(unified_err));
+            };
+
+            // Determine if threat is detected based on class label
+            let is_threat = !threat_type.to_lowercase().contains("safe")
+                && !threat_type.to_lowercase().contains("benign")
+                && !threat_type.to_lowercase().contains("no_threat");
 
             // Get detected threat types
-            let mut detected_threats = Vec::new();
-            if is_threat && predicted_class < self.threat_types.len() {
-                detected_threats.push(self.threat_types[predicted_class].clone());
-            }
+            let detected_threats = if is_threat {
+                vec![threat_type]
+            } else {
+                Vec::new()
+            };
 
-            // Calculate severity score
-            let severity_score = if is_threat { confidence * 0.9 } else { 0.0 };
+            // Use confidence as severity score (no artificial scaling)
+            let severity_score = if is_threat { *confidence } else { 0.0 };
 
             results.push(SecurityResult {
                 is_threat,
                 threat_types: detected_threats,
                 severity_score,
-                confidence,
+                confidence: *confidence,
                 processing_time_ms: processing_time,
             });
         }