finish pii tests

aeft · aeft · commit 41213b1de332 · 2025-09-05T11:35:49.000-07:00
Signed-off-by: Alex Wang &lt;yesterda9@gmail.com&gt;
diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go
@@ -22,9 +22,9 @@ func (c *LinearCategoryInference) Classify(text string) (candle_binding.ClassRes
 	return candle_binding.ClassifyText(text)
 }
 
-type ModernBERTCategoryInference struct{}
+type ModernBertCategoryInference struct{}
 
-func (c *ModernBERTCategoryInference) Classify(text string) (candle_binding.ClassResult, error) {
+func (c *ModernBertCategoryInference) Classify(text string) (candle_binding.ClassResult, error) {
 	return candle_binding.ClassifyModernBertText(text)
 }
 
@@ -38,12 +38,22 @@ func (c *LinearJailbreakInference) Classify(text string) (candle_binding.ClassRe
 	return candle_binding.ClassifyJailbreakText(text)
 }
 
-type ModernBERTJailbreakInference struct{}
+type ModernBertJailbreakInference struct{}
 
-func (c *ModernBERTJailbreakInference) Classify(text string) (candle_binding.ClassResult, error) {
+func (c *ModernBertJailbreakInference) Classify(text string) (candle_binding.ClassResult, error) {
 	return candle_binding.ClassifyModernBertJailbreakText(text)
 }
 
+type PIIInference interface {
+	ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error)
+}
+
+type ModernBertPIIInference struct{}
+
+func (c *ModernBertPIIInference) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
+	return candle_binding.ClassifyModernBertPIITokens(text, configPath)
+}
+
 // JailbreakDetection represents the result of jailbreak analysis for a piece of content
 type JailbreakDetection struct {
 	Content       string  `json:"content"`
@@ -75,6 +85,7 @@ type Classifier struct {
 	// Dependencies
 	categoryInference  CategoryInference
 	jailbreakInference JailbreakInference
+	piiInference       PIIInference
 
 	Config           *config.RouterConfig
 	CategoryMapping  *CategoryMapping
@@ -92,21 +103,25 @@ type Classifier struct {
 func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, piiMapping *PIIMapping, jailbreakMapping *JailbreakMapping, modelTTFT map[string]float64) *Classifier {
 	var categoryInference CategoryInference
 	if cfg.Classifier.CategoryModel.UseModernBERT {
-		categoryInference = &ModernBERTCategoryInference{}
+		categoryInference = &ModernBertCategoryInference{}
 	} else {
 		categoryInference = &LinearCategoryInference{}
 	}
 
 	var jailbreakInference JailbreakInference
 	if cfg.PromptGuard.UseModernBERT {
-		jailbreakInference = &ModernBERTJailbreakInference{}
+		jailbreakInference = &ModernBertJailbreakInference{}
 	} else {
 		jailbreakInference = &LinearJailbreakInference{}
 	}
 
+	piiInference := &ModernBertPIIInference{}
+
 	return &Classifier{
-		categoryInference:    categoryInference,
-		jailbreakInference:   jailbreakInference,
+		categoryInference:  categoryInference,
+		jailbreakInference: jailbreakInference,
+		piiInference:       piiInference,
+
 		Config:               cfg,
 		CategoryMapping:      categoryMapping,
 		PIIMapping:           piiMapping,
@@ -289,7 +304,7 @@ func (c *Classifier) ClassifyPII(text string) ([]string, error) {
 	// Use ModernBERT PII token classifier for entity detection
 	configPath := fmt.Sprintf("%s/config.json", c.Config.Classifier.PIIModel.ModelID)
 	start := time.Now()
-	tokenResult, err := candle_binding.ClassifyModernBertPIITokens(text, configPath)
+	tokenResult, err := c.piiInference.ClassifyTokens(text, configPath)
 	metrics.RecordClassifierLatency("pii", time.Since(start).Seconds())
 	if err != nil {
 		return nil, fmt.Errorf("PII token classification error: %w", err)
@@ -371,7 +386,7 @@ func (c *Classifier) AnalyzeContentForPII(contentList []string) (bool, []PIIAnal
 		// Use ModernBERT PII token classifier for detailed analysis
 		configPath := fmt.Sprintf("%s/config.json", c.Config.Classifier.PIIModel.ModelID)
 		start := time.Now()
-		tokenResult, err := candle_binding.ClassifyModernBertPIITokens(content, configPath)
+		tokenResult, err := c.piiInference.ClassifyTokens(content, configPath)
 		metrics.RecordClassifierLatency("pii", time.Since(start).Seconds())
 		if err != nil {
 			log.Printf("Error analyzing content %d: %v", i, err)
diff --git a/src/semantic-router/pkg/utils/classification/classifier_test.go b/src/semantic-router/pkg/utils/classification/classifier_test.go
@@ -55,7 +55,7 @@ var _ = Describe("ClassifyCategory", func() {
 
 			category, score, err := classifier.ClassifyCategory("This is about politics")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(category).To(Equal("politics"))
 			Expect(score).To(BeNumerically("~", 0.95, 0.001))
 		})
@@ -70,7 +70,7 @@ var _ = Describe("ClassifyCategory", func() {
 
 			category, score, err := classifier.ClassifyCategory("Ambiguous text")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(category).To(Equal(""))
 			Expect(score).To(BeNumerically("~", 0.3, 0.001))
 		})
@@ -82,7 +82,8 @@ var _ = Describe("ClassifyCategory", func() {
 
 			category, score, err := classifier.ClassifyCategory("Some text")
 
-			Expect(err).ToNot(BeNil())
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("classification error"))
 			Expect(category).To(Equal(""))
 			Expect(score).To(BeNumerically("~", 0.0, 0.001))
 		})
@@ -97,8 +98,7 @@ var _ = Describe("ClassifyCategory", func() {
 
 			category, score, err := classifier.ClassifyCategory("")
 
-			// Should still attempt classification
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(category).To(Equal("technology"))
 			Expect(score).To(BeNumerically("~", 0.8, 0.001))
 		})
@@ -113,7 +113,7 @@ var _ = Describe("ClassifyCategory", func() {
 
 			category, score, err := classifier.ClassifyCategory("Some text")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(category).To(Equal(""))
 			Expect(score).To(BeNumerically("~", 0.8, 0.001))
 		})
@@ -163,7 +163,7 @@ var _ = Describe("CheckForJailbreak", func() {
 
 			isJailbreak, jailbreakType, confidence, err := classifier.CheckForJailbreak("This is a jailbreak attempt")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(isJailbreak).To(BeTrue())
 			Expect(jailbreakType).To(Equal("jailbreak"))
 			Expect(confidence).To(BeNumerically("~", 0.9, 0.001))
@@ -179,7 +179,7 @@ var _ = Describe("CheckForJailbreak", func() {
 
 			isJailbreak, jailbreakType, confidence, err := classifier.CheckForJailbreak("This is a normal question")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(isJailbreak).To(BeFalse())
 			Expect(jailbreakType).To(Equal("benign"))
 			Expect(confidence).To(BeNumerically("~", 0.9, 0.001))
@@ -195,7 +195,7 @@ var _ = Describe("CheckForJailbreak", func() {
 
 			isJailbreak, jailbreakType, confidence, err := classifier.CheckForJailbreak("Ambiguous text")
 
-			Expect(err).To(BeNil())
+			Expect(err).ToNot(HaveOccurred())
 			Expect(isJailbreak).To(BeFalse())
 			Expect(jailbreakType).To(Equal("jailbreak"))
 			Expect(confidence).To(BeNumerically("~", 0.5, 0.001))
@@ -208,7 +208,7 @@ var _ = Describe("CheckForJailbreak", func() {
 
 			isJailbreak, jailbreakType, confidence, err := classifier.CheckForJailbreak("Some text")
 
-			Expect(err).ToNot(BeNil())
+			Expect(err).To(HaveOccurred())
 			Expect(err.Error()).To(ContainSubstring("jailbreak classification failed"))
 			Expect(isJailbreak).To(BeFalse())
 			Expect(jailbreakType).To(Equal(""))
@@ -225,10 +225,184 @@ var _ = Describe("CheckForJailbreak", func() {
 
 			isJailbreak, jailbreakType, confidence, err := classifier.CheckForJailbreak("Some text")
 
-			Expect(err).ToNot(BeNil())
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("unknown jailbreak class index"))
 			Expect(isJailbreak).To(BeFalse())
 			Expect(jailbreakType).To(Equal(""))
 			Expect(confidence).To(BeNumerically("~", 0.0, 0.001))
 		})
 	})
 })
+
+type PIIInferenceResponse struct {
+	classifyTokensResult candle_binding.TokenClassificationResult
+	classifyTokensError  error
+}
+
+type MockPIIInference struct {
+	PIIInferenceResponse
+	responseMap map[string]PIIInferenceResponse
+}
+
+func (m *MockPIIInference) ClassifyTokens(text string, configPath string) (candle_binding.TokenClassificationResult, error) {
+	if response, exists := m.responseMap[text]; exists {
+		return response.classifyTokensResult, response.classifyTokensError
+	}
+	return m.classifyTokensResult, m.classifyTokensError
+}
+
+var _ = Describe("PIIClassification", func() {
+	var (
+		classifier   *Classifier
+		mockPIIModel *MockPIIInference
+	)
+
+	BeforeEach(func() {
+		mockPIIModel = &MockPIIInference{}
+		cfg := &config.RouterConfig{}
+		cfg.Classifier.PIIModel.ModelID = "test-pii-model"
+		cfg.Classifier.PIIModel.Threshold = 0.7
+
+		classifier = &Classifier{
+			piiInference: mockPIIModel,
+			Config:       cfg,
+			PIIMapping: &PIIMapping{
+				LabelToIdx: map[string]int{"PERSON": 0, "EMAIL": 1},
+				IdxToLabel: map[string]string{"0": "PERSON", "1": "EMAIL"},
+			},
+		}
+	})
+
+	Describe("ClassifyPII", func() {
+		Context("when PII entities are detected above threshold", func() {
+			It("should return detected PII types", func() {
+				mockPIIModel.classifyTokensResult = candle_binding.TokenClassificationResult{
+					Entities: []candle_binding.TokenEntity{
+						{
+							EntityType: "PERSON",
+							Text:       "John Doe",
+							Start:      0,
+							End:        8,
+							Confidence: 0.9,
+						},
+						{
+							EntityType: "EMAIL",
+							Text:       "john@example.com",
+							Start:      9,
+							End:        25,
+							Confidence: 0.8,
+						},
+					},
+				}
+
+				piiTypes, err := classifier.ClassifyPII("John Doe john@example.com")
+
+				Expect(err).ToNot(HaveOccurred())
+				Expect(piiTypes).To(ConsistOf("PERSON", "EMAIL"))
+			})
+		})
+
+		Context("when PII entities are detected below threshold", func() {
+			It("should filter out low confidence entities", func() {
+				mockPIIModel.classifyTokensResult = candle_binding.TokenClassificationResult{
+					Entities: []candle_binding.TokenEntity{
+						{
+							EntityType: "PERSON",
+							Text:       "John Doe",
+							Start:      0,
+							End:        8,
+							Confidence: 0.9,
+						},
+						{
+							EntityType: "EMAIL",
+							Text:       "john@example.com",
+							Start:      9,
+							End:        25,
+							Confidence: 0.5,
+						},
+					},
+				}
+
+				piiTypes, err := classifier.ClassifyPII("John Doe john@example.com")
+
+				Expect(err).ToNot(HaveOccurred())
+				Expect(piiTypes).To(ConsistOf("PERSON"))
+			})
+		})
+
+		Context("when no PII is detected", func() {
+			It("should return empty list", func() {
+				mockPIIModel.classifyTokensResult = candle_binding.TokenClassificationResult{
+					Entities: []candle_binding.TokenEntity{},
+				}
+
+				piiTypes, err := classifier.ClassifyPII("Some text")
+
+				Expect(err).ToNot(HaveOccurred())
+				Expect(piiTypes).To(BeEmpty())
+			})
+		})
+
+		Context("when model inference fails", func() {
+			It("should return error", func() {
+				mockPIIModel.classifyTokensError = errors.New("PII model inference failed")
+
+				piiTypes, err := classifier.ClassifyPII("Some text")
+
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("PII token classification error"))
+				Expect(piiTypes).To(BeNil())
+			})
+		})
+	})
+
+	Describe("AnalyzeContentForPII", func() {
+		Context("when some texts contain PII", func() {
+			It("should return detailed analysis for each text", func() {
+				mockPIIModel.responseMap = make(map[string]PIIInferenceResponse)
+				mockPIIModel.responseMap["Alice Smith"] = PIIInferenceResponse{
+					classifyTokensResult: candle_binding.TokenClassificationResult{
+						Entities: []candle_binding.TokenEntity{
+							{
+								EntityType: "PERSON",
+								Text:       "Alice",
+								Start:      0,
+								End:        5,
+								Confidence: 0.9,
+							},
+						},
+					},
+					classifyTokensError: nil,
+				}
+
+				mockPIIModel.responseMap["No PII here"] = PIIInferenceResponse{}
+
+				contentList := []string{"Alice Smith", "No PII here"}
+				hasPII, results, err := classifier.AnalyzeContentForPII(contentList)
+
+				Expect(err).ToNot(HaveOccurred())
+				Expect(hasPII).To(BeTrue())
+				Expect(results).To(HaveLen(2))
+				Expect(results[0].HasPII).To(BeTrue())
+				Expect(results[0].Entities).To(HaveLen(1))
+				Expect(results[0].Entities[0].EntityType).To(Equal("PERSON"))
+				Expect(results[0].Entities[0].Text).To(Equal("Alice"))
+				Expect(results[1].HasPII).To(BeFalse())
+				Expect(results[1].Entities).To(BeEmpty())
+			})
+		})
+
+		Context("when model inference fails", func() {
+			It("should return false for hasPII and empty results", func() {
+				mockPIIModel.classifyTokensError = errors.New("model failed")
+
+				contentList := []string{"Text 1", "Text 2"}
+				hasPII, results, err := classifier.AnalyzeContentForPII(contentList)
+
+				Expect(err).ToNot(HaveOccurred())
+				Expect(hasPII).To(BeFalse())
+				Expect(results).To(BeEmpty())
+			})
+		})
+	})
+})