vllm-project
diff --git a/‎e2e-tests/03-classification-api-test.py‎
Lines changed: 56 additions & 5 deletions b/‎e2e-tests/03-classification-api-test.py‎
Lines changed: 56 additions & 5 deletions
diff --git a/‎src/semantic-router/cmd/main.go‎
Lines changed: 9 additions & 8 deletions b/‎src/semantic-router/cmd/main.go‎
Lines changed: 9 additions & 8 deletions
diff --git a/‎src/semantic-router/pkg/api/server.go‎
Lines changed: 175 additions & 6 deletions b/‎src/semantic-router/pkg/api/server.go‎
Lines changed: 175 additions & 6 deletions
@@ -189,29 +189,80 @@ def test_batch_classification(self):
         response_json = response.json()
         results = response_json.get("results", [])
 
+        # Extract actual categories from results
+        actual_categories = []
+        correct_classifications = 0
+
+        for i, result in enumerate(results):
+            if isinstance(result, dict):
+                actual_category = result.get("category", "unknown")
+            else:
+                actual_category = "unknown"
+
+            actual_categories.append(actual_category)
+
+            if (
+                i < len(expected_categories)
+                and actual_category == expected_categories[i]
+            ):
+                correct_classifications += 1
+
+        # Calculate accuracy
+        accuracy = (
+            (correct_classifications / len(expected_categories)) * 100
+            if expected_categories
+            else 0
+        )
+
         self.print_response_info(
             response,
             {
                 "Total Texts": len(texts),
                 "Results Count": len(results),
                 "Processing Time (ms)": response_json.get("processing_time_ms", 0),
+                "Accuracy": f"{accuracy:.1f}% ({correct_classifications}/{len(expected_categories)})",
             },
         )
 
-        passed = response.status_code == 200 and len(results) == len(texts)
+        # Print detailed classification results
+        print("\n📊 Detailed Classification Results:")
+        for i, (text, expected, actual) in enumerate(
+            zip(texts, expected_categories, actual_categories)
+        ):
+            status = "✅" if expected == actual else "❌"
+            print(f"  {i+1}. {status} Expected: {expected:<15} | Actual: {actual:<15}")
+            print(f"     Text: {text[:60]}...")
+
+        # Check basic requirements first
+        basic_checks_passed = response.status_code == 200 and len(results) == len(texts)
+
+        # Check classification accuracy (should be high for a working system)
+        accuracy_threshold = 75.0  # Expect at least 75% accuracy
+        accuracy_passed = accuracy >= accuracy_threshold
+
+        overall_passed = basic_checks_passed and accuracy_passed
 
         self.print_test_result(
-            passed=passed,
+            passed=overall_passed,
             message=(
-                f"Successfully classified {len(results)} texts"
-                if passed
-                else f"Batch classification failed or returned wrong count"
+                f"Successfully classified {len(results)} texts with {accuracy:.1f}% accuracy"
+                if overall_passed
+                else f"Batch classification issues: Basic checks: {basic_checks_passed}, Accuracy: {accuracy:.1f}% (threshold: {accuracy_threshold}%)"
             ),
         )
 
+        # Basic checks
         self.assertEqual(response.status_code, 200, "Batch request failed")
         self.assertEqual(len(results), len(texts), "Result count mismatch")
 
+        # NEW: Validate classification accuracy
+        self.assertGreaterEqual(
+            accuracy,
+            accuracy_threshold,
+            f"Classification accuracy too low: {accuracy:.1f}% < {accuracy_threshold}%. "
+            f"Expected: {expected_categories}, Actual: {actual_categories}",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
@@ -15,13 +15,14 @@ import (
 func main() {
 	// Parse command-line flags
 	var (
-		configPath  = flag.String("config", "config/config.yaml", "Path to the configuration file")
-		port        = flag.Int("port", 50051, "Port to listen on for gRPC ExtProc")
-		apiPort     = flag.Int("api-port", 8080, "Port to listen on for Classification API")
-		metricsPort = flag.Int("metrics-port", 9190, "Port for Prometheus metrics")
-		enableAPI   = flag.Bool("enable-api", true, "Enable Classification API server")
-		secure      = flag.Bool("secure", false, "Enable secure gRPC server with TLS")
-		certPath    = flag.String("cert-path", "", "Path to TLS certificate directory (containing tls.crt and tls.key)")
+		configPath            = flag.String("config", "config/config.yaml", "Path to the configuration file")
+		port                  = flag.Int("port", 50051, "Port to listen on for gRPC ExtProc")
+		apiPort               = flag.Int("api-port", 8080, "Port to listen on for Classification API")
+		metricsPort           = flag.Int("metrics-port", 9190, "Port for Prometheus metrics")
+		enableAPI             = flag.Bool("enable-api", true, "Enable Classification API server")
+		enableSystemPromptAPI = flag.Bool("enable-system-prompt-api", false, "Enable system prompt configuration endpoints (SECURITY: only enable in trusted environments)")
+		secure                = flag.Bool("secure", false, "Enable secure gRPC server with TLS")
+		certPath              = flag.String("cert-path", "", "Path to TLS certificate directory (containing tls.crt and tls.key)")
 	)
 	flag.Parse()
 
@@ -58,7 +59,7 @@ func main() {
 	if *enableAPI {
 		go func() {
 			observability.Infof("Starting Classification API server on port %d", *apiPort)
-			if err := api.StartClassificationAPI(*configPath, *apiPort); err != nil {
+			if err := api.StartClassificationAPI(*configPath, *apiPort, *enableSystemPromptAPI); err != nil {
 				observability.Errorf("Classification API server error: %v", err)
 			}
 		}()
 
@@ -17,8 +17,9 @@ import (
 
 // ClassificationAPIServer holds the server state and dependencies
 type ClassificationAPIServer struct {
-	classificationSvc *services.ClassificationService
-	config            *config.RouterConfig
+	classificationSvc     *services.ClassificationService
+	config                *config.RouterConfig
+	enableSystemPromptAPI bool
 }
 
 // ModelsInfoResponse represents the response for models info endpoint
@@ -101,7 +102,7 @@ type ClassificationOptions struct {
 }
 
 // StartClassificationAPI starts the Classification API server
-func StartClassificationAPI(configPath string, port int) error {
+func StartClassificationAPI(configPath string, port int, enableSystemPromptAPI bool) error {
 	// Load configuration
 	cfg, err := config.LoadConfig(configPath)
 	if err != nil {
@@ -139,8 +140,9 @@ func StartClassificationAPI(configPath string, port int) error {
 
 	// Create server instance
 	apiServer := &ClassificationAPIServer{
-		classificationSvc: classificationSvc,
-		config:            cfg,
+		classificationSvc:     classificationSvc,
+		config:                cfg,
+		enableSystemPromptAPI: enableSystemPromptAPI,
 	}
 
 	// Create HTTP server with routes
@@ -203,6 +205,15 @@ func (s *ClassificationAPIServer) setupRoutes() *http.ServeMux {
 	mux.HandleFunc("GET /config/classification", s.handleGetConfig)
 	mux.HandleFunc("PUT /config/classification", s.handleUpdateConfig)
 
+	// System prompt configuration endpoints (only if explicitly enabled)
+	if s.enableSystemPromptAPI {
+		observability.Infof("System prompt configuration endpoints enabled")
+		mux.HandleFunc("GET /config/system-prompts", s.handleGetSystemPrompts)
+		mux.HandleFunc("PUT /config/system-prompts", s.handleUpdateSystemPrompts)
+	} else {
+		observability.Infof("System prompt configuration endpoints disabled for security")
+	}
+
 	return mux
 }
 
@@ -221,7 +232,16 @@ func (s *ClassificationAPIServer) handleIntentClassification(w http.ResponseWrit
 		return
 	}
 
-	response, err := s.classificationSvc.ClassifyIntent(req)
+	// Use unified classifier if available, otherwise fall back to legacy
+	var response *services.IntentResponse
+	var err error
+
+	if s.classificationSvc.HasUnifiedClassifier() {
+		response, err = s.classificationSvc.ClassifyIntentUnified(req)
+	} else {
+		response, err = s.classificationSvc.ClassifyIntent(req)
+	}
+
 	if err != nil {
 		s.writeErrorResponse(w, http.StatusInternalServerError, "CLASSIFICATION_ERROR", err.Error())
 		return
@@ -705,3 +725,152 @@ func (s *ClassificationAPIServer) calculateUnifiedStatistics(unifiedResults *ser
 		LowConfidenceCount:   lowConfidenceCount,
 	}
 }
+
+// SystemPromptInfo represents system prompt information for a category
+type SystemPromptInfo struct {
+	Category string `json:"category"`
+	Prompt   string `json:"prompt"`
+	Enabled  bool   `json:"enabled"`
+	Mode     string `json:"mode"` // "replace" or "insert"
+}
+
+// SystemPromptsResponse represents the response for GET /config/system-prompts
+type SystemPromptsResponse struct {
+	SystemPrompts []SystemPromptInfo `json:"system_prompts"`
+}
+
+// SystemPromptUpdateRequest represents a request to update system prompt settings
+type SystemPromptUpdateRequest struct {
+	Category string `json:"category,omitempty"` // If empty, applies to all categories
+	Enabled  *bool  `json:"enabled,omitempty"`  // true to enable, false to disable
+	Mode     string `json:"mode,omitempty"`     // "replace" or "insert"
+}
+
+// handleGetSystemPrompts handles GET /config/system-prompts
+func (s *ClassificationAPIServer) handleGetSystemPrompts(w http.ResponseWriter, r *http.Request) {
+	cfg := s.config
+	if cfg == nil {
+		http.Error(w, "Configuration not available", http.StatusInternalServerError)
+		return
+	}
+
+	var systemPrompts []SystemPromptInfo
+	for _, category := range cfg.Categories {
+		systemPrompts = append(systemPrompts, SystemPromptInfo{
+			Category: category.Name,
+			Prompt:   category.SystemPrompt,
+			Enabled:  category.IsSystemPromptEnabled(),
+			Mode:     category.GetSystemPromptMode(),
+		})
+	}
+
+	response := SystemPromptsResponse{
+		SystemPrompts: systemPrompts,
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	if err := json.NewEncoder(w).Encode(response); err != nil {
+		http.Error(w, "Failed to encode response", http.StatusInternalServerError)
+		return
+	}
+}
+
+// handleUpdateSystemPrompts handles PUT /config/system-prompts
+func (s *ClassificationAPIServer) handleUpdateSystemPrompts(w http.ResponseWriter, r *http.Request) {
+	var req SystemPromptUpdateRequest
+	if err := s.parseJSONRequest(r, &req); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	if req.Enabled == nil && req.Mode == "" {
+		http.Error(w, "either enabled or mode field is required", http.StatusBadRequest)
+		return
+	}
+
+	// Validate mode if provided
+	if req.Mode != "" && req.Mode != "replace" && req.Mode != "insert" {
+		http.Error(w, "mode must be either 'replace' or 'insert'", http.StatusBadRequest)
+		return
+	}
+
+	cfg := s.config
+	if cfg == nil {
+		http.Error(w, "Configuration not available", http.StatusInternalServerError)
+		return
+	}
+
+	// Create a copy of the config to modify
+	newCfg := *cfg
+	newCategories := make([]config.Category, len(cfg.Categories))
+	copy(newCategories, cfg.Categories)
+	newCfg.Categories = newCategories
+
+	updated := false
+	if req.Category == "" {
+		// Update all categories
+		for i := range newCfg.Categories {
+			if newCfg.Categories[i].SystemPrompt != "" {
+				if req.Enabled != nil {
+					newCfg.Categories[i].SystemPromptEnabled = req.Enabled
+				}
+				if req.Mode != "" {
+					newCfg.Categories[i].SystemPromptMode = req.Mode
+				}
+				updated = true
+			}
+		}
+	} else {
+		// Update specific category
+		for i := range newCfg.Categories {
+			if newCfg.Categories[i].Name == req.Category {
+				if newCfg.Categories[i].SystemPrompt == "" {
+					http.Error(w, fmt.Sprintf("Category '%s' has no system prompt configured", req.Category), http.StatusBadRequest)
+					return
+				}
+				if req.Enabled != nil {
+					newCfg.Categories[i].SystemPromptEnabled = req.Enabled
+				}
+				if req.Mode != "" {
+					newCfg.Categories[i].SystemPromptMode = req.Mode
+				}
+				updated = true
+				break
+			}
+		}
+		if !updated {
+			http.Error(w, fmt.Sprintf("Category '%s' not found", req.Category), http.StatusNotFound)
+			return
+		}
+	}
+
+	if !updated {
+		http.Error(w, "No categories with system prompts found to update", http.StatusBadRequest)
+		return
+	}
+
+	// Update the configuration
+	s.config = &newCfg
+	s.classificationSvc.UpdateConfig(&newCfg)
+
+	// Return the updated system prompts
+	var systemPrompts []SystemPromptInfo
+	for _, category := range newCfg.Categories {
+		systemPrompts = append(systemPrompts, SystemPromptInfo{
+			Category: category.Name,
+			Prompt:   category.SystemPrompt,
+			Enabled:  category.IsSystemPromptEnabled(),
+			Mode:     category.GetSystemPromptMode(),
+		})
+	}
+
+	response := SystemPromptsResponse{
+		SystemPrompts: systemPrompts,
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	if err := json.NewEncoder(w).Encode(response); err != nil {
+		http.Error(w, "Failed to encode response", http.StatusInternalServerError)
+		return
+	}
+}