03 classification api test (#304)

yossiovadia · web-flow · commit 9aef73f791d2 · 2025-10-01T16:22:34.000-04:00
* test: add Classification API intent classification test Add e2e test for standalone Classification API service that validates the /api/v1/classify/intent endpoint correctly classifies different types of queries. Test validates: - Math queries are classified as 'math' - Computer science queries are classified as 'computer science' - Business queries are classified as 'business' - History queries are classified as 'history' - Batch classification endpoint processes multiple texts correctly The Classification API (port 8080) is a standalone service separate from the ExtProc router, providing direct classification capabilities for applications that need text classification without LLM routing. Test requirements: - Classification API must be running on port 8080 - Start with: make run-router-e2e Signed-off-by: Yossi Ovadia <yovadia@redhat.com> * docs: update README with 03-classification-api-test Add 03-classification-api-test.py to the test suite documentation: - Add to test flow list as test #4 - Update numbering for remaining tests - Add to Available Tests section with usage example Signed-off-by: Yossi Ovadia <yovadia@redhat.com> * style: apply pre-commit fixes to 03-classification-api-test.py - Apply black formatter: remove unnecessary parentheses - Fix end of file: remove extra blank line Signed-off-by: Yossi Ovadia <yovadia@redhat.com> --------- Signed-off-by: Yossi Ovadia <yovadia@redhat.com>
diff --git a/e2e-tests/03-classification-api-test.py b/e2e-tests/03-classification-api-test.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+03-classification-api-test.py - Classification API tests
+
+This test validates the standalone Classification API service,
+which provides direct classification capabilities without LLM routing.
+The API is separate from the ExtProc router and runs on port 8080.
+"""
+
+import json
+import sys
+import unittest
+
+import requests
+
+# Import test base from same directory
+from test_base import SemanticRouterTestBase
+
+# Constants
+CLASSIFICATION_API_URL = "http://localhost:8080"
+INTENT_ENDPOINT = "/api/v1/classify/intent"
+
+# Test cases with expected categories based on config.e2e.yaml
+INTENT_TEST_CASES = [
+    {
+        "name": "Math Query",
+        "text": "Solve the quadratic equation x^2 + 5x + 6 = 0",
+        "expected_category": "math",
+    },
+    {
+        "name": "Computer Science Query",
+        "text": "Write a Python function to implement a linked list",
+        "expected_category": "computer science",
+    },
+    {
+        "name": "Business Query",
+        "text": "What are the key principles of supply chain management?",
+        "expected_category": "business",
+    },
+    {
+        "name": "History Query",
+        "text": "Describe the main causes of World War I",
+        "expected_category": "history",
+    },
+]
+
+
+class ClassificationAPITest(SemanticRouterTestBase):
+    """Test the standalone Classification API service."""
+
+    def setUp(self):
+        """Check if the Classification API is running before running tests."""
+        self.print_test_header(
+            "Setup Check",
+            "Verifying that Classification API is running and accepting requests",
+        )
+
+        try:
+            # Test health endpoint
+            health_response = requests.get(
+                f"{CLASSIFICATION_API_URL}/health", timeout=5
+            )
+
+            if health_response.status_code != 200:
+                self.skipTest(
+                    f"Classification API health check failed: {health_response.status_code}"
+                )
+
+            self.print_response_info(
+                health_response, {"Service": "Classification API Health"}
+            )
+
+        except requests.exceptions.ConnectionError:
+            self.skipTest(
+                "Cannot connect to Classification API on port 8080. Is it running? Start with: make run-router-e2e"
+            )
+        except requests.exceptions.Timeout:
+            self.skipTest("Classification API health check timed out")
+
+    def test_intent_classification(self):
+        """Test that intent classification returns correct categories for different query types."""
+        self.print_test_header(
+            "Intent Classification Test",
+            "Verifies that Classification API correctly classifies different query types",
+        )
+
+        for test_case in INTENT_TEST_CASES:
+            self.print_subtest_header(test_case["name"])
+
+            payload = {
+                "text": test_case["text"],
+                "options": {"return_probabilities": False},
+            }
+
+            self.print_request_info(
+                payload=payload,
+                expectations=f"Expect: Correctly classified as '{test_case['expected_category']}'",
+            )
+
+            response = requests.post(
+                f"{CLASSIFICATION_API_URL}{INTENT_ENDPOINT}",
+                headers={"Content-Type": "application/json"},
+                json=payload,
+                timeout=10,
+            )
+
+            response_json = response.json()
+            # The response may be nested in "classification" or at top level
+            if "classification" in response_json:
+                classification = response_json["classification"]
+                actual_category = classification.get("category", "unknown")
+                confidence = classification.get("confidence", 0.0)
+            else:
+                actual_category = response_json.get("category", "unknown")
+                confidence = response_json.get("confidence", 0.0)
+
+            # Check if classification is correct
+            category_correct = actual_category == test_case["expected_category"]
+            is_placeholder = actual_category == "general"
+            passed = response.status_code == 200 and category_correct
+
+            self.print_response_info(
+                response,
+                {
+                    "Expected Category": test_case["expected_category"],
+                    "Actual Category": actual_category,
+                    "Confidence": f"{confidence:.2f}",
+                    "Is Placeholder": "⚠️  Yes" if is_placeholder else "No",
+                    "Category Match": "✅" if category_correct else "❌",
+                },
+            )
+
+            if not category_correct:
+                if is_placeholder:
+                    failure_message = f"Classification failed: returned placeholder 'general' instead of '{test_case['expected_category']}'"
+                else:
+                    failure_message = (
+                        f"Classification incorrect: expected '{test_case['expected_category']}', "
+                        f"got '{actual_category}'"
+                    )
+            else:
+                failure_message = None
+
+            self.print_test_result(
+                passed=passed,
+                message=(
+                    f"Correctly classified as '{actual_category}'"
+                    if passed
+                    else failure_message
+                ),
+            )
+
+            self.assertEqual(
+                response.status_code,
+                200,
+                f"Request failed with status {response.status_code}",
+            )
+
+            self.assertEqual(
+                actual_category,
+                test_case["expected_category"],
+                f"{test_case['name']}: Expected category '{test_case['expected_category']}', got '{actual_category}'",
+            )
+
+    def test_batch_classification(self):
+        """Test batch classification endpoint works correctly."""
+        self.print_test_header(
+            "Batch Classification Test",
+            "Verifies that batch classification endpoint processes multiple texts correctly",
+        )
+
+        texts = [tc["text"] for tc in INTENT_TEST_CASES]
+        expected_categories = [tc["expected_category"] for tc in INTENT_TEST_CASES]
+
+        payload = {"texts": texts, "task_type": "intent"}
+
+        self.print_request_info(
+            payload={"texts": f"{len(texts)} texts", "task_type": "intent"},
+            expectations=f"Expect: {len(texts)} classifications matching expected categories",
+        )
+
+        response = requests.post(
+            f"{CLASSIFICATION_API_URL}/api/v1/classify/batch",
+            headers={"Content-Type": "application/json"},
+            json=payload,
+            timeout=30,
+        )
+
+        response_json = response.json()
+        results = response_json.get("results", [])
+
+        self.print_response_info(
+            response,
+            {
+                "Total Texts": len(texts),
+                "Results Count": len(results),
+                "Processing Time (ms)": response_json.get("processing_time_ms", 0),
+            },
+        )
+
+        passed = response.status_code == 200 and len(results) == len(texts)
+
+        self.print_test_result(
+            passed=passed,
+            message=(
+                f"Successfully classified {len(results)} texts"
+                if passed
+                else f"Batch classification failed or returned wrong count"
+            ),
+        )
+
+        self.assertEqual(response.status_code, 200, "Batch request failed")
+        self.assertEqual(len(results), len(texts), "Result count mismatch")
+
+
+# Run the tests in this file when it is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/e2e-tests/README.md b/e2e-tests/README.md
@@ -21,21 +21,27 @@ This test suite provides a progressive approach to testing the Semantic Router,
    - Tests classification consistency across identical requests
    - Validates metrics collection for classification operations
 
-4. **03-model-routing-test.py** - TBD (To Be Developed)
+4. **03-classification-api-test.py** - Classification API tests ✅
+   - Tests standalone Classification API service (port 8080)
+   - Validates intent classification for different query types
+   - Tests batch classification endpoint
+   - Verifies classification accuracy without LLM routing
+
+5. **04-model-routing-test.py** - TBD (To Be Developed)
    - Tests that requests are routed to the correct backend model
    - Verifies model header modifications
 
-5. **04-cache-test.py** - TBD (To Be Developed)
+6. **05-cache-test.py** - TBD (To Be Developed)
    - Tests cache hit/miss behavior
    - Verifies similarity thresholds
    - Tests cache TTL
 
-6. **05-e2e-category-test.py** - TBD (To Be Developed)
+7. **06-e2e-category-test.py** - TBD (To Be Developed)
    - Tests math queries route to the math-specialized model
    - Tests creative queries route to the creative-specialized model
    - Tests other domain-specific routing
 
-7. **06-metrics-test.py** - TBD (To Be Developed)
+8. **07-metrics-test.py** - TBD (To Be Developed)
    - Tests Prometheus metrics endpoints
    - Verifies correct metrics are being recorded
 
@@ -77,13 +83,15 @@ Currently implemented:
 - **00-client-request-test.py** ✅ - Complete client request validation and smart routing
 - **01-envoy-extproc-test.py** ✅ - Envoy ExtProc interaction and processing tests
 - **02-router-classification-test.py** ✅ - Router classification and model selection tests
+- **03-classification-api-test.py** ✅ - Standalone Classification API service tests
 
 Individual tests can be run with:
 
 ```bash
 python e2e-tests/00-client-request-test.py
 python e2e-tests/01-envoy-extproc-test.py
 python e2e-tests/02-router-classification-test.py
+python e2e-tests/03-classification-api-test.py
 ```
 
 Or run all available tests with: