Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 59 additions & 27 deletions e2e-tests/02-router-classification-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,49 @@
import os
import sys
import time
import unittest
from collections import defaultdict

import requests

# Add parent directory to path to allow importing common test utilities
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from tests.test_base import SemanticRouterTestBase
from test_base import SemanticRouterTestBase

# Constants
ENVOY_URL = "http://localhost:8801"
OPENAI_ENDPOINT = "/v1/chat/completions"
ROUTER_METRICS_URL = "http://localhost:9190/metrics"
DEFAULT_MODEL = "qwen2.5:32b" # Changed from gemma3:27b to match make test-prompt
DEFAULT_MODEL = "Model-A" # Use configured model that matches router config

# Category test cases - each designed to trigger a specific classifier category
# Based on config.e2e.yaml: math→Model-B, computer science→Model-B, business→Model-A, history→Model-A
CATEGORY_TEST_CASES = [
{
"name": "Math Query",
"expected_category": "math",
"content": "Solve the differential equation dy/dx + 2y = x^2 with the initial condition y(0) = 1.",
"expected_model": "Model-B", # math has Model-B with score 1.0
"content": "Solve the quadratic equation x^2 + 5x + 6 = 0 and explain the steps.",
},
{
"name": "Creative Writing Query",
"expected_category": "creative",
"content": "Write a short story about a space cat.",
"name": "Computer Science/Coding Query",
"expected_category": "computer science",
"expected_model": "Model-B", # computer science has Model-B with score 0.6
"content": "Write a Python function to implement a linked list with insert and delete operations.",
},
] # Reduced to just 2 test cases to avoid timeouts
{
"name": "Business Query",
"expected_category": "business",
"expected_model": "Model-A", # business has Model-A with score 0.8
"content": "What are the key principles of supply chain management in modern business?",
},
{
"name": "History Query",
"expected_category": "history",
"expected_model": "Model-A", # history has Model-A with score 0.8
"content": "Describe the main causes and key events of World War I.",
},
]


class RouterClassificationTest(SemanticRouterTestBase):
Expand Down Expand Up @@ -129,7 +145,7 @@ def test_classification_consistency(self):
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
headers={"Content-Type": "application/json"},
json=payload,
timeout=10,
timeout=60,
)

passed = response.status_code < 400
Expand Down Expand Up @@ -165,7 +181,7 @@ def test_category_classification(self):
self.print_subtest_header(test_case["name"])

payload = {
"model": DEFAULT_MODEL,
"model": "auto", # Use "auto" to trigger category-based classification routing
"messages": [
{
"role": "assistant",
Expand All @@ -178,7 +194,7 @@ def test_category_classification(self):

self.print_request_info(
payload=payload,
expectations=f"Expect: Query to be classified as {test_case['expected_category']} and routed accordingly",
expectations=f"Expect: Query classified as '{test_case['expected_category']}' → routed to {test_case.get('expected_model', 'appropriate model')}",
)

response = requests.post(
Expand All @@ -188,25 +204,30 @@ def test_category_classification(self):
timeout=60,
)

passed = response.status_code < 400
response_json = response.json()
model = response_json.get("model", "unknown")
results[test_case["name"]] = model
actual_model = response_json.get("model", "unknown")
expected_model = test_case.get("expected_model", "unknown")
results[test_case["name"]] = actual_model

model_match = actual_model == expected_model
passed = response.status_code < 400 and model_match

self.print_response_info(
response,
{
"Expected Category": test_case["expected_category"],
"Selected Model": model,
"Expected Model": expected_model,
"Actual Model": actual_model,
"Routing Correct": "✅" if model_match else "❌",
},
)

self.print_test_result(
passed=passed,
message=(
f"Query successfully routed to model: {model}"
if passed
else f"Request failed with status {response.status_code}"
f"Query correctly routed to {actual_model}"
if model_match
else f"Routing failed: expected {expected_model}, got {actual_model}"
),
)

Expand All @@ -215,23 +236,30 @@ def test_category_classification(self):
400,
f"{test_case['name']} request failed with status {response.status_code}",
)

self.assertEqual(
actual_model,
expected_model,
f"{test_case['name']}: Expected routing to {expected_model}, but got {actual_model}",
)

def test_classifier_metrics(self):
"""Test that classification metrics are being recorded."""
"""Test that router metrics are being recorded and exposed."""
self.print_test_header(
"Classifier Metrics Test",
"Verifies that classification metrics are being properly recorded and exposed",
"Router Metrics Test",
"Verifies that router metrics (classification, cache operations) are being properly recorded and exposed",
)

# First, let's get the current metrics as a baseline
response = requests.get(ROUTER_METRICS_URL)
baseline_metrics = response.text

# Check if classification metrics exist without making additional requests
# Check if classification and routing metrics exist
# These are the actual metrics exposed by the router
classification_metrics = [
"llm_router_classification_duration_seconds",
"llm_router_requests_total",
"llm_router_model_selection_count",
"llm_entropy_classification_latency_seconds", # Entropy-based classification timing
"llm_cache_hits_total", # Cache operations (related to classification)
"llm_cache_misses_total", # Cache misses
]

metrics_found = 0
Expand Down Expand Up @@ -259,13 +287,17 @@ def test_classifier_metrics(self):
self.print_test_result(
passed=passed,
message=(
f"Found {metrics_found} classification metrics"
f"Found {metrics_found}/{len(classification_metrics)} router metrics"
if passed
else "No classification metrics found"
else "No router metrics found"
),
)

self.assertGreaterEqual(metrics_found, 0, "No classification metrics found")
self.assertGreater(
metrics_found,
0,
f"No router metrics found. Expected at least one of: {', '.join(classification_metrics)}"
)


if __name__ == "__main__":
Expand Down
18 changes: 12 additions & 6 deletions e2e-tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,16 @@ This test suite provides a progressive approach to testing the Semantic Router,
- Tests malformed request validation
- Tests content-based smart routing (math → Model-B, creative → Model-A)

2. **01-envoy-extproc-test.py** - TBD (To Be Developed)
2. **01-envoy-extproc-test.py** - Envoy ExtProc interaction tests ✅
- Tests that Envoy correctly forwards requests to the ExtProc
- Checks header propagation
- Checks header propagation and body modification
- Tests ExtProc error handling and performance impact

3. **02-router-classification-test.py** - TBD (To Be Developed)
- Tests BERT embeddings
- Tests category classification
- Verifies model selection based on content
3. **02-router-classification-test.py** - Router classification tests ✅
- Tests category-based classification with auto model selection
- Verifies queries route to appropriate specialized models
- Tests classification consistency across identical requests
- Validates metrics collection for classification operations

4. **03-model-routing-test.py** - TBD (To Be Developed)
- Tests that requests are routed to the correct backend model
Expand Down Expand Up @@ -73,11 +75,15 @@ Will be added in future PRs for testing with actual model inference.
Currently implemented:

- **00-client-request-test.py** ✅ - Complete client request validation and smart routing
- **01-envoy-extproc-test.py** ✅ - Envoy ExtProc interaction and processing tests
- **02-router-classification-test.py** ✅ - Router classification and model selection tests

Individual tests can be run with:

```bash
python e2e-tests/00-client-request-test.py
python e2e-tests/01-envoy-extproc-test.py
python e2e-tests/02-router-classification-test.py
```

Or run all available tests with:
Expand Down
Loading