|
38 | 38 | }, |
39 | 39 | ] # Reduced to just 2 test cases to avoid timeouts |
40 | 40 |
|
# Prompts sent with model="auto". Each entry either names the model the router
# must select ("expected_model") or a model it must avoid ("expected_model_not").
AUTO_ROUTING_TEST_CASES = [
    dict(
        name="Math Problem (should route to phi4)",
        expected_model="phi4",
        content="Calculate the derivative of f(x) = x^3 + 2x^2 - 5x + 7",
    ),
    dict(
        name="Creative Writing (should route to another model)",
        # phi4 is the math-optimized model, so a creative prompt must not land on it.
        expected_model_not="phi4",
        content="Write a poem about the ocean at sunset",
    ),
]
| 54 | + |
41 | 55 |
|
42 | 56 | class RouterClassificationTest(SemanticRouterTestBase): |
43 | 57 | """Test the router's classification functionality.""" |
@@ -267,6 +281,105 @@ def test_classifier_metrics(self): |
267 | 281 |
|
268 | 282 | self.assertGreaterEqual(metrics_found, 0, "No classification metrics found") |
269 | 283 |
|
def test_auto_routing_intelligence(self):
    """Verify that model="auto" routes different query types to different models.

    Every entry of ``AUTO_ROUTING_TEST_CASES`` is posted to the chat endpoint
    with ``model`` set to ``"auto"``. The ``model`` field of the JSON response
    is taken as the routing decision and compared against the case's
    ``expected_model`` (must match) or ``expected_model_not`` (must differ).
    Once all cases have run, the test fails if every query was answered by the
    same model.
    """
    self.print_test_header(
        "Auto Routing Intelligence Test",
        "Verifies that model='auto' actually routes different query types to different specialized models",
    )

    # Maps test case name -> model reported in the response.
    results = {}

    for test_case in AUTO_ROUTING_TEST_CASES:
        self.print_subtest_header(test_case["name"])

        payload = {
            "model": "auto",  # This should trigger intelligent routing
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": test_case["content"]},
            ],
            "temperature": 0.7,
        }

        self.print_request_info(
            payload=payload,
            expectations="Expect: Auto routing to select appropriate specialized model",
        )

        response = requests.post(
            f"{ENVOY_URL}{OPENAI_ENDPOINT}",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )

        passed = response.status_code == 200

        # A body that is not valid JSON raises a ValueError subclass; catch only
        # that so interrupts and genuine programming errors still surface.
        try:
            response_json = response.json()
        except ValueError:
            response_json = None

        # Valid JSON that is not an object (list, string, null, ...) carries no
        # "model" field either, so it is reported as "unknown" as well.
        if isinstance(response_json, dict):
            selected_model = response_json.get("model", "unknown")
        else:
            selected_model = "unknown"

        results[test_case["name"]] = selected_model

        # Resolve the routing expectation once; the outcome feeds both the
        # printed report and the assertions below.
        expected = test_case.get("expected_model")
        forbidden = test_case.get("expected_model_not")
        if "expected_model" in test_case:
            routing_correct = selected_model == expected
            routing_message = f"Expected {expected}, got {selected_model}"
        elif "expected_model_not" in test_case:
            routing_correct = selected_model != forbidden
            routing_message = f"Should NOT be {forbidden}, got {selected_model}"
        else:
            routing_correct = True
            routing_message = f"Got {selected_model}"

        self.print_response_info(
            response,
            {
                "Query Type": test_case["name"],
                "Selected Model": selected_model,
                "Routing Expectation": routing_message,
                "Routing Correct": routing_correct,
            },
        )

        self.print_test_result(
            passed=passed and routing_correct,
            message=(
                f"Auto routing working: {selected_model} for {test_case['name']}"
                if passed and routing_correct
                else f"Auto routing failed: {routing_message}"
            ),
        )

        # Assertions come after the printing so a failing case still leaves a
        # full report of the request and response in the output.
        self.assertEqual(
            response.status_code,
            200,
            f"Auto routing request failed with status {response.status_code}",
        )

        # Check routing intelligence
        if "expected_model" in test_case:
            self.assertEqual(
                selected_model,
                expected,
                f"Auto routing failed: expected {expected}, got {selected_model}",
            )
        elif "expected_model_not" in test_case:
            self.assertNotEqual(
                selected_model,
                forbidden,
                f"Auto routing failed: got {selected_model}, should not be {forbidden}",
            )

    # Print summary of routing decisions
    print("\nAuto Routing Summary:")
    for test_name, model in results.items():
        print(f"  {test_name}: {model}")

    # Ensure we got different models for different query types (intelligence test)
    unique_models = set(results.values())
    if len(unique_models) == 1:
        only_model = next(iter(unique_models))
        self.fail(f"Auto routing not working - all queries routed to same model: {only_model}")
| 382 | + |
270 | 383 |
|
# Run the test suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    unittest.main()
0 commit comments