diff --git a/e2e-tests/01-envoy-extproc-test.py b/e2e-tests/01-envoy-extproc-test.py index 34e6f472..2642c51a 100644 --- a/e2e-tests/01-envoy-extproc-test.py +++ b/e2e-tests/01-envoy-extproc-test.py @@ -10,18 +10,18 @@ import json import os import sys +import unittest import uuid import requests -# Add parent directory to path to allow importing common test utilities -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from tests.test_base import SemanticRouterTestBase +# Import test base from same directory +from test_base import SemanticRouterTestBase # Constants ENVOY_URL = "http://localhost:8801" OPENAI_ENDPOINT = "/v1/chat/completions" -DEFAULT_MODEL = "qwen2.5:32b" # Changed from gemma3:27b to match make test-prompt +DEFAULT_MODEL = "Model-A" # Use configured model that matches router config class EnvoyExtProcTest(SemanticRouterTestBase): @@ -35,11 +35,13 @@ def setUp(self): ) try: + # Use unique content to bypass cache for setup check + setup_id = str(uuid.uuid4())[:8] payload = { "model": DEFAULT_MODEL, "messages": [ - {"role": "assistant", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test"}, + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": f"ExtProc setup test {setup_id}"}, ], } @@ -77,8 +79,11 @@ def test_request_headers_propagation(self): payload = { "model": DEFAULT_MODEL, "messages": [ - {"role": "assistant", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is the capital of France?"}, + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": f"ExtProc header test {trace_id[:8]} - explain photosynthesis briefly.", + }, ], "temperature": 0.7, } @@ -137,57 +142,98 @@ def test_request_headers_propagation(self): ) self.assertIn("model", response_json, "Response is missing 'model' field") - def test_extproc_override(self): - """Test that the ExtProc can modify the request's target model.""" + def test_extproc_body_modification(self): + """Test that the ExtProc can modify the request and response bodies.""" self.print_test_header( - "ExtProc Model Override Test", - "Verifies that ExtProc correctly routes different query types to appropriate models", + "ExtProc Body Modification Test", + "Verifies that ExtProc can modify request and response bodies while preserving essential fields", ) - test_cases = [ - { - "name": "Math Query", - "content": "What is the derivative of f(x) = x^3 + 2x^2 - 5x + 7?", - "category": "math", - }, + trace_id = str(uuid.uuid4()) + + payload = { + "model": DEFAULT_MODEL, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": f"ExtProc body test {trace_id[:8]} - describe machine learning in simple terms.", + }, + ], + "temperature": 0.7, + "test_field": "should_be_preserved", + } + + headers = { + "Content-Type": "application/json", + "X-Test-Trace-ID": trace_id, + "X-Test-Body-Modification": "true", + } + + self.print_request_info( + payload=payload, + expectations="Expect: Request processing with body modifications while preserving essential fields", + ) + + response = requests.post( + f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=60 + ) + + response_json = response.json() + self.print_response_info( + response, { - "name": "Creative Writing Query", - "content": "Write a short story about a space cat.", - "category": "creative", + "Original Model": DEFAULT_MODEL, + "Final Model": response_json.get("model", "Not specified"), + "Test Field Preserved": "test_field" in response_json, }, - ] + ) - results = {} + passed = response.status_code < 400 and "model" in response_json + self.print_test_result( + passed=passed, + message=( + "Request processed successfully with body modifications" + if passed + else "Issues with request processing or body modifications" + ), + ) - for test_case in test_cases: - self.print_subtest_header(test_case["name"]) + self.assertLess( + response.status_code, + 400, + f"Request was rejected with status code {response.status_code}", + ) - trace_id = str(uuid.uuid4()) + def test_extproc_error_handling(self): + """Test ExtProc error handling and failure scenarios.""" + self.print_test_header( + "ExtProc Error Handling Test", + "Verifies that ExtProc properly handles and recovers from error conditions", + ) - payload = { - "model": DEFAULT_MODEL, - "messages": [ - { - "role": "assistant", - "content": f"You are an expert in {test_case['category']}.", - }, - {"role": "user", "content": test_case["content"]}, - ], - "temperature": 0.7, - } + # Test with headers that might cause ExtProc issues + payload = { + "model": DEFAULT_MODEL, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Simple test query"}, + ], + } - headers = { - "Content-Type": "application/json", - "X-Test-Trace-ID": trace_id, - "X-Original-Model": DEFAULT_MODEL, - "X-Test-Category": test_case["category"], - } + headers = { + "Content-Type": "application/json", + "X-Very-Long-Header": "x" * 1000, # Very long header value + "X-Test-Error-Recovery": "true", + "X-Special-Chars": "data-with-special-chars-!@#$%^&*()", # Special characters + } - self.print_request_info( - payload=payload, - expectations=f"Expect: Query to be routed based on {test_case['category']} category", - ) + self.print_request_info( + payload=payload, + expectations="Expect: ExtProc to handle unusual headers gracefully without crashing", + ) + try: response = requests.post( f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, @@ -195,100 +241,126 @@ def test_extproc_override(self): timeout=60, ) - response_json = response.json() - results[test_case["name"]] = response_json.get("model", "unknown") + # ExtProc should either process successfully or fail gracefully without hanging + passed = ( + response.status_code < 500 + ) # No server errors due to ExtProc issues self.print_response_info( response, { - "Category": test_case["category"], - "Original Model": DEFAULT_MODEL, - "Routed Model": results[test_case["name"]], + "Status Code": response.status_code, + "Error Handling": "Graceful" if passed else "Server Error", }, ) - passed = ( - response.status_code < 400 and results[test_case["name"]] != "unknown" - ) - self.print_test_result( - passed=passed, - message=( - f"Successfully routed to model: {results[test_case['name']]}" - if passed - else f"Routing failed or returned unknown model" - ), + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e: + # Connection errors are acceptable - it shows the system is protecting itself + passed = True + self.print_response_info( + None, + { + "Connection": "Terminated (Expected)", + "Error Handling": "Protective disconnection", + "Error": str(e)[:100] + "..." if len(str(e)) > 100 else str(e), + }, ) - self.assertLess( - response.status_code, - 400, - f"{test_case['name']} request failed with status {response.status_code}", - ) + self.print_test_result( + passed=passed, + message=( + "ExtProc handled error conditions gracefully" + if passed + else "ExtProc error handling failed" + ), + ) - # Final summary of routing results - if len(results) == 2: - print("\nRouting Summary:") - print(f"Math Query → {results['Math Query']}") - print(f"Creative Writing Query → {results['Creative Writing Query']}") + # The test passes if either the request succeeds or fails gracefully + self.assertTrue( + passed, + "ExtProc should handle malformed input gracefully", + ) - def test_extproc_body_modification(self): - """Test that the ExtProc can modify the request and response bodies.""" + def test_extproc_performance_impact(self): + """Test that ExtProc doesn't significantly impact request performance.""" self.print_test_header( - "ExtProc Body Modification Test", - "Verifies that ExtProc can modify request and response bodies while preserving essential fields", + "ExtProc Performance Impact Test", + "Verifies that ExtProc processing doesn't add excessive latency", ) + # Generate unique content for cache bypass trace_id = str(uuid.uuid4()) payload = { "model": DEFAULT_MODEL, "messages": [ - {"role": "assistant", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is quantum computing?"}, + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": f"ExtProc performance test {trace_id[:8]} - what is artificial intelligence?", + }, ], - "temperature": 0.7, - "test_field": "should_be_preserved", } - headers = { + # Test with minimal ExtProc processing + headers_minimal = {"Content-Type": "application/json"} + + # Test with ExtProc headers + headers_extproc = { "Content-Type": "application/json", - "X-Test-Trace-ID": trace_id, - "X-Test-Body-Modification": "true", + "X-Test-Performance": "true", + "X-Processing-Mode": "full", } self.print_request_info( payload=payload, - expectations="Expect: Request processing with body modifications while preserving essential fields", + expectations="Expect: Reasonable response times with ExtProc processing", ) + import time + + # Measure response time with ExtProc + start_time = time.time() response = requests.post( - f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=60 + f"{ENVOY_URL}{OPENAI_ENDPOINT}", + headers=headers_extproc, + json=payload, + timeout=60, ) + response_time = time.time() - start_time + + passed = ( + response.status_code < 400 and response_time < 30.0 + ) # Reasonable timeout - response_json = response.json() self.print_response_info( response, { - "Original Model": DEFAULT_MODEL, - "Final Model": response_json.get("model", "Not specified"), - "Test Field Preserved": "test_field" in response_json, + "Response Time": f"{response_time:.2f}s", + "Performance": ( + "Acceptable" if response_time < 10.0 else "Slow but functional" + ), }, ) - passed = response.status_code < 400 and "model" in response_json self.print_test_result( passed=passed, message=( - "Request processed successfully with body modifications" + f"ExtProc processing completed in {response_time:.2f}s" if passed - else "Issues with request processing or body modifications" + else f"ExtProc processing too slow: {response_time:.2f}s" ), ) self.assertLess( response.status_code, 400, - f"Request was rejected with status code {response.status_code}", + "ExtProc should not cause request failures", + ) + self.assertLess( + response_time, + 30.0, + "ExtProc should not cause excessive delays", )