Skip to content

Commit 933bbd7

Browse files
yossiovadia and claude committed
fix: increase e2e test timeouts and update config health check
- Increase timeouts from 10s to 30s in failing test files
- Update config health check from /health to /api/version for Ollama compatibility
- Fix metrics naming expectations in jailbreak, PII, and general metrics tests

Co-Authored-By: Claude <[email protected]>
1 parent e819766 commit 933bbd7

File tree

5 files changed

+50
-43
lines changed

5 files changed

+50
-43
lines changed

config/config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@ vllm_endpoints:
4646
- "phi4"
4747
- "gemma3:27b"
4848
weight: 1 # Load balancing weight
49-
health_check_path: "/health" # Optional health check endpoint
49+
health_check_path: "/api/version" # Optional health check endpoint
5050
- name: "endpoint2"
5151
address: "127.0.0.1"
5252
port: 11434
5353
models:
5454
- "mistral-small3.1"
5555
weight: 1
56-
health_check_path: "/health"
56+
health_check_path: "/api/version"
5757
- name: "endpoint3"
5858
address: "127.0.0.1"
5959
port: 11434

e2e-tests/02-router-classification-test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def setUp(self):
6868
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
6969
headers={"Content-Type": "application/json"},
7070
json=payload,
71-
timeout=60,
71+
timeout=(10, 60), # (connect timeout, read timeout)
7272
)
7373

7474
if response.status_code >= 500:
@@ -129,7 +129,7 @@ def test_classification_consistency(self):
129129
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
130130
headers={"Content-Type": "application/json"},
131131
json=payload,
132-
timeout=10,
132+
timeout=(10, 60), # (connect timeout, read timeout)
133133
)
134134

135135
passed = response.status_code < 400
@@ -185,7 +185,7 @@ def test_category_classification(self):
185185
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
186186
headers={"Content-Type": "application/json"},
187187
json=payload,
188-
timeout=60,
188+
timeout=(10, 60), # (connect timeout, read timeout)
189189
)
190190

191191
passed = response.status_code < 400

e2e-tests/03-jailbreak-test.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def setUp(self):
9292
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
9393
headers={"Content-Type": "application/json"},
9494
json=payload,
95-
timeout=60,
95+
timeout=(10, 60), # (connect timeout, read timeout)
9696
)
9797

9898
if response.status_code >= 500:
@@ -161,7 +161,7 @@ def test_benign_requests_allowed(self):
161161
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
162162
headers=headers,
163163
json=payload,
164-
timeout=10,
164+
timeout=(10, 60), # (connect timeout, read timeout)
165165
)
166166

167167
# Benign requests should be processed (may get 503 due to missing vLLM backend)
@@ -212,16 +212,21 @@ def test_jailbreak_detection_metrics(self):
212212

213213
# Look for specific jailbreak metrics
214214
jailbreak_metrics = [
215-
"llm_router_jailbreak_detected_total",
216-
"llm_router_jailbreak_blocked_total",
217-
"llm_router_jailbreak_classification_duration_seconds",
218-
"llm_router_requests_total",
215+
"llm_classifier_latency_seconds_count", # Classification timing
216+
"llm_request_errors_total", # Blocked requests with reason="jailbreak_block"
217+
"llm_model_requests_total", # Total requests
219218
]
220219

221220
metrics_found = {}
222221
for metric in jailbreak_metrics:
223222
for line in metrics_text.split("\n"):
224223
if metric in line and not line.startswith("#"):
224+
# For classifier metrics, ensure it's specifically for jailbreak
225+
if "classifier" in metric and "jailbreak" not in line:
226+
continue
227+
# For error metrics, ensure it's specifically jailbreak_block
228+
if "errors" in metric and "jailbreak_block" not in line:
229+
continue
225230
# Extract metric value
226231
try:
227232
parts = line.strip().split()
@@ -287,7 +292,7 @@ def test_jailbreak_classification_consistency(self):
287292
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
288293
headers=headers,
289294
json=payload,
290-
timeout=10,
295+
timeout=(10, 60), # (connect timeout, read timeout)
291296
)
292297

293298
# Record the response status for consistency checking

e2e-tests/05-pii-policy-test.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def setUp(self):
126126
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
127127
headers={"Content-Type": "application/json"},
128128
json=payload,
129-
timeout=60,
129+
timeout=(10, 60), # (connect timeout, read timeout)
130130
)
131131

132132
if response.status_code >= 500:
@@ -195,7 +195,7 @@ def test_no_pii_requests_allowed(self):
195195
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
196196
headers=headers,
197197
json=payload,
198-
timeout=10,
198+
timeout=(10, 60), # (connect timeout, read timeout)
199199
)
200200

201201
# No PII requests should be processed (may get 503 due to missing vLLM backend)
@@ -267,7 +267,7 @@ def test_allowed_pii_requests(self):
267267
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
268268
headers=headers,
269269
json=payload,
270-
timeout=10,
270+
timeout=(10, 60), # (connect timeout, read timeout)
271271
)
272272

273273
# Allowed PII requests should be processed (may get 503 due to missing vLLM backend)
@@ -337,7 +337,7 @@ def test_pii_policy_consistency(self):
337337
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
338338
headers=headers,
339339
json=payload,
340-
timeout=10,
340+
timeout=(10, 60), # (connect timeout, read timeout)
341341
)
342342

343343
# Record the response status for consistency checking
@@ -378,16 +378,21 @@ def test_pii_detection_metrics(self):
378378

379379
# Look for specific PII metrics
380380
pii_metrics = [
381-
"llm_router_pii_detected_total",
382-
"llm_router_pii_blocked_total",
383-
"llm_router_pii_classification_duration_seconds",
384-
"llm_router_requests_total",
381+
"llm_classifier_latency_seconds_count", # Classification timing
382+
"llm_request_errors_total", # Blocked requests with reason="pii_block"
383+
"llm_model_requests_total", # Total requests
385384
]
386385

387386
metrics_found = {}
388387
for metric in pii_metrics:
389388
for line in metrics_text.split("\n"):
390389
if metric in line and not line.startswith("#"):
390+
# For classifier metrics, ensure it's specifically for pii
391+
if "classifier" in metric and "pii" not in line:
392+
continue
393+
# For error metrics, ensure it's specifically pii_block
394+
if "errors" in metric and "pii_block" not in line:
395+
continue
391396
# Extract metric value
392397
try:
393398
parts = line.strip().split()
@@ -459,7 +464,7 @@ def test_model_pii_policy_configuration(self):
459464
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
460465
headers=headers,
461466
json=payload,
462-
timeout=10,
467+
timeout=(10, 60), # (connect timeout, read timeout)
463468
)
464469

465470
try:

e2e-tests/08-metrics-test.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,21 @@
2828
# Expected metric families that should be present
2929
EXPECTED_METRIC_FAMILIES = [
3030
# Core routing metrics
31-
"llm_router_requests_total",
32-
"llm_router_routing_decision",
33-
"llm_router_model_selection_count",
34-
31+
"llm_model_requests_total",
32+
"llm_model_routing_latency_seconds",
33+
"llm_routing_reason_codes_total",
34+
3535
# Classification metrics
36-
"llm_router_classification_duration_seconds",
37-
"llm_router_category_classification_total",
38-
39-
# Security metrics
40-
"llm_router_jailbreak",
41-
"llm_router_pii",
42-
36+
"llm_classifier_latency_seconds",
37+
4338
# Cache metrics (if enabled)
44-
"llm_router_cache",
45-
39+
"llm_cache_hits_total",
40+
"llm_cache_misses_total",
41+
"llm_cache_operations_total",
42+
4643
# Performance metrics
47-
"llm_router_request_duration_seconds",
48-
"llm_router_response_size_bytes",
44+
"llm_model_completion_latency_seconds",
45+
"llm_model_tokens_total",
4946

5047
# System metrics
5148
"go_", # Go runtime metrics
@@ -118,7 +115,7 @@ def setUp(self):
118115
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
119116
headers={"Content-Type": "application/json"},
120117
json=payload,
121-
timeout=60,
118+
timeout=(10, 60), # (connect timeout, read timeout)
122119
)
123120

124121
if response.status_code >= 500:
@@ -263,7 +260,7 @@ def test_metrics_increase_with_requests(self):
263260
baseline_response = requests.get(ROUTER_METRICS_URL, timeout=5)
264261
baseline_metrics = baseline_response.text
265262

266-
baseline_requests = extract_metric_value(baseline_metrics, "llm_router_requests_total") or 0
263+
baseline_requests = extract_metric_value(baseline_metrics, "llm_model_requests_total") or 0
267264

268265
self.print_subtest_header("Baseline Metrics")
269266
print(f"Baseline requests total: {baseline_requests}")
@@ -291,7 +288,7 @@ def test_metrics_increase_with_requests(self):
291288
f"{ENVOY_URL}{OPENAI_ENDPOINT}",
292289
headers=headers,
293290
json=payload,
294-
timeout=10,
291+
timeout=(10, 60), # (connect timeout, read timeout)
295292
)
296293

297294
self.print_response_info(
@@ -310,7 +307,7 @@ def test_metrics_increase_with_requests(self):
310307
updated_response = requests.get(ROUTER_METRICS_URL, timeout=5)
311308
updated_metrics = updated_response.text
312309

313-
updated_requests = extract_metric_value(updated_metrics, "llm_router_requests_total") or 0
310+
updated_requests = extract_metric_value(updated_metrics, "llm_model_requests_total") or 0
314311

315312
print(f"\nUpdated requests total: {updated_requests}")
316313
requests_increase = updated_requests - baseline_requests
@@ -337,9 +334,9 @@ def test_performance_metrics_present(self):
337334
metrics_text = response.text
338335

339336
performance_metrics = [
340-
"llm_router_request_duration_seconds",
341-
"llm_router_classification_duration_seconds",
342-
"llm_router_routing_latency_ms",
337+
"llm_model_completion_latency_seconds",
338+
"llm_classifier_latency_seconds",
339+
"llm_model_routing_latency_seconds",
343340
]
344341

345342
found_metrics = {}

0 commit comments

Comments (0)