Skip to content

Commit cb2344b

Browse files
yossiovadia and claude committed
feat: harden all remaining e2e tests - remove 503 acceptance and add comprehensive status report
- Remove HTTP 503 acceptance from PII, tools, model selection, and error handling tests
- Tests now require 200 status codes for successful operations
- Service failures now properly fail tests instead of false positives
- Add comprehensive TEST_STATUS_REPORT.md documenting all test improvements
- Expose real system bugs: input validation gaps, jailbreak blocking issues
- 6/11 test files now hardened and provide reliable system health assessment

Co-Authored-By: Claude <[email protected]>
Signed-off-by: Yossi Ovadia <[email protected]>
1 parent 10134b3 commit cb2344b

File tree

5 files changed

+322
-35
lines changed

5 files changed

+322
-35
lines changed

e2e-tests/05-pii-policy-test.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,9 @@ def test_no_pii_requests_allowed(self):
198198
timeout=(10, 60), # (connect timeout, read timeout)
199199
)
200200

201-
# No PII requests should be processed (may get 503 due to missing vLLM backend)
202-
passed = response.status_code in [200, 503]
201+
# No PII requests should be processed successfully - no 503 accepted
202+
# If PII detection is working, no-PII requests must succeed with 200
203+
passed = response.status_code == 200
203204

204205
try:
205206
response_json = response.json()
@@ -226,10 +227,10 @@ def test_no_pii_requests_allowed(self):
226227
),
227228
)
228229

229-
self.assertIn(
230+
self.assertEqual(
230231
response.status_code,
231-
[200, 503],
232-
f"No PII request '{test_case['name']}' was blocked when it should be allowed. Status: {response.status_code}",
232+
200,
233+
f"No PII request '{test_case['name']}' failed with status {response.status_code}. Expected: 200 (service must be working)",
233234
)
234235

235236
def test_allowed_pii_requests(self):
@@ -270,8 +271,9 @@ def test_allowed_pii_requests(self):
270271
timeout=(10, 60), # (connect timeout, read timeout)
271272
)
272273

273-
# Allowed PII requests should be processed (may get 503 due to missing vLLM backend)
274-
passed = response.status_code in [200, 503]
274+
# Allowed PII requests should be processed successfully - no 503 accepted
275+
# If PII detection is working, allowed PII requests must succeed with 200
276+
passed = response.status_code == 200
275277

276278
try:
277279
response_json = response.json()
@@ -299,10 +301,10 @@ def test_allowed_pii_requests(self):
299301
),
300302
)
301303

302-
self.assertIn(
304+
self.assertEqual(
303305
response.status_code,
304-
[200, 503],
305-
f"Allowed PII request '{test_case['name']}' was blocked when it should be allowed. Status: {response.status_code}",
306+
200,
307+
f"Allowed PII request '{test_case['name']}' failed with status {response.status_code}. Expected: 200 (service must be working)",
306308
)
307309

308310
def test_pii_policy_consistency(self):
@@ -483,17 +485,18 @@ def test_model_pii_policy_configuration(self):
483485
},
484486
)
485487

486-
# The request should be processed (allowed PII types for gemma3:27b)
487-
passed = response.status_code in [200, 503]
488+
# The request should be processed successfully - no 503 accepted
489+
# If PII policy is working, allowed PII types must succeed with 200
490+
passed = response.status_code == 200
488491
self.print_test_result(
489492
passed=passed,
490493
message=f"Model {model} PII policy applied correctly",
491494
)
492495

493-
self.assertIn(
496+
self.assertEqual(
494497
response.status_code,
495-
[200, 503],
496-
f"Model {model} did not handle PII policy correctly. Status: {response.status_code}",
498+
200,
499+
f"Model {model} PII policy failed with status {response.status_code}. Expected: 200 (service must be working)",
497500
)
498501

499502

e2e-tests/06-tools-test.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,9 @@ def test_specific_tool_selection(self):
187187
)
188188

189189
# Tool selection should work regardless of vLLM backend availability
190-
passed = response.status_code in [200, 503]
190+
# Tool selection should work successfully - no 503 accepted
191+
# If tool selection is working, requests must succeed with 200
192+
passed = response.status_code == 200
191193

192194
try:
193195
response_json = response.json()
@@ -262,7 +264,9 @@ def test_no_tool_requests(self):
262264
timeout=30,
263265
)
264266

265-
passed = response.status_code in [200, 503]
267+
# Tool selection should work successfully - no 503 accepted
268+
# If tool selection is working, requests must succeed with 200
269+
passed = response.status_code == 200
266270

267271
try:
268272
response_json = response.json()

e2e-tests/07-model-selection-test.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ def test_category_based_model_selection(self):
217217
timeout=30,
218218
)
219219

220-
passed = response.status_code in [200, 503]
220+
# Model selection should work successfully - no 503 accepted
221+
# If model selection is working, requests must succeed with 200
222+
passed = response.status_code == 200
221223

222224
try:
223225
response_json = response.json()
@@ -249,10 +251,10 @@ def test_category_based_model_selection(self):
249251
),
250252
)
251253

252-
self.assertIn(
254+
self.assertEqual(
253255
response.status_code,
254-
[200, 503],
255-
f"Model selection request '{test_case['name']}' failed. Status: {response.status_code}",
256+
200,
257+
f"Model selection request '{test_case['name']}' failed with status {response.status_code}. Expected: 200 (service must be working)",
256258
)
257259

258260
def test_reasoning_mode_selection(self):
@@ -296,7 +298,9 @@ def test_reasoning_mode_selection(self):
296298
timeout=30,
297299
)
298300

299-
passed = response.status_code in [200, 503]
301+
# Reasoning mode should work successfully - no 503 accepted
302+
# If reasoning mode selection is working, requests must succeed with 200
303+
passed = response.status_code == 200
300304

301305
try:
302306
response_json = response.json()
@@ -327,10 +331,10 @@ def test_reasoning_mode_selection(self):
327331
),
328332
)
329333

330-
self.assertIn(
334+
self.assertEqual(
331335
response.status_code,
332-
[200, 503],
333-
f"Reasoning mode test '{test_case['name']}' failed. Status: {response.status_code}",
336+
200,
337+
f"Reasoning mode test '{test_case['name']}' failed with status {response.status_code}. Expected: 200 (service must be working)",
334338
)
335339

336340
def test_model_fallback_behavior(self):
@@ -368,8 +372,9 @@ def test_model_fallback_behavior(self):
368372
timeout=30,
369373
)
370374

371-
# Fallback should work, though may get 503 if no vLLM backend
372-
passed = response.status_code in [200, 400, 503] # 400 is acceptable for invalid model
375+
# Fallback should work - 400 is acceptable for invalid model request
376+
# No 503 accepted - if fallback is working, it should handle gracefully
377+
passed = response.status_code in [200, 400] # 400 is acceptable for invalid model
373378

374379
try:
375380
response_json = response.json()
@@ -394,8 +399,8 @@ def test_model_fallback_behavior(self):
394399

395400
self.assertIn(
396401
response.status_code,
397-
[200, 400, 503],
398-
f"Model fallback test failed unexpectedly. Status: {response.status_code}",
402+
[200, 400],
403+
f"Model fallback test failed with status {response.status_code}. Expected: 200 (fallback) or 400 (invalid model)",
399404
)
400405

401406
def test_model_selection_metrics(self):

e2e-tests/09-error-handling-test.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
{"role": "user", "content": "A" * 10000} # 10KB message
9999
],
100100
},
101-
"expected_status_range": (200, 503), # Should be processed or vLLM unavailable
101+
"expected_status_range": (200, 200), # Should be processed successfully - no 503 accepted
102102
"description": "Very long message should be handled gracefully",
103103
},
104104
{
@@ -110,7 +110,7 @@
110110
for i in range(100) # 100 messages
111111
],
112112
},
113-
"expected_status_range": (200, 503),
113+
"expected_status_range": (200, 200), # Must be processed successfully - no 503 accepted
114114
"description": "Large number of messages should be handled",
115115
},
116116
{
@@ -121,7 +121,7 @@
121121
{"role": "user", "content": "Hello 世界 🌍 Здравствуй мир"}
122122
],
123123
},
124-
"expected_status_range": (200, 503),
124+
"expected_status_range": (200, 200), # Must be processed successfully - no 503 accepted
125125
"description": "Unicode characters should be handled correctly",
126126
},
127127
{
@@ -131,7 +131,7 @@
131131
"messages": [{"role": "user", "content": "Hello"}],
132132
"temperature": 0,
133133
},
134-
"expected_status_range": (200, 503),
134+
"expected_status_range": (200, 200), # Must be processed successfully - no 503 accepted
135135
"description": "Zero temperature should be valid",
136136
},
137137
{
@@ -141,7 +141,7 @@
141141
"messages": [{"role": "user", "content": "Hello"}],
142142
"temperature": 2.0,
143143
},
144-
"expected_status_range": (200, 503),
144+
"expected_status_range": (200, 200), # Must be processed successfully - no 503 accepted
145145
"description": "Maximum valid temperature should work",
146146
},
147147
{
@@ -152,7 +152,7 @@
152152
{"role": "user", "content": "Test with \"quotes\" and 'apostrophes' and \n newlines \t tabs"}
153153
],
154154
},
155-
"expected_status_range": (200, 503),
155+
"expected_status_range": (200, 200), # Must be processed successfully - no 503 accepted
156156
"description": "Special characters should be handled",
157157
},
158158
]

0 commit comments

Comments
 (0)