@@ -164,9 +164,9 @@ def test_benign_requests_allowed(self):
164164 timeout = (10 , 60 ), # (connect timeout, read timeout)
165165 )
166166
167- # Benign requests should be processed (may get 503 due to missing vLLM backend)
168- # But should NOT be blocked at the jailbreak detection level
169- passed = response .status_code in [ 200 , 503 ] # 503 = vLLM not available, but not blocked
167+ # Benign requests should be processed successfully - no 503 accepted
168+ # If jailbreak detection is working, benign requests must get through with 200
169+ passed = response .status_code == 200
170170
171171 try :
172172 response_json = response .json ()
@@ -193,10 +193,87 @@ def test_benign_requests_allowed(self):
193193 ),
194194 )
195195
196- self .assertIn (
196+ self .assertEqual (
197197 response .status_code ,
198- [200 , 503 ], # Accept both success and vLLM unavailable
199- f"Benign request '{ test_case ['name' ]} ' was blocked when it should be allowed. Status: { response .status_code } " ,
198+ 200 , # Only accept success - if service fails, test should fail
199+ f"Benign request '{ test_case ['name' ]} ' failed with status { response .status_code } . Expected: 200" ,
200+ )
201+
def test_jailbreak_attempts_blocked(self):
    """Test that actual jailbreak attempts are blocked by the security system.

    For every entry in JAILBREAK_TEST_CASES, a chat-completion style payload
    is POSTed to ENVOY_URL + OPENAI_ENDPOINT with a fresh session id. Each
    case runs in its own subTest, so one failing case does not hide the
    others. A case passes only when the response status is in the 4xx range:
    a status below 400 means the request was not blocked, and a 5xx status
    means the server failed instead of returning a proper block.
    """
    self.print_test_header(
        "Jailbreak Attempts Blocked Test",
        "Verifies that malicious jailbreak attempts are properly detected and blocked",
    )

    for test_case in JAILBREAK_TEST_CASES:
        with self.subTest(test_case["name"]):
            self.print_subtest_header(test_case["name"])

            # A unique session id per case keeps the requests distinguishable
            # in the printed output.
            session_id = str(uuid.uuid4())
            payload = {
                "model": DEFAULT_MODEL,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": test_case["content"]},
                ],
                "temperature": 0.7,
            }

            headers = {
                "Content-Type": "application/json",
                "X-Session-ID": session_id,
            }

            self.print_request_info(
                payload=payload,
                expectations="Expect: Jailbreak attempt to be blocked with 4xx status code",
            )

            response = requests.post(
                f"{ENVOY_URL}{OPENAI_ENDPOINT}",
                headers=headers,
                json=payload,
                timeout=(10, 60),  # (connect timeout, read timeout)
            )

            # Jailbreak attempts should be blocked with 4xx status codes
            passed = 400 <= response.status_code < 500

            # The error message is only used for diagnostics. A non-JSON body
            # raises ValueError; a body whose shape is not
            # {"error": {"message": ...}} raises AttributeError on .get().
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            try:
                response_json = response.json()
                error_message = response_json.get("error", {}).get("message", "")
            except (ValueError, AttributeError):
                error_message = "N/A"

            self.print_response_info(
                response,
                {
                    "Content": test_case["content"][:50] + "...",
                    "Error Message": error_message,
                    "Session ID": session_id,
                    "Jailbreak Status": "Expected: Blocked",
                },
            )

            self.print_test_result(
                passed=passed,
                message=(
                    f"Jailbreak attempt properly blocked (status: {response.status_code})"
                    if passed
                    else f"Jailbreak attempt NOT blocked - security failure! (status: {response.status_code})"
                ),
            )

            # Two separate assertions so the failure message distinguishes
            # "request got through" from "server errored".
            self.assertGreaterEqual(
                response.status_code,
                400,
                f"Jailbreak attempt '{test_case['name']}' was NOT blocked! Status: {response.status_code}. This is a security vulnerability.",
            )
            self.assertLess(
                response.status_code,
                500,
                f"Jailbreak attempt '{test_case['name']}' returned server error {response.status_code} instead of proper block.",
            )
201278
202279 def test_jailbreak_detection_metrics (self ):
0 commit comments