Skip to content

Commit 78e5346

Browse files
committed
Enhance test cases and add default handling for num_attempts parameter
1 parent 8d6d8bc commit 78e5346

File tree

8 files changed

+288
-137
lines changed

8 files changed

+288
-137
lines changed

examples/llamator-api.ipynb

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,14 @@
4545
{
4646
"metadata": {
4747
"ExecuteTime": {
48-
"end_time": "2025-07-10T20:48:57.401571Z",
49-
"start_time": "2025-07-10T20:48:57.399176Z"
48+
"end_time": "2025-07-15T10:52:14.326798Z",
49+
"start_time": "2025-07-15T10:52:08.534205Z"
5050
}
5151
},
5252
"cell_type": "code",
5353
"source": "import llamator",
5454
"outputs": [],
55-
"execution_count": 2
55+
"execution_count": 1
5656
},
5757
{
5858
"cell_type": "code",
@@ -203,8 +203,8 @@
203203
{
204204
"metadata": {
205205
"ExecuteTime": {
206-
"end_time": "2025-07-10T20:49:03.961794Z",
207-
"start_time": "2025-07-10T20:49:03.955671Z"
206+
"end_time": "2025-07-15T10:52:22.481186Z",
207+
"start_time": "2025-07-15T10:52:22.474697Z"
208208
}
209209
},
210210
"cell_type": "code",
@@ -216,62 +216,63 @@
216216
"text": [
217217
"# Example configuration for preset 'all':\n",
218218
"basic_tests = [\n",
219-
" (\"aim_jailbreak\", { \"num_attempts\": 0 }),\n",
219+
" (\"aim_jailbreak\", { \"num_attempts\": 3 }),\n",
220220
" (\"autodan_turbo\", {\n",
221221
" \"custom_dataset\": None,\n",
222222
" \"language\": \"any\",\n",
223223
" \"multistage_depth\": 10,\n",
224-
" \"num_attempts\": 0,\n",
224+
" \"num_attempts\": 3,\n",
225225
" \"strategy_library_size\": 10\n",
226226
" }),\n",
227-
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
227+
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
228228
" (\"bon\", {\n",
229229
" \"custom_dataset\": None,\n",
230230
" \"language\": \"any\",\n",
231-
" \"num_attempts\": 0,\n",
231+
" \"num_attempts\": 3,\n",
232232
" \"num_transformations\": 5,\n",
233233
" \"sigma\": 0.4\n",
234234
" }),\n",
235235
" (\"crescendo\", {\n",
236236
" \"custom_dataset\": None,\n",
237237
" \"language\": \"any\",\n",
238238
" \"multistage_depth\": 5,\n",
239-
" \"num_attempts\": 0\n",
239+
" \"num_attempts\": 3\n",
240240
" }),\n",
241-
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
242-
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
243-
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
244-
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
245-
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
246-
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
247-
" (\"linguistic_evasion\", { \"num_attempts\": 0 }),\n",
248-
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
241+
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
242+
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
243+
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
244+
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
245+
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
246+
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
247+
" (\"linguistic_evasion\", { \"num_attempts\": 3 }),\n",
248+
" (\"linguistic_sandwich\", { \"custom_dataset\": None, \"num_attempts\": 3, \"num_translations\": 5 }),\n",
249+
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
249250
" (\"pair\", {\n",
250251
" \"custom_dataset\": None,\n",
251252
" \"language\": \"any\",\n",
252253
" \"multistage_depth\": 20,\n",
253-
" \"num_attempts\": 0\n",
254+
" \"num_attempts\": 3\n",
254255
" }),\n",
255256
" (\"shuffle\", {\n",
256257
" \"custom_dataset\": None,\n",
257258
" \"language\": \"any\",\n",
258-
" \"num_attempts\": 0,\n",
259+
" \"num_attempts\": 3,\n",
259260
" \"num_transformations\": 5\n",
260261
" }),\n",
261-
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
262-
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
263-
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
262+
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
263+
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
264+
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
264265
" (\"time_machine\", {\n",
265266
" \"custom_dataset\": None,\n",
266267
" \"language\": \"any\",\n",
267-
" \"num_attempts\": 0,\n",
268+
" \"num_attempts\": 3,\n",
268269
" \"time_context\": \"any\"\n",
269270
" }),\n",
270-
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
271+
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
271272
" (\"vlm_lowres_docs\", {\n",
272273
" \"custom_pdf_dir\": None,\n",
273274
" \"is_long_pdf\": False,\n",
274-
" \"num_attempts\": 0,\n",
275+
" \"num_attempts\": 3,\n",
275276
" \"overwrite_existing_pdfs\": False,\n",
276277
" \"rescale\": 0.25\n",
277278
" }),\n",
@@ -280,14 +281,14 @@
280281
" \"attack_source\": \"parquet\",\n",
281282
" \"dataset\": \"bigscale_100\",\n",
282283
" \"dataset_variations\": None,\n",
283-
" \"num_attempts\": 0\n",
284+
" \"num_attempts\": 3\n",
284285
" }),\n",
285-
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 0 }),\n",
286+
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 3 }),\n",
286287
"]\n"
287288
]
288289
}
289290
],
290-
"execution_count": 3
291+
"execution_count": 2
291292
},
292293
{
293294
"cell_type": "code",

examples/llamator-langchain-custom-attack.ipynb

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,14 @@
4545
{
4646
"metadata": {
4747
"ExecuteTime": {
48-
"end_time": "2025-07-10T20:48:39.812221Z",
49-
"start_time": "2025-07-10T20:48:39.810058Z"
48+
"end_time": "2025-07-15T10:51:52.329146Z",
49+
"start_time": "2025-07-15T10:51:46.534968Z"
5050
}
5151
},
5252
"cell_type": "code",
5353
"source": "import llamator",
5454
"outputs": [],
55-
"execution_count": 2
55+
"execution_count": 1
5656
},
5757
{
5858
"cell_type": "code",
@@ -1206,8 +1206,8 @@
12061206
{
12071207
"metadata": {
12081208
"ExecuteTime": {
1209-
"end_time": "2025-07-10T20:48:44.130751Z",
1210-
"start_time": "2025-07-10T20:48:44.126703Z"
1209+
"end_time": "2025-07-15T10:52:02.010099Z",
1210+
"start_time": "2025-07-15T10:52:02.002716Z"
12111211
}
12121212
},
12131213
"cell_type": "code",
@@ -1219,62 +1219,63 @@
12191219
"text": [
12201220
"# Example configuration for preset 'all':\n",
12211221
"basic_tests = [\n",
1222-
" (\"aim_jailbreak\", { \"num_attempts\": 0 }),\n",
1222+
" (\"aim_jailbreak\", { \"num_attempts\": 3 }),\n",
12231223
" (\"autodan_turbo\", {\n",
12241224
" \"custom_dataset\": None,\n",
12251225
" \"language\": \"any\",\n",
12261226
" \"multistage_depth\": 10,\n",
1227-
" \"num_attempts\": 0,\n",
1227+
" \"num_attempts\": 3,\n",
12281228
" \"strategy_library_size\": 10\n",
12291229
" }),\n",
1230-
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
1230+
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
12311231
" (\"bon\", {\n",
12321232
" \"custom_dataset\": None,\n",
12331233
" \"language\": \"any\",\n",
1234-
" \"num_attempts\": 0,\n",
1234+
" \"num_attempts\": 3,\n",
12351235
" \"num_transformations\": 5,\n",
12361236
" \"sigma\": 0.4\n",
12371237
" }),\n",
12381238
" (\"crescendo\", {\n",
12391239
" \"custom_dataset\": None,\n",
12401240
" \"language\": \"any\",\n",
12411241
" \"multistage_depth\": 5,\n",
1242-
" \"num_attempts\": 0\n",
1242+
" \"num_attempts\": 3\n",
12431243
" }),\n",
1244-
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
1245-
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
1246-
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
1247-
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
1248-
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
1249-
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
1250-
" (\"linguistic_evasion\", { \"num_attempts\": 0 }),\n",
1251-
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
1244+
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
1245+
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
1246+
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
1247+
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
1248+
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
1249+
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
1250+
" (\"linguistic_evasion\", { \"num_attempts\": 3 }),\n",
1251+
" (\"linguistic_sandwich\", { \"custom_dataset\": None, \"num_attempts\": 3, \"num_translations\": 5 }),\n",
1252+
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
12521253
" (\"pair\", {\n",
12531254
" \"custom_dataset\": None,\n",
12541255
" \"language\": \"any\",\n",
12551256
" \"multistage_depth\": 20,\n",
1256-
" \"num_attempts\": 0\n",
1257+
" \"num_attempts\": 3\n",
12571258
" }),\n",
12581259
" (\"shuffle\", {\n",
12591260
" \"custom_dataset\": None,\n",
12601261
" \"language\": \"any\",\n",
1261-
" \"num_attempts\": 0,\n",
1262+
" \"num_attempts\": 3,\n",
12621263
" \"num_transformations\": 5\n",
12631264
" }),\n",
1264-
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
1265-
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
1266-
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
1265+
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
1266+
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
1267+
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
12671268
" (\"time_machine\", {\n",
12681269
" \"custom_dataset\": None,\n",
12691270
" \"language\": \"any\",\n",
1270-
" \"num_attempts\": 0,\n",
1271+
" \"num_attempts\": 3,\n",
12711272
" \"time_context\": \"any\"\n",
12721273
" }),\n",
1273-
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
1274+
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
12741275
" (\"vlm_lowres_docs\", {\n",
12751276
" \"custom_pdf_dir\": None,\n",
12761277
" \"is_long_pdf\": False,\n",
1277-
" \"num_attempts\": 0,\n",
1278+
" \"num_attempts\": 3,\n",
12781279
" \"overwrite_existing_pdfs\": False,\n",
12791280
" \"rescale\": 0.25\n",
12801281
" }),\n",
@@ -1283,14 +1284,14 @@
12831284
" \"attack_source\": \"parquet\",\n",
12841285
" \"dataset\": \"bigscale_100\",\n",
12851286
" \"dataset_variations\": None,\n",
1286-
" \"num_attempts\": 0\n",
1287+
" \"num_attempts\": 3\n",
12871288
" }),\n",
1288-
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 0 }),\n",
1289+
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 3 }),\n",
12891290
"]\n"
12901291
]
12911292
}
12921293
],
1293-
"execution_count": 3
1294+
"execution_count": 2
12941295
},
12951296
{
12961297
"cell_type": "code",

examples/llamator-selenium.ipynb

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@
5353
"cell_type": "code",
5454
"metadata": {
5555
"ExecuteTime": {
56-
"end_time": "2025-07-10T20:48:12.441078Z",
57-
"start_time": "2025-07-10T20:48:12.437974Z"
56+
"end_time": "2025-07-15T10:51:35.104571Z",
57+
"start_time": "2025-07-15T10:51:29.231950Z"
5858
}
5959
},
6060
"source": [
6161
"import llamator"
6262
],
6363
"outputs": [],
64-
"execution_count": 2
64+
"execution_count": 1
6565
},
6666
{
6767
"cell_type": "code",
@@ -247,8 +247,8 @@
247247
{
248248
"metadata": {
249249
"ExecuteTime": {
250-
"end_time": "2025-07-10T20:48:21.359288Z",
251-
"start_time": "2025-07-10T20:48:21.353510Z"
250+
"end_time": "2025-07-15T10:51:40.094148Z",
251+
"start_time": "2025-07-15T10:51:40.087442Z"
252252
}
253253
},
254254
"cell_type": "code",
@@ -260,62 +260,63 @@
260260
"text": [
261261
"# Example configuration for preset 'all':\n",
262262
"basic_tests = [\n",
263-
" (\"aim_jailbreak\", { \"num_attempts\": 0 }),\n",
263+
" (\"aim_jailbreak\", { \"num_attempts\": 3 }),\n",
264264
" (\"autodan_turbo\", {\n",
265265
" \"custom_dataset\": None,\n",
266266
" \"language\": \"any\",\n",
267267
" \"multistage_depth\": 10,\n",
268-
" \"num_attempts\": 0,\n",
268+
" \"num_attempts\": 3,\n",
269269
" \"strategy_library_size\": 10\n",
270270
" }),\n",
271-
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
271+
" (\"base64_injection\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
272272
" (\"bon\", {\n",
273273
" \"custom_dataset\": None,\n",
274274
" \"language\": \"any\",\n",
275-
" \"num_attempts\": 0,\n",
275+
" \"num_attempts\": 3,\n",
276276
" \"num_transformations\": 5,\n",
277277
" \"sigma\": 0.4\n",
278278
" }),\n",
279279
" (\"crescendo\", {\n",
280280
" \"custom_dataset\": None,\n",
281281
" \"language\": \"any\",\n",
282282
" \"multistage_depth\": 5,\n",
283-
" \"num_attempts\": 0\n",
283+
" \"num_attempts\": 3\n",
284284
" }),\n",
285-
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
286-
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
287-
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
288-
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
289-
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
290-
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 0 }),\n",
291-
" (\"linguistic_evasion\", { \"num_attempts\": 0 }),\n",
292-
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
285+
" (\"dan\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
286+
" (\"deceptive_delight\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
287+
" (\"dialogue_injection_devmode\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
288+
" (\"dialogue_injection_continuation\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
289+
" (\"ethical_compliance\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
290+
" (\"harmbench\", { \"custom_dataset\": None, \"language\": \"any\", \"num_attempts\": 3 }),\n",
291+
" (\"linguistic_evasion\", { \"num_attempts\": 3 }),\n",
292+
" (\"linguistic_sandwich\", { \"custom_dataset\": None, \"num_attempts\": 3, \"num_translations\": 5 }),\n",
293+
" (\"logical_inconsistencies\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
293294
" (\"pair\", {\n",
294295
" \"custom_dataset\": None,\n",
295296
" \"language\": \"any\",\n",
296297
" \"multistage_depth\": 20,\n",
297-
" \"num_attempts\": 0\n",
298+
" \"num_attempts\": 3\n",
298299
" }),\n",
299300
" (\"shuffle\", {\n",
300301
" \"custom_dataset\": None,\n",
301302
" \"language\": \"any\",\n",
302-
" \"num_attempts\": 0,\n",
303+
" \"num_attempts\": 3,\n",
303304
" \"num_transformations\": 5\n",
304305
" }),\n",
305-
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 0 }),\n",
306-
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
307-
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 0 }),\n",
306+
" (\"suffix\", { \"custom_dataset\": None, \"num_attempts\": 3 }),\n",
307+
" (\"sycophancy\", { \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
308+
" (\"system_prompt_leakage\", { \"custom_dataset\": None, \"multistage_depth\": 20, \"num_attempts\": 3 }),\n",
308309
" (\"time_machine\", {\n",
309310
" \"custom_dataset\": None,\n",
310311
" \"language\": \"any\",\n",
311-
" \"num_attempts\": 0,\n",
312+
" \"num_attempts\": 3,\n",
312313
" \"time_context\": \"any\"\n",
313314
" }),\n",
314-
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 0 }),\n",
315+
" (\"ucar\", { \"language\": \"any\", \"num_attempts\": 3 }),\n",
315316
" (\"vlm_lowres_docs\", {\n",
316317
" \"custom_pdf_dir\": None,\n",
317318
" \"is_long_pdf\": False,\n",
318-
" \"num_attempts\": 0,\n",
319+
" \"num_attempts\": 3,\n",
319320
" \"overwrite_existing_pdfs\": False,\n",
320321
" \"rescale\": 0.25\n",
321322
" }),\n",
@@ -324,14 +325,14 @@
324325
" \"attack_source\": \"parquet\",\n",
325326
" \"dataset\": \"bigscale_100\",\n",
326327
" \"dataset_variations\": None,\n",
327-
" \"num_attempts\": 0\n",
328+
" \"num_attempts\": 3\n",
328329
" }),\n",
329-
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 0 }),\n",
330+
" (\"vlm_text_hallucination\", { \"attack_types\": None, \"num_attempts\": 3 }),\n",
330331
"]\n"
331332
]
332333
}
333334
],
334-
"execution_count": 3
335+
"execution_count": 2
335336
},
336337
{
337338
"cell_type": "code",

0 commit comments

Comments
 (0)