@@ -245,7 +245,20 @@ def load_jailbreak_v28k() -> ProbeDataset:
245245 return create_probe_dataset ("JailbreakV-28K/JailBreakV-28k" , [])
246246
247247
248- @cache_to_disk ()
@cache_to_disk(1)
def file_dataset(file) -> list[str]:
    """Read the ``prompt`` column of one CSV file located under ``./datasets``.

    Args:
        file: Path component joined onto ``./datasets`` to locate the CSV.

    Returns:
        The non-missing values of the ``prompt`` column as strings. An empty
        list is returned when the column is absent or the file cannot be
        read; both situations are logged instead of raised.
    """
    # NOTE(review): the return value goes through cache_to_disk(1), so the
    # empty list produced by a failed read is presumably cached too - confirm.
    prompts = []
    try:
        df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
        if "prompt" in df.columns:
            # Blank cells are parsed as NaN (a float); drop them and coerce the
            # rest so the declared list[str] return type actually holds.
            prompts = df["prompt"].dropna().astype(str).tolist()
        else:
            logger.warning(f"File {file} lacks a suitable prompt column")
    except Exception as e:
        # Best-effort loader: an unreadable file is reported, not propagated.
        logger.error(f"Error reading {file}: {e}")
    return prompts
260+
261+
249262def load_local_csv () -> ProbeDataset :
250263 """Load prompts from local CSV files."""
251264 os .makedirs ("./datasets" , exist_ok = True )
@@ -254,35 +267,16 @@ def load_local_csv() -> ProbeDataset:
254267
255268 prompts = []
256269 for file in csv_files :
257- try :
258- df = pd .read_csv (os .path .join ("./datasets" , file ), encoding_errors = "ignore" )
259- if "prompt" in df .columns :
260- prompts .extend (df ["prompt" ].tolist ())
261- else :
262- logger .warning (f"File { file } lacks a suitable prompt column" )
263- except Exception as e :
264- logger .error (f"Error reading { file } : { e } " )
265-
270+ prompts .extend (file_dataset (file ))
266271 return create_probe_dataset ("Local CSV" , prompts , {"src" : str (csv_files )})
267272
268273
269- @cache_to_disk (1 )
def load_csv(file: str) -> ProbeDataset:
    """Build a ProbeDataset from the prompts of a single CSV file.

    The prompts come from ``file_dataset(file)``; the dataset is named
    ``fs://<file>`` and records the file name under the ``src`` key.
    """
    return create_probe_dataset(
        f"fs://{file}",
        file_dataset(file),
        {"src": str(file)},
    )
283278
284279
285- @cache_to_disk (1 )
286280def load_local_csv_files () -> list [ProbeDataset ]:
287281 """Load prompts from local CSV files and return a list of ProbeDataset objects."""
288282 csv_files = [f for f in os .listdir ("./datasets" ) if f .endswith (".csv" )]
@@ -291,16 +285,7 @@ def load_local_csv_files() -> list[ProbeDataset]:
291285 datasets = []
292286
293287 for file in csv_files :
294- try :
295- df = pd .read_csv (os .path .join ("./datasets" , file ), encoding_errors = "ignore" )
296- if "prompt" in df .columns :
297- prompts = df ["prompt" ].tolist ()
298- datasets .append (create_probe_dataset (file , prompts , {"src" : file }))
299- else :
300- logger .warning (f"File { file } lacks a suitable prompt column" )
301- except Exception as e :
302- logger .error (f"Error reading { file } : { e } " )
303-
288+ datasets .append (create_probe_dataset (file , file_dataset (file ), {"src" : file }))
304289 return datasets
305290
306291
0 commit comments