Skip to content

Commit 926c583

Browse files
committed
fix(csv ds loading):
1 parent 17e3435 commit 926c583

File tree

1 file changed

+17
-32
lines changed

1 file changed

+17
-32
lines changed

agentic_security/probe_data/data.py

Lines changed: 17 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,20 @@ def load_jailbreak_v28k() -> ProbeDataset:
245245
return create_probe_dataset("JailbreakV-28K/JailBreakV-28k", [])
246246

247247

248-
@cache_to_disk()
248+
@cache_to_disk(1)
249+
def file_dataset(file) -> list[str]:
250+
prompts = []
251+
try:
252+
df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
253+
if "prompt" in df.columns:
254+
prompts = df["prompt"].tolist()
255+
else:
256+
logger.warning(f"File {file} lacks a suitable prompt column")
257+
except Exception as e:
258+
logger.error(f"Error reading {file}: {e}")
259+
return prompts
260+
261+
249262
def load_local_csv() -> ProbeDataset:
250263
"""Load prompts from local CSV files."""
251264
os.makedirs("./datasets", exist_ok=True)
@@ -254,35 +267,16 @@ def load_local_csv() -> ProbeDataset:
254267

255268
prompts = []
256269
for file in csv_files:
257-
try:
258-
df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
259-
if "prompt" in df.columns:
260-
prompts.extend(df["prompt"].tolist())
261-
else:
262-
logger.warning(f"File {file} lacks a suitable prompt column")
263-
except Exception as e:
264-
logger.error(f"Error reading {file}: {e}")
265-
270+
prompts.extend(file_dataset(file))
266271
return create_probe_dataset("Local CSV", prompts, {"src": str(csv_files)})
267272

268273

269-
@cache_to_disk(1)
270274
def load_csv(file: str) -> ProbeDataset:
271275
"""Load prompts from local CSV files."""
272-
prompts = []
273-
try:
274-
df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
275-
prompts = df["prompt"].tolist()
276-
if "prompt" in df.columns:
277-
prompts.extend(df["prompt"].tolist())
278-
else:
279-
logger.warning(f"File {file} lacks a suitable prompt column")
280-
except Exception as e:
281-
logger.error(f"Error reading {file}: {e}")
276+
prompts = file_dataset(file)
282277
return create_probe_dataset(f"fs://{file}", prompts, {"src": str(file)})
283278

284279

285-
@cache_to_disk(1)
286280
def load_local_csv_files() -> list[ProbeDataset]:
287281
"""Load prompts from local CSV files and return a list of ProbeDataset objects."""
288282
csv_files = [f for f in os.listdir("./datasets") if f.endswith(".csv")]
@@ -291,16 +285,7 @@ def load_local_csv_files() -> list[ProbeDataset]:
291285
datasets = []
292286

293287
for file in csv_files:
294-
try:
295-
df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
296-
if "prompt" in df.columns:
297-
prompts = df["prompt"].tolist()
298-
datasets.append(create_probe_dataset(file, prompts, {"src": file}))
299-
else:
300-
logger.warning(f"File {file} lacks a suitable prompt column")
301-
except Exception as e:
302-
logger.error(f"Error reading {file}: {e}")
303-
288+
datasets.append(create_probe_dataset(file, file_dataset(file), {"src": file}))
304289
return datasets
305290

306291

0 commit comments

Comments
 (0)