Skip to content

Commit 7b44a2f

Browse files
committed
feat(add csv utils):
1 parent e3c3119 commit 7b44a2f

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

agentic_security/probe_data/data.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def load_jailbreak_v28k() -> ProbeDataset:
245245
return create_probe_dataset("JailbreakV-28K/JailBreakV-28k", [])
246246

247247

248+
@cache_to_disk()
248249
def load_local_csv() -> ProbeDataset:
249250
"""Load prompts from local CSV files."""
250251
csv_files = [f for f in os.listdir(".") if f.endswith(".csv")]
@@ -264,6 +265,44 @@ def load_local_csv() -> ProbeDataset:
264265
return create_probe_dataset("Local CSV", prompts, {"src": str(csv_files)})
265266

266267

268+
@cache_to_disk()
def load_csv(file: str) -> ProbeDataset:
    """Load prompts from the ``prompt`` column of a single CSV file.

    Args:
        file: Path of the CSV file to read.

    Returns:
        The dataset produced by ``create_probe_dataset`` under the name
        ``fs://<file>``. The prompt list is empty when the file cannot be
        read or has no ``prompt`` column.
    """
    # NOTE(review): results go through cache_to_disk; presumably keyed on
    # `file`, so later edits to the CSV may not be picked up - confirm.
    prompts = []
    try:
        df = pd.read_csv(file)
        # Check for the column before indexing it. Reading it unconditionally
        # first duplicated every prompt and turned a missing column into a
        # KeyError that was reported as a read error instead of the warning.
        if "prompt" in df.columns:
            prompts = df["prompt"].tolist()
        else:
            logger.warning(f"File {file} lacks a suitable prompt column")
    except Exception as e:
        # Best-effort loader: an unreadable file yields an empty dataset.
        logger.error(f"Error reading {file}: {e}")
    return create_probe_dataset(f"fs://{file}", prompts, {"src": str(file)})
282+
283+
284+
@cache_to_disk()
def load_local_csv_files() -> list[ProbeDataset]:
    """Build one ProbeDataset per ``.csv`` file in the current directory.

    Files that cannot be read are logged as errors and files without a
    ``prompt`` column are logged as warnings; both kinds are left out of
    the returned list.
    """
    csv_files = [entry for entry in os.listdir(".") if entry.endswith(".csv")]
    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")

    loaded = []
    for file in csv_files:
        try:
            frame = pd.read_csv(file)
            if "prompt" not in frame.columns:
                logger.warning(f"File {file} lacks a suitable prompt column")
                continue
            column_values = frame["prompt"].tolist()
            loaded.append(create_probe_dataset(file, column_values, {"src": file}))
        except Exception as e:
            # Keep going: one bad file must not abort the remaining files.
            logger.error(f"Error reading {file}: {e}")

    return loaded
304+
305+
267306
# Stenography transformer
268307
class StenographyTransformer:
269308
"""Apply stenography transformations to datasets."""
@@ -435,4 +474,11 @@ def prepare_prompts(
435474
except Exception as e:
436475
logger.exception(f"Error loading dynamic {name}: {e}")
437476

477+
# Load csv datasets and apply transformations
478+
for name, opts in zip(dataset_names, options):
479+
if not name.endswith(".csv"):
480+
continue
481+
logger.info(f"Loading csv dataset {name} {opts}")
482+
datasets.append(load_csv(name))
483+
438484
return datasets

0 commit comments

Comments
 (0)