2 changes: 2 additions & 0 deletions pyrit/datasets/seed_datasets/remote/__init__.py
@@ -16,6 +16,8 @@
from pyrit.datasets.seed_datasets.remote.forbidden_questions_dataset import _ForbiddenQuestionsDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.harmbench_multimodal_dataset import _HarmBenchMultimodalDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import _JailbreakV28KDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.jailbreakv_redteam_2k_dataset import _JailbreakVRedteam2KDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.jbb_behaviors_dataset import _JBBBehaviorsDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.librai_do_not_answer_dataset import _LibrAIDoNotAnswerDataset # noqa: F401
from pyrit.datasets.seed_datasets.remote.llm_latent_adversarial_training_dataset import ( # noqa: F401
333 changes: 333 additions & 0 deletions pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py
@@ -0,0 +1,333 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import pathlib
import uuid
import zipfile
from enum import Enum
from typing import Dict, List, Literal, Optional

from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
_RemoteDatasetLoader,
)
from pyrit.models import SeedDataset, SeedObjective, SeedPrompt

logger = logging.getLogger(__name__)


class _HarmCategory(Enum):
"""Harm categories in the JailBreakV-28K dataset."""

UNETHICAL_BEHAVIOR = "Unethical Behavior"
ECONOMIC_HARM = "Economic Harm"
HATE_SPEECH = "Hate Speech"
GOVERNMENT_DECISION = "Government Decision"
PHYSICAL_HARM = "Physical Harm"
FRAUD = "Fraud"
POLITICAL_SENSITIVITY = "Political Sensitivity"
MALWARE = "Malware"
ILLEGAL_ACTIVITY = "Illegal Activity"
BIAS = "Bias"
VIOLENCE = "Violence"
ANIMAL_ABUSE = "Animal Abuse"
TAILORED_UNLICENSED_ADVICE = "Tailored Unlicensed Advice"
PRIVACY_VIOLATION = "Privacy Violation"
HEALTH_CONSULTATION = "Health Consultation"
CHILD_ABUSE_CONTENT = "Child Abuse Content"


class _JailbreakV28KDataset(_RemoteDatasetLoader):
"""
Loader for the JailBreakV-28K multimodal dataset.

The JailBreakV-28K dataset is a benchmark for assessing the robustness of
    multimodal large language models against jailbreak attacks. Each example yields
    a red-team objective, a jailbreak text query, and an image, all linked by a
    shared prompt_group_id.

Note: Most images are not available on HuggingFace. You must download the full image
set from Google Drive by filling out the form at:
https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform

Reference: https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k
Paper: https://arxiv.org/abs/2404.03027
Authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo
License: MIT

Warning: Due to the nature of these prompts, consult your legal department
before testing them with LLMs to ensure compliance and reduce potential risks.
"""

def __init__(
self,
*,
source: str = "JailbreakV-28K/JailBreakV-28k",
zip_dir: str = str(pathlib.Path.home()),
split: Literal["JailBreakV_28K", "mini_JailBreakV_28K"] = "mini_JailBreakV_28K",
harm_categories: Optional[List[_HarmCategory]] = None,
) -> None:
"""
Initialize the JailBreakV-28K dataset loader.

Args:
source: HuggingFace dataset identifier. Defaults to "JailbreakV-28K/JailBreakV-28k".
zip_dir: Directory containing the JailBreakV_28K.zip file with images.
Defaults to home directory.
split: Dataset split to load. Defaults to "mini_JailBreakV_28K".
Options are "JailBreakV_28K" and "mini_JailBreakV_28K".
harm_categories: List of harm categories to filter examples.
If None, all categories are included (default).

Raises:
ValueError: If any of the specified harm categories are invalid.
"""
self.source = source
self.zip_dir = pathlib.Path(zip_dir)
self.split = split
self.harm_categories = harm_categories

# Validate harm categories if provided
if harm_categories is not None:
valid_categories = {category.value for category in _HarmCategory}
invalid_categories = (
set(cat.value if isinstance(cat, _HarmCategory) else cat for cat in harm_categories) - valid_categories
)
if invalid_categories:
raise ValueError(f"Invalid harm categories: {', '.join(invalid_categories)}")

@property
def dataset_name(self) -> str:
"""Return the dataset name."""
return "jailbreakv_28k"

async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
"""
Fetch JailBreakV-28K dataset and return as SeedDataset.

The dataset contains both image and text prompts linked by prompt_group_id.
You can extract the grouped prompts using the group_seed_prompts_by_prompt_group_id method.

Args:
cache: Whether to cache the fetched dataset. Defaults to True.

Returns:
SeedDataset: A SeedDataset containing the multimodal examples.

Raises:
FileNotFoundError: If the required ZIP file is not found.
            ValueError: If no items remain after the harm-category filter, or if
                50% or more of the processed items are missing images.
Exception: If the dataset cannot be loaded or processed.
"""
# Extract images from ZIP if needed
zip_file_path = self.zip_dir / "JailBreakV_28K.zip"
zip_extracted_path = self.zip_dir / "JailBreakV_28k"

if not zip_file_path.exists():
raise FileNotFoundError(
f"ZIP file not found at {zip_file_path}. "
"Please download images from Google Drive using the form at: "
"https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform"
)

# Only unzip if the target directory does not already exist
if not zip_extracted_path.exists():
logger.info(f"Extracting {zip_file_path} to {self.zip_dir}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
zip_ref.extractall(self.zip_dir)

try:
logger.info(f"Loading JailBreakV-28K dataset from {self.source}")

# Load dataset from HuggingFace using the helper method
data = await self._fetch_from_huggingface(
dataset_name=self.source,
config="JailBreakV_28K",
split=self.split,
cache=cache,
)

            # Normalize the harm categories for filtering; accept enum members or
            # raw strings, mirroring the validation in __init__
            harm_categories_normalized = (
                None
                if self.harm_categories is None
                else [
                    self._normalize_policy(cat.value if isinstance(cat, _HarmCategory) else cat)
                    for cat in self.harm_categories
                ]
            )

seed_prompts = []
missing_images = 0
total_items_processed = 0
per_call_cache: Dict[str, str] = {}

for item in data:
policy = self._normalize_policy(item.get("policy", ""))

# Skip if user requested policy filter and item's policy does not match
if harm_categories_normalized is not None and policy not in harm_categories_normalized:
continue

# Count items that pass the filter
total_items_processed += 1

image_rel_path = item.get("image_path", "")
if not image_rel_path:
missing_images += 1
continue

image_abs_path = self._resolve_image_path(
rel_path=image_rel_path,
local_directory=zip_extracted_path,
call_cache=per_call_cache,
)

if not image_abs_path:
missing_images += 1
continue

# Create linked text and image prompts
group_id = uuid.uuid4()

seed_objective = SeedObjective(
value=item.get("redteam_query", ""),
data_type="text",
name="JailBreakV-28K",
dataset_name=self.dataset_name,
harm_categories=[policy],
description=(
"Benchmark for Assessing the Robustness of "
"Multimodal Large Language Models against Jailbreak Attacks."
),
authors=["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
groups=["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
prompt_group_id=group_id,
)

text_seed_prompt = SeedPrompt(
value=item.get("jailbreak_query", ""),
data_type="text",
name="JailBreakV-28K",
dataset_name=self.dataset_name,
harm_categories=[policy],
description=(
"Benchmark for Assessing the Robustness of "
"Multimodal Large Language Models against Jailbreak Attacks."
),
authors=["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
groups=["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
prompt_group_id=group_id,
sequence=0,
)

image_seed_prompt = SeedPrompt(
value=image_abs_path,
data_type="image_path",
name="JailBreakV-28K",
dataset_name=self.dataset_name,
harm_categories=[policy],
description=(
"Benchmark for Assessing the Robustness of "
"Multimodal Large Language Models against Jailbreak Attacks."
),
authors=["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
groups=["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
prompt_group_id=group_id,
sequence=0,
)

seed_prompts.append(seed_objective)
seed_prompts.append(text_seed_prompt)
seed_prompts.append(image_seed_prompt)

except Exception as e:
logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
raise

        # Validation: fail fast if nothing survived filtering, or if 50% or more
        # of the processed items are missing images
if total_items_processed == 0:
raise ValueError(
"JailBreakV-28K fetch produced 0 items after filtering. "
"Try adjusting your harm_categories filter or check the dataset source."
)

        successful_pairs = len(seed_prompts) // 3  # Each group holds an objective, a text prompt, and an image
unpaired_percentage = (missing_images / total_items_processed) * 100

if unpaired_percentage >= 50:
raise ValueError(
f"JailBreakV-28K fetch failed: {unpaired_percentage:.1f}% of items are missing images "
f"({missing_images} out of {total_items_processed} items processed). "
f"Only {successful_pairs} valid pairs were created. "
f"At least 50% of items must have valid images. "
f"Please ensure the ZIP file contains the full image set."
)

if missing_images > 0:
logger.warning(
f"Failed to resolve {missing_images} image paths in JailBreakV-28K dataset "
f"({unpaired_percentage:.1f}% unpaired)"
)

logger.info(
f"Successfully loaded {successful_pairs} multimodal pairs "
f"({len(seed_prompts)} total prompts) from JailBreakV-28K dataset"
)

return SeedDataset(seeds=seed_prompts, dataset_name=self.dataset_name)

def _normalize_policy(self, policy: str) -> str:
"""
Create a machine-friendly variant of the policy category.

Args:
policy: The human-readable policy category.

Returns:
str: The normalized policy category.
"""
return policy.strip().lower().replace(" ", "_").replace("-", "_")
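
    # For illustration (not part of the PR): _normalize_policy("Illegal Activity")
    # returns "illegal_activity"; a hypothetical hyphenated value such as
    # "Self-Harm" would become "self_harm".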

def _resolve_image_path(
self,
*,
rel_path: str,
local_directory: pathlib.Path,
call_cache: Dict[str, str],
) -> str:
"""
Resolve a repository-relative image path to a local absolute path.

Uses a cache to avoid re-checking the same file multiple times.

Args:
rel_path: Path relative to the dataset repository root (e.g., "images/0001.png").
local_directory: Directory to search for the image.
call_cache: Cache dictionary to store resolved paths.

Returns:
str: Absolute local path if resolved, else empty string.
"""
if not rel_path:
return ""

# Check if image has already been cached
if rel_path in call_cache:
return call_cache[rel_path]

image_path = local_directory / rel_path

try:
if image_path.exists():
abs_path = str(image_path)
else:
logger.debug(f"File {image_path} not found in {local_directory}")
abs_path = ""

call_cache[rel_path] = abs_path
return abs_path

except Exception as e:
logger.error(f"Failed to resolve image path {rel_path}: {str(e)}")
call_cache[rel_path] = ""
return ""