Azure
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py‎
Lines changed: 10 additions & 5 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/tests/unittests/test_direct_attack_simulator.py‎
Lines changed: 183 additions & 0 deletions b/‎sdk/evaluation/azure-ai-evaluation/tests/unittests/test_direct_attack_simulator.py‎
Lines changed: 183 additions & 0 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py‎
Lines changed: 86 additions & 0 deletions b/‎sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py‎
Lines changed: 86 additions & 0 deletions
@@ -134,9 +134,9 @@ async def __call__(
         :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
             Defaults to 3.
         :paramtype concurrent_async_task: int
-        :keyword randomization_seed: Seed used to randomize prompt selection, shared by both jailbreak
-            and regular simulation to ensure consistent results. If not provided, a random seed will be generated
-            and shared between simulations.
+        :keyword randomization_seed: Seed used to randomize prompt selection. This seed is used to derive
+            different but deterministic seeds for regular and jailbreak simulations to ensure consistent 
+            results while avoiding duplicate queries. If not provided, a random seed will be generated.
         :paramtype randomization_seed: Optional[int]
         :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
 
@@ -201,6 +201,11 @@ async def __call__(
         if not randomization_seed:
             randomization_seed = randint(0, 1000000)
 
+        # Derive different seeds for regular and jailbreak simulations to avoid duplicate queries
+        # This ensures deterministic behavior while preventing identical results
+        regular_seed = randomization_seed
+        jailbreak_seed = randomization_seed + 1 if randomization_seed < 999999 else randomization_seed - 1
+
         regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         regular_sim_results = await regular_sim(
             scenario=scenario,
@@ -212,7 +217,7 @@ async def __call__(
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
             randomize_order=False,
-            randomization_seed=randomization_seed,
+            randomization_seed=regular_seed,
         )
         jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         jb_sim_results = await jb_sim(
@@ -226,6 +231,6 @@ async def __call__(
             concurrent_async_task=concurrent_async_task,
             _jailbreak_type="upia",
             randomize_order=False,
-            randomization_seed=randomization_seed,
+            randomization_seed=jailbreak_seed,
         )
         return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
@@ -0,0 +1,183 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+from azure.ai.evaluation.simulator import DirectAttackSimulator, AdversarialScenario
+from azure.ai.evaluation.simulator._utils import JsonLineList
+from azure.core.credentials import TokenCredential
+
+
+@pytest.fixture
+def mock_credential():
+    """Provide a stand-in credential restricted to the ``TokenCredential`` interface."""
+    credential = MagicMock(spec=TokenCredential)
+    return credential
+
+
+@pytest.fixture
+def mock_azure_ai_project():
+    """Provide a placeholder Azure AI project scope made of mock identifiers."""
+    project_scope = {
+        "subscription_id": "mock-sub",
+        "resource_group_name": "mock-rg",
+        "project_name": "mock-proj",
+    }
+    return project_scope
+
+
+@pytest.fixture
+def mock_target():
+    """Provide an async target callable that answers every query with a fixed string."""
+
+    # The query text is ignored on purpose: the tests only need an awaitable target.
+    async def mock_target_fn(query: str) -> str:
+        return "mock response"
+
+    return mock_target_fn
+
+
+@pytest.mark.unittest
+class TestDirectAttackSimulator:
+    """Tests for the seeds DirectAttackSimulator hands to its two AdversarialSimulator runs.
+
+    Every test patches ``AdversarialSimulator.__init__`` / ``__call__`` and
+    ``DirectAttackSimulator._ensure_service_dependencies``, so only the keyword
+    arguments passed to the mocked ``AdversarialSimulator.__call__`` are inspected.
+    """
+
+    @staticmethod
+    async def _run_direct_attack(mock_adv_call, azure_ai_project, credential, target, **seed_kwargs):
+        """Run DirectAttackSimulator once against the mocked AdversarialSimulator.
+
+        :param mock_adv_call: The AsyncMock patched in for ``AdversarialSimulator.__call__``.
+        :param azure_ai_project: Project scope passed to the DirectAttackSimulator constructor.
+        :param credential: Credential passed to the DirectAttackSimulator constructor.
+        :param target: Async callable used as the simulation target.
+        :param seed_kwargs: Extra keyword arguments for the simulator call, i.e. an optional
+            ``randomization_seed``.
+        :return: ``(result, mock_result, regular_seed, jailbreak_seed)`` where the seeds are the
+            ``randomization_seed`` keyword values of the first and second mocked call.
+        """
+        mock_result = JsonLineList([{"messages": [{"content": "test_query", "role": "user"}]}])
+        mock_adv_call.return_value = mock_result
+
+        simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=credential)
+        result = await simulator(
+            scenario=AdversarialScenario.ADVERSARIAL_QA,
+            target=target,
+            max_simulation_results=3,
+            **seed_kwargs,
+        )
+
+        # One call for the regular simulation and one for the jailbreak simulation.
+        assert mock_adv_call.call_count == 2
+
+        # The tests treat the first recorded call as "regular" and the second as "jailbreak".
+        regular_kwargs, jailbreak_kwargs = (call[1] for call in mock_adv_call.call_args_list)
+        return (
+            result,
+            mock_result,
+            regular_kwargs.get("randomization_seed"),
+            jailbreak_kwargs.get("randomization_seed"),
+        )
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation.simulator._direct_attack_simulator.DirectAttackSimulator._ensure_service_dependencies")
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__call__", new_callable=AsyncMock)
+    async def test_different_randomization_seeds_fix(
+        self,
+        mock_adv_call,
+        mock_adv_init,
+        mock_ensure_deps,
+        mock_azure_ai_project,
+        mock_credential,
+        mock_target,
+    ):
+        """Test that DirectAttackSimulator uses different seeds for regular and jailbreak simulations."""
+        result, mock_result, regular_seed, jailbreak_seed = await self._run_direct_attack(
+            mock_adv_call, mock_azure_ai_project, mock_credential, mock_target, randomization_seed=42
+        )
+
+        # The fix should ensure different seeds are used
+        assert regular_seed != jailbreak_seed, "Regular and jailbreak simulations should use different seeds"
+        assert regular_seed == 42, "Regular simulation should use the original seed"
+        assert jailbreak_seed == 43, "Jailbreak simulation should use derived seed (original + 1)"
+
+        # Both entries of the result are whatever the mocked AdversarialSimulator returned
+        assert "regular" in result
+        assert "jailbreak" in result
+        assert result["regular"] == mock_result
+        assert result["jailbreak"] == mock_result
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation.simulator._direct_attack_simulator.DirectAttackSimulator._ensure_service_dependencies")
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__call__", new_callable=AsyncMock)
+    async def test_edge_case_max_seed_value(
+        self,
+        mock_adv_call,
+        mock_adv_init,
+        mock_ensure_deps,
+        mock_azure_ai_project,
+        mock_credential,
+        mock_target,
+    ):
+        """Test the seed 999999, the threshold at which the derived seed becomes ``original - 1``."""
+        max_seed = 999999
+        _, _, regular_seed, jailbreak_seed = await self._run_direct_attack(
+            mock_adv_call, mock_azure_ai_project, mock_credential, mock_target, randomization_seed=max_seed
+        )
+
+        # When at max value, jailbreak seed should be original - 1
+        assert regular_seed != jailbreak_seed, "Seeds should still be different at max value"
+        assert regular_seed == max_seed, "Regular simulation should use the original max seed"
+        assert jailbreak_seed == max_seed - 1, "Jailbreak simulation should use max seed - 1"
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation.simulator._direct_attack_simulator.DirectAttackSimulator._ensure_service_dependencies")
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__call__", new_callable=AsyncMock)
+    async def test_no_seed_provided_generates_different_seeds(
+        self,
+        mock_adv_call,
+        mock_adv_init,
+        mock_ensure_deps,
+        mock_azure_ai_project,
+        mock_credential,
+        mock_target,
+    ):
+        """Test that when no seed is provided, different seeds are still generated."""
+        # No randomization_seed is passed, so the simulator generates one itself.
+        _, _, regular_seed, jailbreak_seed = await self._run_direct_attack(
+            mock_adv_call, mock_azure_ai_project, mock_credential, mock_target
+        )
+
+        # Even with random generation, seeds should be different
+        assert regular_seed != jailbreak_seed, "Generated seeds should be different"
+        assert abs(jailbreak_seed - regular_seed) == 1, "Jailbreak seed should be derived from regular seed"
@@ -395,3 +395,89 @@ def test_local_random_no_global_state_pollution(self):
         # Global state should be unchanged
         after_value = random.random()
         assert initial_value == after_value, "Local Random usage should not affect global state"
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation.simulator.DirectAttackSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__call__", new_callable=AsyncMock)
+    async def test_direct_attack_different_seeds_fix(
+        self, mock_adv_call, mock_adv_init, mock_direct_init, safety_eval, mock_target
+    ):
+        """Check the seeds DirectAttackSimulator passes to its regular and jailbreak runs.
+
+        ``AdversarialSimulator.__call__`` is an AsyncMock, so the test reads the
+        ``randomization_seed`` keyword of its two recorded calls: the first is expected
+        to carry the caller's seed (42) and the second the derived seed (43).
+        """
+        from azure.ai.evaluation.simulator import AdversarialScenario
+        from azure.ai.evaluation.simulator._direct_attack_simulator import DirectAttackSimulator
+
+        # Every mocked AdversarialSimulator run yields the same single-message conversation.
+        mock_adv_call.return_value = JsonLineList([{"messages": [{"content": "test_query", "role": "user"}]}])
+
+        # Build the instance through __new__ and set the attributes by hand,
+        # so the constructor is not involved.
+        simulator = DirectAttackSimulator.__new__(DirectAttackSimulator)
+        simulator.azure_ai_project = {"test": "project"}
+        simulator.credential = MagicMock()
+        simulator.adversarial_template_handler = MagicMock()
+
+        result = await simulator(
+            scenario=AdversarialScenario.ADVERSARIAL_QA,
+            target=mock_target,
+            max_simulation_results=2,
+            randomization_seed=42,
+        )
+
+        # One call for the regular simulation, one for the jailbreak simulation.
+        assert mock_adv_call.call_count == 2
+
+        seeds = [call[1].get("randomization_seed") for call in mock_adv_call.call_args_list]
+        regular_seed, jailbreak_seed = seeds
+
+        assert regular_seed != jailbreak_seed
+        assert regular_seed == 42  # Original seed for regular simulation
+        assert jailbreak_seed == 43  # Derived seed for jailbreak simulation
+
+        # The result exposes both simulation outputs.
+        assert "regular" in result
+        assert "jailbreak" in result
+
+    @pytest.mark.asyncio
+    @patch("azure.ai.evaluation.simulator.DirectAttackSimulator.__init__", return_value=None)
+    @patch("azure.ai.evaluation.simulator.DirectAttackSimulator.__call__", new_callable=AsyncMock)
+    async def test_direct_attack_duplicate_queries_issue(
+        self, mock_direct_call, mock_direct_init, safety_eval, mock_target
+    ):
+        """Check that ``_simulate`` passes ``randomization_seed`` on to DirectAttackSimulator.
+
+        ``DirectAttackSimulator.__call__`` is replaced by an AsyncMock here, so this test
+        cannot observe whether real regular and jailbreak runs produce distinct queries.
+        It only pins the seed value the simulator is called with.
+        """
+
+        def conversations():
+            # Fresh objects on every call so the two result sets share no state.
+            return [{"messages": [{"content": query, "role": "user"}]} for query in ("query_1", "query_2")]
+
+        # The mocked simulator returns identical regular and jailbreak conversations;
+        # the assertions below never inspect them.
+        mock_direct_call.return_value = {
+            "regular": JsonLineList(conversations()),
+            "jailbreak": JsonLineList(conversations()),
+        }
+
+        # Run the direct-attack path of the safety evaluation with a fixed seed.
+        await safety_eval._simulate(
+            target=mock_target,
+            direct_attack=True,
+            adversarial_scenario=AdversarialScenario.ADVERSARIAL_QA,
+            max_simulation_results=2,
+            randomization_seed=42,
+        )
+
+        # DirectAttackSimulator must be invoked exactly once ...
+        mock_direct_call.assert_called_once()
+        call_kwargs = mock_direct_call.call_args[1]
+
+        # ... and must receive the caller's seed unchanged.
+        assert call_kwargs.get("randomization_seed") == 42