Groundedness detection support in Non adversarial simulator (Azure#38087)

nagkumar91 · Nagkumar Arkalgud · Nagkumar Arkalgud · web-flow · commit 3607859af0e6 · 2024-10-25T11:40:30.000-04:00
* Update task_query_response.prompty

remove required keys

* Update task_simulate.prompty

* Update task_query_response.prompty

* Update task_simulate.prompty

* Fix the api_key needed

* Update for release

* Black fix for file

* Add original text in global context

* Update test

* Update the indirect attack simulator

* Black suggested fixes

* Update simulator prompty

* Update adversarial scenario enum to exclude XPIA

* Update changelog

* Black fixes

* Remove duplicate import

* Fix the mypy error

* Mypy please be happy

* Updates to non adv simulator

* accept context from assistant messages, exclude them when using them for conversation

* update changelog

* pylint fixes

* pylint fixes

* remove redundant quotes

* Fix typo

* pylint fix

* Update broken tests

* Non adv simulator accepts object for conversation starter

* Fix warning message being displayed incoorectly

* Adding the grounding json in package resource and update changelog

* Fix typo

* Added the grouding file

* Update test, add concurrent async runs to conversation_starter based simualtor

* Add grounding json to cspell ignore

---------

Co-authored-by: Nagkumar Arkalgud &lt;nagkumar@naarkalg-work-mac.local&gt;
Co-authored-by: Nagkumar Arkalgud &lt;nagkumar@naarkalgworkmac.lan&gt;
Co-authored-by: Nagkumar Arkalgud &lt;nagkumar@Mac.lan&gt;
diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -4,6 +4,24 @@
 ## 1.0.0b5 (Unreleased)
 
 ### Features Added
+- Groundedness detection in Non Adversarial Simulator via query/context pairs
+```python
+import importlib.resources as pkg_resources
+package = "azure.ai.evaluation.simulator._data_sources"
+resource_name = "grounding.json"
+custom_simulator = Simulator(model_config=model_config)
+conversation_turns = []
+with pkg_resources.path(package, resource_name) as grounding_file:
+    with open(grounding_file, "r") as file:
+        data = json.load(file)
+for item in data:
+    conversation_turns.append([item])
+outputs = asyncio.run(custom_simulator(
+    target=callback,
+    conversation_turns=conversation_turns,
+    max_conversation_turns=1,
+))
+```
 
 ### Breaking Changes
 - Renamed environment variable `PF_EVALS_BATCH_USE_ASYNC` to `AI_EVALS_BATCH_USE_ASYNC`.
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/grounding.json b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/grounding.json
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -92,7 +92,8 @@ async def __call__(
         api_call_delay_sec: float = 1,
         query_response_generating_prompty_kwargs: Dict[str, Any] = {},
         user_simulator_prompty_kwargs: Dict[str, Any] = {},
-        conversation_turns: List[List[str]] = [],
+        conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [],
+        concurrent_async_tasks: int = 5,
         **kwargs,
     ) -> List[JsonLineChatProtocol]:
         """
@@ -119,7 +120,10 @@ async def __call__(
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
         :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :keyword conversation_turns: Predefined conversation turns to simulate.
-        :paramtype conversation_turns: List[List[str]]
+        :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
+        :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
+            Defaults to 5.
+        :paramtype concurrent_async_tasks: int
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
 
@@ -134,12 +138,12 @@ async def __call__(
         if conversation_turns and (text or tasks):
             raise ValueError("Cannot specify both conversation_turns and text/tasks")
 
-        if num_queries > len(tasks):
+        if text and num_queries > len(tasks):
             warnings.warn(
                 f"You have specified 'num_queries' > len('tasks') ({num_queries} > {len(tasks)}). "
                 f"All tasks will be used for generation and the remaining {num_queries - len(tasks)} lines will be simulated in task-free mode"
             )
-        elif num_queries < len(tasks):
+        elif text and num_queries < len(tasks):
             warnings.warn(
                 f"You have specified 'num_queries' < len('tasks') ({num_queries} < {len(tasks)}). "
                 f"Only the first {num_queries} lines of the specified tasks will be simulated."
@@ -157,6 +161,7 @@ async def __call__(
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
                 api_call_delay_sec=api_call_delay_sec,
                 prompty_model_config=prompty_model_config,
+                concurrent_async_tasks=concurrent_async_tasks,
             )
 
         query_responses = await self._generate_query_responses(
@@ -183,11 +188,12 @@ async def _simulate_with_predefined_turns(
         *,
         target: Callable,
         max_conversation_turns: int,
-        conversation_turns: List[List[str]],
+        conversation_turns: List[List[Union[str, Dict[str, Any]]]],
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
         api_call_delay_sec: float,
         prompty_model_config: Any,
+        concurrent_async_tasks: int,
     ) -> List[JsonLineChatProtocol]:
         """
         Simulates conversations using predefined conversation turns.
@@ -197,7 +203,7 @@ async def _simulate_with_predefined_turns(
         :keyword max_conversation_turns: Maximum number of turns for the simulation.
         :paramtype max_conversation_turns: int
         :keyword conversation_turns: A list of predefined conversation turns.
-        :paramtype conversation_turns: List[List[str]]
+        :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str]
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
@@ -206,53 +212,68 @@ async def _simulate_with_predefined_turns(
         :paramtype api_call_delay_sec: float
         :keyword prompty_model_config: The configuration for the prompty model.
         :paramtype prompty_model_config: Any
+        :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
+        :paramtype concurrent_async_tasks: int
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
-        simulated_conversations = []
         progress_bar = tqdm(
             total=int(len(conversation_turns) * (max_conversation_turns / 2)),
             desc="Simulating with predefined conversation turns: ",
             ncols=100,
             unit="messages",
         )
-
-        for simulation in conversation_turns:
-            current_simulation = ConversationHistory()
-            for simulated_turn in simulation:
-                user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
-                current_simulation.add_to_history(user_turn)
-                assistant_response, assistant_context = await self._get_target_response(
-                    target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
-                )
-                assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
-                current_simulation.add_to_history(assistant_turn)
-                progress_bar.update(1)  # Update progress bar for both user and assistant turns
-
-            if len(current_simulation) < max_conversation_turns:
-                await self._extend_conversation_with_simulator(
-                    current_simulation=current_simulation,
-                    max_conversation_turns=max_conversation_turns,
-                    user_simulator_prompty=user_simulator_prompty,
-                    user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
-                    api_call_delay_sec=api_call_delay_sec,
-                    prompty_model_config=prompty_model_config,
-                    target=target,
-                    progress_bar=progress_bar,
-                )
-            simulated_conversations.append(
-                JsonLineChatProtocol(
+        semaphore = asyncio.Semaphore(concurrent_async_tasks)
+        progress_bar_lock = asyncio.Lock()
+
+        async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLineChatProtocol:
+            async with semaphore:
+                current_simulation = ConversationHistory()
+                for simulated_turn in simulation:
+                    if isinstance(simulated_turn, str):
+                        user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
+                    elif isinstance(simulated_turn, dict):
+                        user_turn = Turn(
+                            role=ConversationRole.USER,
+                            content=str(simulated_turn.get("content")),
+                            context=str(simulated_turn.get("context"))
+                        )
+                    else:
+                        raise ValueError("Each simulated turn must be a string or a dict with 'content' and 'context' keys")
+                    current_simulation.add_to_history(user_turn)
+                    assistant_response, assistant_context = await self._get_target_response(
+                        target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
+                    )
+                    assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
+                    current_simulation.add_to_history(assistant_turn)
+                    async with progress_bar_lock:
+                        progress_bar.update(1)
+
+                if len(current_simulation) < max_conversation_turns:
+                    await self._extend_conversation_with_simulator(
+                        current_simulation=current_simulation,
+                        max_conversation_turns=max_conversation_turns,
+                        user_simulator_prompty=user_simulator_prompty,
+                        user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
+                        api_call_delay_sec=api_call_delay_sec,
+                        prompty_model_config=prompty_model_config,
+                        target=target,
+                        progress_bar=progress_bar,
+                        progress_bar_lock=progress_bar_lock,
+                    )
+                return JsonLineChatProtocol(
                     {
                         "messages": current_simulation.to_list(),
                         "finish_reason": ["stop"],
                         "context": {},
                         "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
                     }
                 )
-            )
 
+        tasks = [asyncio.create_task(run_simulation(simulation)) for simulation in conversation_turns]
+        results = await asyncio.gather(*tasks)
         progress_bar.close()
-        return simulated_conversations
+        return results
 
     async def _extend_conversation_with_simulator(
         self,
@@ -265,6 +286,7 @@ async def _extend_conversation_with_simulator(
         prompty_model_config: Dict[str, Any],
         target: Callable,
         progress_bar: tqdm,
+        progress_bar_lock: asyncio.Lock
     ):
         """
         Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.
@@ -285,6 +307,8 @@ async def _extend_conversation_with_simulator(
         :paramtype target: Callable,
         :keyword progress_bar: Progress bar for tracking simulation progress.
         :paramtype progress_bar: tqdm,
+        :keyword progress_bar_lock: Lock for updating the progress bar safely.
+        :paramtype progress_bar_lock: asyncio.Lock
         """
         user_flow = self._load_user_simulation_flow(
             user_simulator_prompty=user_simulator_prompty,  # type: ignore
@@ -307,7 +331,8 @@ async def _extend_conversation_with_simulator(
             )
             assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
             current_simulation.add_to_history(assistant_turn)
-            progress_bar.update(1)
+            async with progress_bar_lock:
+                progress_bar.update(1)
 
     def _load_user_simulation_flow(
         self,
diff --git a/sdk/evaluation/azure-ai-evaluation/cspell.json b/sdk/evaluation/azure-ai-evaluation/cspell.json
@@ -0,0 +1,3 @@
+{
+    "ignorePaths": ["sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/grounding.json"]
+}
diff --git a/sdk/evaluation/azure-ai-evaluation/setup.py b/sdk/evaluation/azure-ai-evaluation/setup.py
@@ -85,5 +85,6 @@
     package_data={
         "pytyped": ["py.typed"],
         "azure.ai.evaluation.simulator._prompty": ["*.prompty"],
+        "azure.ai.evaluation.simulator._data_sources": ["*.json"],
     },
 )
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py
@@ -330,6 +330,7 @@ async def test_simulate_with_predefined_turns(
             prompty_model_config={},
             user_simulator_prompty=None,
             user_simulator_prompty_kwargs={},
+            concurrent_async_tasks=1,
         )
 
         assert len(result) == 1

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "ignorePaths": ["sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/grounding.json"]`
	`3`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -85,5 +85,6 @@`
`85`	`85`	`package_data={`
`86`	`86`	`"pytyped": ["py.typed"],`
`87`	`87`	`"azure.ai.evaluation.simulator._prompty": ["*.prompty"],`
	`88`	`+ "azure.ai.evaluation.simulator._data_sources": ["*.json"],`
`88`	`89`	`},`
`89`	`90`	`)`
Original file line number	Diff line number	Diff line change
`@@ -330,6 +330,7 @@ async def test_simulate_with_predefined_turns(`
`330`	`330`	`prompty_model_config={},`
`331`	`331`	`user_simulator_prompty=None,`
`332`	`332`	`user_simulator_prompty_kwargs={},`
	`333`	`+ concurrent_async_tasks=1,`
`333`	`334`	`)`
`334`	`335`
`335`	`336`	`assert len(result) == 1`