Add t1 language support for simulators (#37348)

nagkumar91 · Nagkumar Arkalgud · Nagkumar Arkalgud · web-flow · commit b70434b2f8b4 · 2024-09-13T08:59:23.000-07:00
* Add t1 language support for simulators

* Update __init__.py

* Update _conversation.py

* Update xpia_simulator.py

* Update _language_suffix_mapping.py

* remove promptflow.evals

---------

Co-authored-by: Nagkumar Arkalgud <nagkumar@naarkalg-work-mac.local>
Co-authored-by: Nagkumar Arkalgud <nagkumar@naarkalgworkmac.lan>
diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md
@@ -93,6 +93,108 @@ if __name__ == "__main__":
     pprint(result)
 ```
 
+Simulator expects the user to have a callback method that invokes their AI application.
+Here's a sample of a callback which invokes AsyncAzureOpenAI:
+
+```python
+from azure.ai.evaluation.synthetic import AdversarialScenario, AdversarialSimulator, DirectAttackSimulator
+from azure.identity import DefaultAzureCredential
+from typing import Any, Dict, List, Optional
+import asyncio
+
+
+azure_ai_project = {
+    "subscription_id": <subscription_id>,
+    "resource_group_name": <resource_group_name>,
+    "project_name": <project_name>
+}
+
+async def callback(
+    messages: Dict[str, Any],
+    stream: bool = False,
+    session_state: Any = None,
+    context: Optional[Dict[str, Any]] = None,
+) -> dict:
+    """Answer the simulator's latest message by calling your AI application.
+
+    :param messages: Chat payload from the simulator. This sample reads its "messages" list and,
+        when present, its "template_parameters" dictionary.
+    :param stream: Returned unchanged in the response.
+    :param session_state: Returned unchanged in the response.
+    :param context: Returned unchanged in the response.
+    :return: Dictionary with "messages" (the history plus the assistant reply), "stream",
+        "session_state" and "context".
+    """
+    messages_list = messages["messages"]
+    # get last message
+    latest_message = messages_list[-1]
+    query = latest_message["content"]
+    # "template_parameters" may be absent or empty; only append file content when it is provided
+    template_parameters = messages.get("template_parameters") or {}
+    if "file_content" in template_parameters:
+        query += template_parameters["file_content"]
+    # the next few lines explain how to use AsyncAzureOpenAI's chat.completions
+    # to respond to the simulator. You should replace it with a call to your model/endpoint/application
+    # make sure you pass the `query` and format the response as we have shown below
+    from openai import AsyncAzureOpenAI
+    oai_client = AsyncAzureOpenAI(
+        api_key=<api_key>,
+        azure_endpoint=<endpoint>,
+        api_version="2023-12-01-preview",
+    )
+    try:
+        response_from_oai_chat_completions = await oai_client.chat.completions.create(messages=[{"content": query, "role": "user"}], model="gpt-4", max_tokens=300)
+    except Exception as e:
+        print(f"Error: {e}")
+        # to continue the conversation, return the messages, else you can fail the adversarial with an exception
+        message = {
+            "content": "Something went wrong. Check the exception e for more details.",
+            "role": "assistant",
+            "context": None,
+        }
+        messages["messages"].append(message)
+        # same response shape as the success path below
+        return {
+            "messages": messages["messages"],
+            "stream": stream,
+            "session_state": session_state,
+            "context": context
+        }
+    response_result = response_from_oai_chat_completions.choices[0].message.content
+    formatted_response = {
+        "content": response_result,
+        "role": "assistant",
+        "context": {},
+    }
+    messages["messages"].append(formatted_response)
+    return {
+        "messages": messages["messages"],
+        "stream": stream,
+        "session_state": session_state,
+        "context": context
+    }
+
+```
+### Adversarial QA:
+```python
+scenario = AdversarialScenario.ADVERSARIAL_QA
+simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+
+outputs = asyncio.run(
+    simulator(
+        scenario=scenario,
+        max_conversation_turns=1,
+        max_simulation_results=3,
+        target=callback
+    )
+)
+
+print(outputs.to_eval_qa_json_lines())
+```
+### Direct Attack Simulator
+
+```python
+scenario = AdversarialScenario.ADVERSARIAL_QA
+simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+
+outputs = asyncio.run(
+    simulator(
+        scenario=scenario,
+        max_conversation_turns=1,
+        max_simulation_results=2,
+        target=callback
+    )
+)
+
+print(outputs)
+```
 ## Troubleshooting
 
 ## Next steps
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/__init__.py
@@ -1,6 +1,13 @@
 from .adversarial_scenario import AdversarialScenario
 from .adversarial_simulator import AdversarialSimulator
+from .constants import SupportedLanguages
 from .direct_attack_simulator import DirectAttackSimulator
 from .indirect_attack_simulator import IndirectAttackSimulator
 
-__all__ = ["AdversarialSimulator", "AdversarialScenario", "DirectAttackSimulator", "IndirectAttackSimulator"]
+__all__ = [
+    "AdversarialSimulator",
+    "AdversarialScenario",
+    "DirectAttackSimulator",
+    "IndirectAttackSimulator",
+    "SupportedLanguages",
+]
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_conversation/_conversation.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_conversation/_conversation.py
@@ -6,6 +6,9 @@
 import logging
 from typing import Callable, Dict, List, Tuple, Union
 
+from azure.ai.evaluation.synthetic._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
+from azure.ai.evaluation.synthetic.constants import SupportedLanguages
+
 from ..._http_utils import AsyncHttpPipeline
 from . import ConversationBot, ConversationTurn
 
@@ -60,8 +63,10 @@ def is_closing_message_helper(response: str) -> bool:
 
 
 async def simulate_conversation(
+    *,
     bots: List[ConversationBot],
     session: AsyncHttpPipeline,
+    language: SupportedLanguages,
     stopping_criteria: Callable[[str], bool] = is_closing_message,
     turn_limit: int = 10,
     history_limit: int = 5,
@@ -101,6 +106,13 @@ async def simulate_conversation(
     else:
         conversation_id = None
     first_prompt = first_response["samples"][0]
+    if language != SupportedLanguages.English:
+        if not isinstance(language, SupportedLanguages):  # only enum members have a suffix mapping entry
+            raise Exception(  # pylint: disable=broad-exception-raised
+                f"Language option '{language}' isn't supported. Select a supported language option from "
+                f"azure.ai.evaluation.synthetic.constants.SupportedLanguages: {[f'{e}' for e in SupportedLanguages]}"
+            )
+        first_prompt += f" {SUPPORTED_LANGUAGES_MAPPING[language]}"
     # Add all generated turns into array to pass for each bot while generating
     # new responses. We add generated response and the person generating it.
     # in the case of the first turn, it is supposed to be the user search query
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_conversation/constants.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_conversation/constants.py
@@ -25,6 +25,5 @@
 
 class ConversationRole(Enum):
     """Role in a chatbot conversation"""
-
     USER = "user"
     ASSISTANT = "assistant"
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_helpers/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_helpers/__init__.py
@@ -0,0 +1,3 @@
+from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
+
+__all__ = ["SUPPORTED_LANGUAGES_MAPPING"]
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_helpers/_language_suffix_mapping.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/_helpers/_language_suffix_mapping.py
@@ -0,0 +1,17 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from azure.ai.evaluation.synthetic.constants import SupportedLanguages
+
+# Prompt suffix template; "__language__" is the placeholder filled in per language below.
+BASE_SUFFIX = "Make the conversation in __language__ language."
+
+# (enum member, language name substituted into BASE_SUFFIX), in mapping order.
+_LANGUAGE_NAMES = (
+    (SupportedLanguages.English, "english"),
+    (SupportedLanguages.Spanish, "spanish"),
+    (SupportedLanguages.Italian, "italian"),
+    (SupportedLanguages.French, "french"),
+    (SupportedLanguages.German, "german"),
+    (SupportedLanguages.SimplifiedChinese, "simplified chinese"),
+    (SupportedLanguages.Portuguese, "portuguese"),
+    (SupportedLanguages.Japanese, "japanese"),
+)
+
+# Maps each SupportedLanguages member to its fully rendered prompt suffix.
+SUPPORTED_LANGUAGES_MAPPING = {
+    member: BASE_SUFFIX.replace("__language__", name) for member, name in _LANGUAGE_NAMES
+}
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/adversarial_simulator.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 # noqa: E501
+# pylint: disable=E0401,E0611
 import asyncio
 import functools
 import logging
@@ -26,6 +27,7 @@
     TokenScope,
 )
 from ._utils import JsonLineList
+from .constants import SupportedLanguages
 
 logger = logging.getLogger(__name__)
 
@@ -44,13 +46,15 @@ def wrapper(*args, **kwargs):
         scenario = str(kwargs.get("scenario", None))
         max_conversation_turns = kwargs.get("max_conversation_turns", None)
         max_simulation_results = kwargs.get("max_simulation_results", None)
+        selected_language = kwargs.get("language", SupportedLanguages.English)
         decorated_func = monitor_operation(
             activity_name="adversarial.simulator.call",
             activity_type=ActivityType.PUBLICAPI,
             custom_dimensions={
                 "scenario": scenario,
                 "max_conversation_turns": max_conversation_turns,
                 "max_simulation_results": max_simulation_results,
+                "selected_language": selected_language,
             },
         )(func)
 
@@ -114,6 +118,7 @@ async def __call__(
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
         _jailbreak_type: Optional[str] = None,
+        language: SupportedLanguages = SupportedLanguages.English,
         randomize_order: bool = True,
         randomization_seed: Optional[int] = None,
     ):
@@ -147,6 +152,8 @@ async def __call__(
         :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
             Defaults to 3.
         :paramtype concurrent_async_task: int
+        :keyword language: The language in which the conversation should be generated. Defaults to English.
+        :paramtype language: azure.ai.evaluation.synthetic.constants.SupportedLanguages
         :keyword randomize_order: Whether or not the order of the prompts should be randomized. Defaults to True.
         :paramtype randomize_order: bool
         :keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
@@ -244,6 +251,7 @@ async def __call__(
                             api_call_retry_limit=api_call_retry_limit,
                             api_call_retry_sleep_sec=api_call_retry_sleep_sec,
                             api_call_delay_sec=api_call_delay_sec,
+                            language=language,
                             semaphore=semaphore,
                         )
                     )
@@ -296,6 +304,7 @@ async def _simulate_async(
         api_call_retry_limit,
         api_call_retry_sleep_sec,
         api_call_delay_sec,
+        language,
         semaphore,
     ) -> List[Dict]:
         user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
@@ -317,6 +326,7 @@ async def _simulate_async(
                 session=session,
                 turn_limit=max_conversation_turns,
                 api_call_delay_sec=api_call_delay_sec,
+                language=language,
             )
         return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/constants.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/constants.py
@@ -0,0 +1,17 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from enum import Enum
+
+
+class SupportedLanguages(Enum):
+    """Languages that can be selected for simulator-generated conversations.
+
+    Each member's value is a short lowercase language code.
+    """
+
+    Spanish = "es"
+    Italian = "it"
+    French = "fr"
+    German = "de"
+    # Language code plus a region part, unlike the two-letter values of the other members.
+    SimplifiedChinese = "zh-cn"
+    Portuguese = "pt"
+    Japanese = "ja"
+    English = "en"
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/direct_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/direct_attack_simulator.py
@@ -57,9 +57,9 @@ class DirectAttackSimulator:
         * "subscription_id": Azure subscription ID.
         * "resource_group_name": Name of the Azure resource group.
         * "project_name": Name of the Azure Machine Learning workspace.
-    :type azure_ai_project: Dict[str, Any]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
+    :type azure_ai_project: Dict[str, Any]
     """
 
     def __init__(self, *, azure_ai_project: Dict[str, Any], credential=None):
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/synthetic/indirect_attack_simulator.py
@@ -6,10 +6,10 @@
 import logging
 from typing import Any, Callable, Dict
 
-from azure.ai.evaluation.synthetic.adversarial_scenario import AdversarialScenario
 from azure.identity import DefaultAzureCredential
 
 from promptflow._sdk._telemetry import ActivityType, monitor_operation
+from azure.ai.evaluation.synthetic.adversarial_scenario import AdversarialScenario
 
 from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 from .adversarial_simulator import AdversarialSimulator
@@ -19,6 +19,7 @@
 
 def monitor_adversarial_scenario(func) -> Callable:
     """Decorator to monitor adversarial scenario.
+
     :param func: The function to be decorated.
     :type func: Callable
     :return: The decorated function.
@@ -54,9 +55,9 @@ class IndirectAttackSimulator:
         * "subscription_id": Azure subscription ID.
         * "resource_group_name": Name of the Azure resource group.
         * "project_name": Name of the Azure Machine Learning workspace.
-    :type azure_ai_project: Dict[str, Any]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
+    :type azure_ai_project: Dict[str, Any]
     """
 
     def __init__(self, *, azure_ai_project: Dict[str, Any], credential=None):
@@ -106,8 +107,9 @@ async def __call__(
         This simulator converses with your AI system using prompts injected into the context to interrupt normal
         expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
         the scope of your AI system.
+
         :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
-        :paramtype scenario: promptflow.evals.synthetic.adversarial_scenario.AdversarialScenario
+        :paramtype scenario: azure.ai.evaluation.synthetic.adversarial_scenario.AdversarialScenario
         :keyword target: The target function to simulate adversarial inputs against.
             This function should be asynchronous and accept a dictionary representing the adversarial input.
         :paramtype target: Callable
@@ -130,16 +132,21 @@ async def __call__(
             Defaults to 3.
         :paramtype concurrent_async_task: int
         :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
+
          - 'template_parameters': A dictionary with parameters used in the conversation template,
             including 'conversation_starter'.
          - 'messages': A list of dictionaries, each representing a turn in the conversation.
             Each message dictionary includes 'content' (the message text) and
             'role' (indicating whether the message is from the 'user' or the 'assistant').
          - '**$schema**': A string indicating the schema URL for the conversation format.
+
          The 'content' for 'assistant' role messages may includes the messages that your callback returned.
         :rtype: List[Dict[str, Any]]
+
         **Output format**
+
         .. code-block:: python
+
             return_value = [
                 {
                     'template_parameters': {},

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING`
	`2`	`+`
	`3`	`+__all__ = ["SUPPORTED_LANGUAGES_MAPPING"]`