Multimodal adversarial simulator for image generation and image understanding (Azure#38584)

w-javed · web-flow · commit 3497eff2f718 · 2024-11-20T10:39:37.000-08:00
* sim-multi-modal

* fix

* unit test fix

* adding tests recording

* test recording

* fix lint

* assets

* skip-test

* asset

* asset

* refactor-after-nag-comments

* refactor-after-nag-comments

* test fix

* asset

* Fix with comments

* refactor

* conf-test-fix

* removing logs
diff --git a/sdk/evaluation/azure-ai-evaluation/assets.json b/sdk/evaluation/azure-ai-evaluation/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_fdb88346b8"
+  "Tag": "python/evaluation/azure-ai-evaluation_a63b4a27cf"
 }
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
@@ -91,23 +91,40 @@ def _store_multimodal_content(messages, tmpdir: str):
     for message in messages:
         if isinstance(message.get("content", []), list):
             for content in message.get("content", []):
-                if content.get("type") == "image_url":
-                    image_url = content.get("image_url")
-                    if image_url and "url" in image_url and image_url["url"].startswith("data:image/jpg;base64,"):
-                        # Extract the base64 string
-                        base64image = image_url["url"].replace("data:image/jpg;base64,", "")
-
-                        # Generate a unique filename
-                        image_file_name = f"{str(uuid.uuid4())}.jpg"
-                        image_url["url"] = f"images/{image_file_name}"  # Replace the base64 URL with the file path
-
-                        # Decode the base64 string to binary image data
-                        image_data_binary = base64.b64decode(base64image)
-
-                        # Write the binary image data to the file
-                        image_file_path = os.path.join(images_folder_path, image_file_name)
-                        with open(image_file_path, "wb") as f:
-                            f.write(image_data_binary)
+                process_message_content(content, images_folder_path)
+
+
+def process_message_content(content, images_folder_path: str) -> None:
+    """Persist an inline base64 image to disk and point the message at the saved file.
+
+    Only ``image_url`` entries whose URL has the form ``data:image/<subtype>;base64,<payload>``
+    are handled; every other entry is left untouched. On success the decoded bytes are written to
+    ``<images_folder_path>/<uuid>.<subtype>`` and ``content["image_url"]["url"]`` is rewritten in
+    place to ``images/<uuid>.<subtype>``.
+
+    :param content: One item of a chat message's ``content`` list.
+    :type content: dict
+    :param images_folder_path: Directory that receives the decoded image; it is not created here.
+    :type images_folder_path: str
+    :return: Always ``None``; the effect is the written file and the in-place URL update.
+    :rtype: None
+    :raises binascii.Error: If the payload is not valid base64 (the message is left unchanged).
+    """
+    if content.get("type", "") != "image_url":
+        return None
+
+    image_url = content.get("image_url")
+    if not image_url or "url" not in image_url:
+        return None
+
+    url = image_url["url"]
+    if not isinstance(url, str):
+        return None
+
+    # The subtype ends up in the file name, so accept only characters that cannot form a path
+    # component separator, and require the ";base64," marker so that URLs using another encoding
+    # are skipped instead of being decoded into garbage.
+    match = re.match(r"data:image/([A-Za-z0-9.+-]+);base64,", url)
+    if not match:
+        return None
+
+    ext = match.group(1)
+
+    # Decode the base64 payload first: if it is malformed, nothing has been modified yet.
+    image_data_binary = base64.b64decode(url[match.end() :])
+
+    # Generate a unique filename and write the binary image data to it
+    image_file_name = f"{uuid.uuid4()}.{ext}"
+    image_file_path = os.path.join(images_folder_path, image_file_name)
+    with open(image_file_path, "wb") as f:
+        f.write(image_data_binary)
+
+    # Replace the base64 URL with the file path only once the file actually exists
+    image_url["url"] = f"images/{image_file_name}"
+    return None
 
 
 def _log_metrics_and_instance_results(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
@@ -28,6 +28,8 @@ class AdversarialScenario(Enum):
     ADVERSARIAL_CONTENT_GEN_UNGROUNDED = "adv_content_gen_ungrounded"
     ADVERSARIAL_CONTENT_GEN_GROUNDED = "adv_content_gen_grounded"
     ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
+    ADVERSARIAL_IMAGE_GEN = "adv_image_gen"
+    ADVERSARIAL_IMAGE_UNDERSTANDING = "adv_image_understanding"
 
 
 @experimental
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -16,13 +16,19 @@
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
 from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
 from azure.core.credentials import TokenCredential
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
 
 from ._constants import SupportedLanguages
-from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
+from ._conversation import (
+    CallbackConversationBot,
+    MultiModalConversationBot,
+    ConversationBot,
+    ConversationRole,
+    ConversationTurn,
+)
 from ._conversation._conversation import simulate_conversation
 from ._model_tools import (
     AdversarialTemplateHandler,
@@ -231,6 +237,7 @@ async def __call__(
                             api_call_delay_sec=api_call_delay_sec,
                             language=language,
                             semaphore=semaphore,
+                            scenario=scenario,
                         )
                     )
                 )
@@ -292,10 +299,13 @@ async def _simulate_async(
         api_call_delay_sec: int,
         language: SupportedLanguages,
         semaphore: asyncio.Semaphore,
+        scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
     ) -> List[Dict]:
-        user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
+        user_bot = self._setup_bot(
+            role=ConversationRole.USER, template=template, parameters=parameters, scenario=scenario
+        )
         system_bot = self._setup_bot(
-            target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
+            target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
         )
         bots = [user_bot, system_bot]
         session = get_async_http_client().with_policies(
@@ -341,6 +351,7 @@ def _setup_bot(
         template: AdversarialTemplate,
         parameters: TemplateParameters,
         target: Optional[Callable] = None,
+        scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
     ) -> ConversationBot:
         if role is ConversationRole.USER:
             model = self._get_user_proxy_completion_model(
@@ -372,6 +383,21 @@ def __init__(self):
                 def __call__(self) -> None:
                     pass
 
+            if scenario in [
+                AdversarialScenario.ADVERSARIAL_IMAGE_GEN,
+                AdversarialScenario.ADVERSARIAL_IMAGE_UNDERSTANDING,
+            ]:
+                return MultiModalConversationBot(
+                    callback=target,
+                    role=role,
+                    model=DummyModel(),
+                    user_template=str(template),
+                    user_template_parameters=parameters,
+                    rai_client=self.rai_client,
+                    conversation_template="",
+                    instantiation_parameters={},
+                )
+
             return CallbackConversationBot(
                 callback=target,
                 role=role,
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py
@@ -9,12 +9,12 @@
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 
+import re
 import jinja2
 
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
-
-from .._model_tools import LLMBase, OpenAIChatCompletionsModel
+from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
 from .._model_tools._template_handler import TemplateParameters
 from .constants import ConversationRole
 
@@ -271,8 +271,6 @@ async def generate_response(
                 "id": None,
                 "template_parameters": {},
             }
-        self.logger.info("Using user provided callback returning response.")
-
         time_taken = end_time - start_time
         try:
             response = {
@@ -290,8 +288,6 @@ async def generate_response(
                 blame=ErrorBlame.USER_ERROR,
             ) from exc
 
-        self.logger.info("Parsed callback response")
-
         return response, {}, time_taken, result
 
     # Bug 3354264: template is unused in the method - is this intentional?
@@ -308,9 +304,127 @@ def _to_chat_protocol(self, template, conversation_history, template_parameters)
         }
 
 
+class MultiModalConversationBot(ConversationBot):
+    """MultiModal Conversation bot that uses a user provided callback to generate responses.
+
+    Before the callback is invoked, brace-delimited image references found in the conversation
+    history are replaced by ``image_url`` content parts whose data is fetched through ``rai_client``.
+
+    :param callback: The callback function to use to generate responses.
+    :type callback: Callable
+    :param user_template: The template to use for the request.
+    :type user_template: str
+    :param user_template_parameters: The template parameters to use for the request.
+    :type user_template_parameters: Dict
+    :param rai_client: Client used to download the data of referenced images.
+    :type rai_client: RAIClient
+    :param args: Optional arguments to pass to the parent class.
+    :type args: Any
+    :param kwargs: Optional keyword arguments to pass to the parent class.
+    :type kwargs: Any
+    """
+
+    def __init__(
+        self,
+        callback: Callable,
+        user_template: str,
+        user_template_parameters: TemplateParameters,
+        rai_client: RAIClient,
+        *args,
+        **kwargs,
+    ) -> None:
+        self.callback = callback
+        self.user_template = user_template
+        self.user_template_parameters = user_template_parameters
+        self.rai_client = rai_client
+
+        super().__init__(*args, **kwargs)
+
+    async def generate_response(
+        self,
+        session: AsyncHttpPipeline,
+        conversation_history: List[Any],
+        max_history: int,
+        turn_number: int = 0,
+    ) -> Tuple[dict, dict, float, dict]:
+        """Send the conversation to the user callback and wrap its answer.
+
+        The latest turn of ``conversation_history`` is replaced in place by a turn whose message has
+        its image references expanded into content parts.
+
+        :param session: Unused here; the callback is called directly.
+        :type session: AsyncHttpPipeline
+        :param conversation_history: The turns so far; must contain at least one turn.
+        :type conversation_history: List[Any]
+        :param max_history: Unused here.
+        :type max_history: int
+        :param turn_number: Unused here.
+        :type turn_number: int
+        :return: The parsed response, the chat protocol request, the callback duration in seconds
+            and the raw callback result.
+        :rtype: Tuple[dict, dict, float, dict]
+        :raises EvaluationException: If the callback result does not follow the chat protocol.
+        """
+        previous_prompt = conversation_history[-1]
+        chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
+
+        # Replace the latest prompt (which may hold image tags) with its expanded content. The
+        # protocol messages are in the same order as the history, so the latest prompt is the LAST
+        # message; indexing the first one is only correct for a single-turn history.
+        conversation_history[-1] = ConversationTurn(
+            role=previous_prompt.role,
+            name=previous_prompt.name,
+            message=chat_protocol_message["messages"][-1]["content"],
+            full_response=previous_prompt.full_response,
+            request=chat_protocol_message,
+        )
+
+        # Hand the callback its own copy so it cannot mutate the request that is returned below.
+        msg_copy = copy.deepcopy(chat_protocol_message)
+        start_time = time.time()
+        result = await self.callback(msg_copy)
+        end_time = time.time()
+        if not result:
+            result = {
+                "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
+                "finish_reason": ["stop"],
+                "id": None,
+                "template_parameters": {},
+            }
+
+        time_taken = end_time - start_time
+        try:
+            response = {
+                "samples": [result["messages"][-1]["content"]],
+                "finish_reason": ["stop"],
+                "id": None,
+            }
+        except Exception as exc:
+            msg = "User provided callback does not conform to chat protocol standard."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            ) from exc
+
+        return response, chat_protocol_message, time_taken, result
+
+    async def _to_chat_protocol(self, conversation_history, template_parameters):  # pylint: disable=unused-argument
+        """Convert the conversation history into a chat protocol request.
+
+        :param conversation_history: Turns exposing ``message`` and ``role`` attributes.
+        :param template_parameters: Passed through unchanged under ``template_parameters``.
+        :return: A dict with ``template_parameters``, ``messages`` and ``$schema`` keys.
+        :rtype: dict
+        """
+        messages = []
+
+        for turn in conversation_history:
+            content = turn.message
+            if "image:" in content:
+                content = await self._to_multi_modal_content(content)
+            messages.append({"content": content, "role": turn.role.value})
+
+        return {
+            "template_parameters": template_parameters,
+            "messages": messages,
+            "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+        }
+
+    async def _to_multi_modal_content(self, text: str) -> list:
+        """Split a prompt into text and image content parts.
+
+        The prompt is cut into brace-delimited segments and the text between them. A brace segment
+        loses its braces and any ``image:`` marker. Every resulting piece that starts with
+        ``image_understanding/`` is fetched through ``rai_client`` and emitted as an ``image_url``
+        part; all other pieces are emitted as ``text`` parts.
+
+        :param text: The prompt to convert.
+        :type text: str
+        :return: The content parts in their original order.
+        :rtype: list
+        """
+        contents = []
+        for segment in re.findall(r"[^{}]+|\{[^{}]*\}", text):
+            if segment.startswith("{"):
+                segment = segment.strip("{}").replace("image:", "").strip()
+
+            if segment.startswith("image_understanding/"):
+                encoded_image = await self.rai_client.get_image_data(segment)
+                contents.append(
+                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
+                )
+            else:
+                contents.append({"type": "text", "text": segment})
+        return contents
+
+
 __all__ = [
     "ConversationRole",
     "ConversationBot",
     "CallbackConversationBot",
+    "MultiModalConversationBot",
     "ConversationTurn",
 ]
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/_conversation.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/_conversation.py
@@ -9,7 +9,6 @@
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator._constants import SupportedLanguages
 from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
-
 from ..._http_utils import AsyncHttpPipeline
 from . import ConversationBot, ConversationTurn
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -189,6 +189,7 @@ async def __call__(
                             api_call_delay_sec=api_call_delay_sec,
                             language=language,
                             semaphore=semaphore,
+                            scenario=scenario,
                         )
                     )
                 )
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
@@ -4,6 +4,7 @@
 import os
 from typing import Any
 from urllib.parse import urljoin, urlparse
+import base64
 
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
@@ -57,6 +58,7 @@ def __init__(  # pylint: disable=missing-client-constructor-parameter-credential
         # add a "/" at the end of the url
         self.api_url = self.api_url.rstrip("/") + "/"
         self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
+        self.parameter_image_endpoint = urljoin(self.api_url, "simulation/template/parameters/image")
         self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
         self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
         self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
@@ -166,3 +168,41 @@ async def get(self, url: str) -> Any:
             category=ErrorCategory.UNKNOWN,
             blame=ErrorBlame.USER_ERROR,
         )
+
+    async def get_image_data(self, path: str) -> Any:
+        """Fetch an image from the template-parameter image endpoint as base64 text.
+
+        :param path: Value sent as the ``path`` query parameter of the image endpoint.
+        :type path: str
+        :raises EvaluationException: If the service answers with a status code other than 200.
+        :return: The response body, base64-encoded and decoded to a UTF-8 string.
+        :rtype: str
+        """
+        bearer = self.token_manager.get_token()
+        request_headers = {
+            "Authorization": f"Bearer {bearer}",
+            "Content-Type": "application/json",
+            "User-Agent": USER_AGENT,
+        }
+
+        client = self._create_async_client()
+        async with client:
+            response = await client.get(  # pylint: disable=unexpected-keyword-arg
+                url=self.parameter_image_endpoint,
+                params={"path": path},
+                headers=request_headers,
+            )
+
+        # Guard clause: anything but 200 is reported with the same message the client uses elsewhere.
+        if response.status_code != 200:
+            msg = (
+                "Azure safety evaluation service is not available in your current region, "
+                "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
+            )
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.UNKNOWN,
+                blame=ErrorBlame.USER_ERROR,
+            )
+
+        raw_bytes = response.content
+        return base64.b64encode(raw_bytes).decode("utf-8")
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_adv_simulator.py
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_sim_and_eval.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_sim_and_eval.py
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_simulator.py

Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,5 @@`
`2`	`2`	`"AssetsRepo": "Azure/azure-sdk-assets",`
`3`	`3`	`"AssetsRepoPrefixPath": "python",`
`4`	`4`	`"TagPrefix": "python/evaluation/azure-ai-evaluation",`
`5`		`- "Tag": "python/evaluation/azure-ai-evaluation_fdb88346b8"`
	`5`	`+ "Tag": "python/evaluation/azure-ai-evaluation_a63b4a27cf"`
`6`	`6`	`}`
Original file line number	Diff line number	Diff line change
`@@ -189,6 +189,7 @@ async def __call__(`
`189`	`189`	`api_call_delay_sec=api_call_delay_sec,`
`190`	`190`	`language=language,`
`191`	`191`	`semaphore=semaphore,`
	`192`	`+ scenario=scenario,`
`192`	`193`	`)`
`193`	`194`	`)`
`194`	`195`	`)`