
Commit 1dab881

Supporting passing Model Configuration object (#34088)
* Supporting passing Model Configuration object
* Update ai_samples_evaluate.py
* Update ai_samples_evaluate.py
* Update _evaluate.py
1 parent a97069d commit 1dab881
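In practical terms, this commit lets the model_config keyword of evaluate accept either the existing dict form or an AzureOpenAIModelConfiguration object. A minimal sketch of the two forms follows; the endpoint, key, and deployment values are placeholders and not part of the commit:

from azure.ai.resources.entities import AzureOpenAIModelConfiguration

# Existing dict form, still accepted after this change.
model_config_dict = {
    "api_version": "2023-05-15",
    "api_base": "<azure-openai-endpoint>",
    "api_type": "azure",
    "api_key": "<azure-openai-key>",
    "deployment_id": "<evaluation-deployment>",
}

# New object form supported by this commit.
model_config_object = AzureOpenAIModelConfiguration(
    api_version="2023-05-15",
    api_base="<azure-openai-endpoint>",
    api_key="<azure-openai-key>",
    deployment_name="<evaluation-deployment>",
    model_name="<evaluation-model>",
    model_kwargs=None,
)

# Either value can be passed as evaluate(..., model_config=...).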

4 files changed (+121, -29 lines)


sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_evaluate.py

Lines changed: 27 additions & 7 deletions
@@ -12,7 +12,7 @@
 from collections import Counter
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Optional, Dict, List, Mapping
+from typing import Callable, Optional, Dict, List, Mapping, Union
 from types import FunctionType
 
 import mlflow
@@ -37,6 +37,7 @@
 
 from ._utils import _write_properties_to_run_history
 from .metrics._custom_metric import CodeMetric, PromptMetric, Metric as GenAIMetric
+from azure.ai.resources.entities import AzureOpenAIModelConfiguration
 
 LOGGER = logging.getLogger(__name__)
 
@@ -125,7 +126,7 @@ def evaluate(
     data: Optional[str] = None,
     task_type: Optional[str] = None,
     metrics_list: Optional[List[str]] = None,
-    model_config: Optional[Dict[str, str]] = None,
+    model_config: Optional[Union[Dict[str, str], "AzureOpenAIModelConfiguration"]] = None,
     data_mapping: Optional[Dict[str, str]] = None,
     output_path: Optional[str] = None,
     **kwargs
@@ -145,7 +146,7 @@ def evaluate(
     :keyword metrics_list: List of metrics to calculate. A default list is picked based on task_type if not set.
     :paramtype metrics_list: Optional[List[str]]
     :keyword model_config: GPT configuration details needed for AI-assisted metrics.
-    :paramtype model_config: Optional[Dict[str, str]]
+    :paramtype model_config: Optional[Union[Dict[str, str], AzureOpenAIModelConfiguration]]
     :keyword data_mapping: GPT configuration details needed for AI-assisted metrics.
     :paramtype data_mapping: Optional[Dict[str, str]]
     :keyword output_path: The local folder path to save evaluation artifacts to if set
@@ -163,15 +164,34 @@ def evaluate(
             :language: python
             :dedent: 8
             :caption: Evaluates target or data with built-in evaluation metrics.
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ai_samples_evaluate.py
+            :start-after: [START evaluate_custom_metrics]
+            :end-before: [END evaluate_custom_metrics]
+            :language: python
+            :dedent: 8
+            :caption: Evaluates target or data with custom evaluation metrics.
+
     """
 
     results_list = []
-    metrics_config = {}
     if "tracking_uri" in kwargs:
         mlflow.set_tracking_uri(kwargs.get("tracking_uri"))
 
+    model_config_dict: Dict[str, str] = {}
     if model_config:
-        metrics_config.update({"openai_params": model_config})
+        if isinstance(model_config, Dict):
+            model_config_dict = model_config
+        elif isinstance(model_config, AzureOpenAIModelConfiguration):
+            model_config_dict.update({
+                "api_version": model_config.api_version,
+                "api_base": model_config.api_base,
+                "api_type": "azure",
+                "api_key": model_config.api_key,
+                "deployment_id": model_config.deployment_name
+            })
 
 
     if data_mapping:
@@ -204,7 +224,7 @@ def evaluate(
             target=target,
             data=data,
             task_type=task_type,
-            model_config=model_config,
+            model_config=model_config_dict,
            data_mapping=data_mapping,
            params_dict=params_permutations_dict,
            metrics=metrics_list,
@@ -219,7 +239,7 @@ def evaluate(
             target=target,
             data=data,
             task_type=task_type,
-            model_config=model_config,
+            model_config=model_config_dict,
             data_mapping=data_mapping,
             metrics=metrics_list,
             output_path=output_path,
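For readability, the branch added above can be read as a single normalization step: dicts pass through unchanged, and a configuration object is flattened into the dict keys the AI-assisted metrics already expect. The sketch below restates that logic as a standalone function; the helper name _to_model_config_dict is illustrative only and does not exist in the module.

from typing import Dict, Union

from azure.ai.resources.entities import AzureOpenAIModelConfiguration


def _to_model_config_dict(
    model_config: Union[Dict[str, str], AzureOpenAIModelConfiguration]
) -> Dict[str, str]:
    # Dicts pass through untouched, preserving the pre-existing behavior.
    if isinstance(model_config, dict):
        return model_config
    # An AzureOpenAIModelConfiguration is flattened to the keys the
    # metrics layer expects; note deployment_name maps to deployment_id.
    return {
        "api_version": model_config.api_version,
        "api_base": model_config.api_base,
        "api_type": "azure",
        "api_key": model_config.api_key,
        "deployment_id": model_config.deployment_name,
    }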

sdk/ai/azure-ai-generative/samples/ai_samples_evaluate.py

Lines changed: 82 additions & 0 deletions
@@ -57,6 +57,88 @@ def sample_chat(question):
 
         # [END evaluate_task_type_qa]
 
+        # [START evaluate_custom_metrics]
+        import os
+        from azure.ai.generative import evaluate
+        from azure.ai.resources.client import AIClient
+        from azure.identity import DefaultAzureCredential
+        from azure.ai.generative.evaluate.metrics import PromptMetric
+
+        data_location = "<path_to_data_in_jsonl_format>"
+
+        def sample_chat(question):
+            # Logic for chat application ....
+            return question
+
+        # Code Metric
+        def answer_length(*, data, **kwargs):
+            return {
+                "answer_length": len(data.get("answer")),
+            }
+
+        # Prompt Metric
+        custom_relevance = PromptMetric(
+            name="custom_relevance",
+            prompt="""
+            System:
+            You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric.
+
+            User:
+            Relevance measures how well the answer addresses the main aspects of the question, based on the context. Consider whether all and only the important aspects are contained in the answer when evaluating relevance. Given the context and question, score the relevance of the answer between one to five stars using the following rating scale:
+            One star: the answer completely lacks relevance
+            Two stars: the answer mostly lacks relevance
+            Three stars: the answer is partially relevant
+            Four stars: the answer is mostly relevant
+            Five stars: the answer has perfect relevance
+
+            This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5.
+
+            context: Marie Curie was a Polish-born physicist and chemist who pioneered research on radioactivity and was the first woman to win a Nobel Prize.
+            question: What field did Marie Curie excel in?
+            answer: Marie Curie was a renowned painter who focused mainly on impressionist styles and techniques.
+            stars: 1
+
+            context: The Beatles were an English rock band formed in Liverpool in 1960, and they are widely regarded as the most influential music band in history.
+            question: Where were The Beatles formed?
+            answer: The band The Beatles began their journey in London, England, and they changed the history of music.
+            stars: 2
+
+            context: {{context}}
+            question: {{question}}
+            answer: {{answer}}
+            stars:
+
+            Your response must include following fields and should be in json format:
+            score: Number of stars based on definition above
+            reason: Reason why the score was given
+            """
+        )
+
+        client = AIClient.from_config(DefaultAzureCredential())
+        result = evaluate(
+            evaluation_name="my-evaluation",
+            target=sample_chat,  # Optional if provided evaluate will call target with data provided
+            data=data_location,
+            task_type="qa",
+            metrics_list=["gpt_groundedness", answer_length, custom_relevance],
+            data_mapping={
+                "questions": "question",
+                "contexts": "context",
+                "y_pred": "answer",
+                "y_test": "truth"
+            },
+            model_config={
+                "api_version": "2023-05-15",
+                "api_base": os.getenv("OPENAI_API_BASE"),
+                "api_type": "azure",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+                "deployment_id": os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT")
+            },
+            tracking_uri=client.tracking_uri,
+        )
+
+        # [END evaluate_custom_metrics]
+
 
 if __name__ == "__main__":
     sample = AIEvaluateSamples()
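The sample above keeps the dict form of model_config. With the change in _evaluate.py, the same evaluate call could instead be given the configuration object. The following is a hedged variant, not part of the commit, reusing the sample's environment variables; model_name reuses the deployment variable here, as the e2e test does, and may need adjusting:

from azure.ai.resources.entities import AzureOpenAIModelConfiguration

# Object form of the same Azure OpenAI settings used in the sample above.
aoai_config = AzureOpenAIModelConfiguration(
    api_version="2023-05-15",
    api_base=os.getenv("OPENAI_API_BASE"),
    api_key=os.getenv("OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT"),
    model_name=os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT"),
    model_kwargs=None,
)

# Drop-in replacement for the dict literal in the call above:
# evaluate(..., model_config=aoai_config, ...)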

sdk/ai/azure-ai-generative/tests/evaluate/e2etests/test_evaluate_e2e.py

Lines changed: 12 additions & 7 deletions
@@ -10,6 +10,7 @@
 import pytest
 from devtools_testutils import AzureRecordedTestCase, recorded_by_proxy
 from azure.ai.generative.evaluate import evaluate
+from azure.ai.resources.entities import AzureOpenAIModelConfiguration
 
 logger = logging.getLogger(__name__)
 
@@ -97,6 +98,16 @@ def test_duplicate_metrics_name(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
 
 
     def test_custom_metrics_name(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
+
+        aoai_configuration = AzureOpenAIModelConfiguration(
+            api_version="2023-03-15-preview",
+            api_base=e2e_openai_api_base,
+            api_key=e2e_openai_api_key,
+            deployment_name=e2e_openai_completion_deployment_name,
+            model_name=e2e_openai_completion_deployment_name,
+            model_kwargs=None
+        )
+
         test_data = [
             {"question": "How do you create a run?", "context": "AML API only",
              "answer": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment."},
@@ -124,13 +135,7 @@ async def answer_length(*, data, **kwargs):
             data=test_data,
             task_type="qa",
             metrics_list=[custom_prompt_metric, answer_length],
-            model_config={
-                "api_version": "2023-07-01-preview",
-                "api_base": e2e_openai_api_base,
-                "api_type": "azure",
-                "api_key": e2e_openai_api_key,
-                "deployment_id": e2e_openai_completion_deployment_name,
-            },
+            model_config=aoai_configuration,
             data_mapping={
                 "questions": "question",
                 "contexts": "context",

sdk/ai/azure-ai-generative/tests/evaluate/e2etests/test_template.jinja2

Lines changed: 0 additions & 15 deletions
@@ -21,21 +21,6 @@ question: Where were The Beatles formed?
 answer: The band The Beatles began their journey in London, England, and they changed the history of music.
 stars: 2
 
-context: The recent Mars rover, Perseverance, was launched in 2020 with the main goal of searching for signs of ancient life on Mars. The rover also carries an experiment called MOXIE, which aims to generate oxygen from the Martian atmosphere.
-question: What are the main goals of Perseverance Mars rover mission?
-answer: The Perseverance Mars rover mission focuses on searching for signs of ancient life on Mars.
-stars: 3
-
-context: The Mediterranean diet is a commonly recommended dietary plan that emphasizes fruits, vegetables, whole grains, legumes, lean proteins, and healthy fats. Studies have shown that it offers numerous health benefits, including a reduced risk of heart disease and improved cognitive health.
-question: What are the main components of the Mediterranean diet?
-answer: The Mediterranean diet primarily consists of fruits, vegetables, whole grains, and legumes.
-stars: 4
-
-context: The Queen's Royal Castle is a well-known tourist attraction in the United Kingdom. It spans over 500 acres and contains extensive gardens and parks. The castle was built in the 15th century and has been home to generations of royalty.
-question: What are the main attractions of the Queen's Royal Castle?
-answer: The main attractions of the Queen's Royal Castle are its expansive 500-acre grounds, extensive gardens, parks, and the historical castle itself, which dates back to the 15th century and has housed generations of royalty.
-stars: 5
-
 context: {{context}}
 question: {{question}}
 answer: {{answer}}
