
Commit 1dab881

Supporting passing Model Configuration object (#34088)
* Supporting passing Model Configuration object
* Update ai_samples_evaluate.py
* Update ai_samples_evaluate.py
* Update _evaluate.py
1 parent a97069d commit 1dab881
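In practical terms, this commit lets the model_config keyword of evaluate accept either the existing dict form or an AzureOpenAIModelConfiguration object. A minimal sketch of the two forms follows; the endpoint, key, and deployment values are placeholders and not part of the commit:

from azure.ai.resources.entities import AzureOpenAIModelConfiguration

# Existing dict form, still accepted after this change.
model_config_dict = {
    "api_version": "2023-05-15",
    "api_base": "<azure-openai-endpoint>",
    "api_type": "azure",
    "api_key": "<azure-openai-key>",
    "deployment_id": "<evaluation-deployment>",
}

# New object form supported by this commit.
model_config_object = AzureOpenAIModelConfiguration(
    api_version="2023-05-15",
    api_base="<azure-openai-endpoint>",
    api_key="<azure-openai-key>",
    deployment_name="<evaluation-deployment>",
    model_name="<evaluation-model>",
    model_kwargs=None,
)

# Either value can be passed as evaluate(..., model_config=...).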

4 files changed (+121, -29 lines)


sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_evaluate.py

Lines changed: 27 additions & 7 deletions
@@ -12,7 +12,7 @@
 from collections import Counter
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Optional, Dict, List, Mapping
+from typing import Callable, Optional, Dict, List, Mapping, Union
 from types import FunctionType
 
 import mlflow
@@ -37,6 +37,7 @@
 
 from ._utils import _write_properties_to_run_history
 from .metrics._custom_metric import CodeMetric, PromptMetric, Metric as GenAIMetric
+from azure.ai.resources.entities import AzureOpenAIModelConfiguration
 
 LOGGER = logging.getLogger(__name__)
 
@@ -125,7 +126,7 @@ def evaluate(
     data: Optional[str] = None,
     task_type: Optional[str] = None,
     metrics_list: Optional[List[str]] = None,
-    model_config: Optional[Dict[str, str]] = None,
+    model_config: Optional[Union[Dict[str, str], "AzureOpenAIModelConfiguration"]] = None,
     data_mapping: Optional[Dict[str, str]] = None,
     output_path: Optional[str] = None,
     **kwargs
@@ -145,7 +146,7 @@ def evaluate(
     :keyword metrics_list: List of metrics to calculate. A default list is picked based on task_type if not set.
     :paramtype metrics_list: Optional[List[str]]
     :keyword model_config: GPT configuration details needed for AI-assisted metrics.
-    :paramtype model_config: Optional[Dict[str, str]]
+    :paramtype model_config: Optional[Union[Dict[str, str], AzureOpenAIModelConfiguration]]
     :keyword data_mapping: GPT configuration details needed for AI-assisted metrics.
     :paramtype data_mapping: Optional[Dict[str, str]]
     :keyword output_path: The local folder path to save evaluation artifacts to if set
@@ -163,15 +164,34 @@ def evaluate(
             :language: python
             :dedent: 8
             :caption: Evaluates target or data with built-in evaluation metrics.
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ai_samples_evaluate.py
+            :start-after: [START evaluate_custom_metrics]
+            :end-before: [END evaluate_custom_metrics]
+            :language: python
+            :dedent: 8
+            :caption: Evaluates target or data with custom evaluation metrics.
+
     """
 
     results_list = []
-    metrics_config = {}
     if "tracking_uri" in kwargs:
         mlflow.set_tracking_uri(kwargs.get("tracking_uri"))
 
+    model_config_dict: Dict[str, str] = {}
     if model_config:
-        metrics_config.update({"openai_params": model_config})
+        if isinstance(model_config, Dict):
+            model_config_dict = model_config
+        elif isinstance(model_config, AzureOpenAIModelConfiguration):
+            model_config_dict.update({
+                "api_version": model_config.api_version,
+                "api_base": model_config.api_base,
+                "api_type": "azure",
+                "api_key": model_config.api_key,
+                "deployment_id": model_config.deployment_name
+            })
 
 
     if data_mapping:
@@ -204,7 +224,7 @@ def evaluate(
             target=target,
             data=data,
             task_type=task_type,
-            model_config=model_config,
+            model_config=model_config_dict,
            data_mapping=data_mapping,
            params_dict=params_permutations_dict,
            metrics=metrics_list,
@@ -219,7 +239,7 @@ def evaluate(
             target=target,
             data=data,
             task_type=task_type,
-            model_config=model_config,
+            model_config=model_config_dict,
             data_mapping=data_mapping,
             metrics=metrics_list,
             output_path=output_path,
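For readability, the branch added above can be read as a single normalization step: dicts pass through unchanged, and a configuration object is flattened into the dict keys the AI-assisted metrics already expect. The sketch below restates that logic as a standalone function; the helper name _to_model_config_dict is illustrative only and does not exist in the module.

from typing import Dict, Union

from azure.ai.resources.entities import AzureOpenAIModelConfiguration


def _to_model_config_dict(
    model_config: Union[Dict[str, str], AzureOpenAIModelConfiguration]
) -> Dict[str, str]:
    # Dicts pass through untouched, preserving the pre-existing behavior.
    if isinstance(model_config, dict):
        return model_config
    # An AzureOpenAIModelConfiguration is flattened to the keys the
    # metrics layer expects; note deployment_name maps to deployment_id.
    return {
        "api_version": model_config.api_version,
        "api_base": model_config.api_base,
        "api_type": "azure",
        "api_key": model_config.api_key,
        "deployment_id": model_config.deployment_name,
    }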

sdk/ai/azure-ai-generative/samples/ai_samples_evaluate.py

Lines changed: 82 additions & 0 deletions
@@ -57,6 +57,88 @@ def sample_chat(question):
 
         # [END evaluate_task_type_qa]
 
+        # [START evaluate_custom_metrics]
+        import os
+        from azure.ai.generative import evaluate
+        from azure.ai.resources.client import AIClient
+        from azure.identity import DefaultAzureCredential
+        from azure.ai.generative.evaluate.metrics import PromptMetric
+
+        data_location = "<path_to_data_in_jsonl_format>"
+
+        def sample_chat(question):
+            # Logic for chat application ....
+            return question
+
+        # Code Metric
+        def answer_length(*, data, **kwargs):
+            return {
+                "answer_length": len(data.get("answer")),
+            }
+
+        # Prompt Metric
+        custom_relevance = PromptMetric(
+            name="custom_relevance",
+            prompt="""
+            System:
+            You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric.
+
+            User:
+            Relevance measures how well the answer addresses the main aspects of the question, based on the context. Consider whether all and only the important aspects are contained in the answer when evaluating relevance. Given the context and question, score the relevance of the answer between one to five stars using the following rating scale:
+            One star: the answer completely lacks relevance
+            Two stars: the answer mostly lacks relevance
+            Three stars: the answer is partially relevant
+            Four stars: the answer is mostly relevant
+            Five stars: the answer has perfect relevance
+
+            This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5.
+
+            context: Marie Curie was a Polish-born physicist and chemist who pioneered research on radioactivity and was the first woman to win a Nobel Prize.
+            question: What field did Marie Curie excel in?
+            answer: Marie Curie was a renowned painter who focused mainly on impressionist styles and techniques.
+            stars: 1
+
+            context: The Beatles were an English rock band formed in Liverpool in 1960, and they are widely regarded as the most influential music band in history.
+            question: Where were The Beatles formed?
+            answer: The band The Beatles began their journey in London, England, and they changed the history of music.
+            stars: 2
+
+            context: {{context}}
+            question: {{question}}
+            answer: {{answer}}
+            stars:
+
+            Your response must include following fields and should be in json format:
+            score: Number of stars based on definition above
+            reason: Reason why the score was given
+            """
+        )
+
+        client = AIClient.from_config(DefaultAzureCredential())
+        result = evaluate(
+            evaluation_name="my-evaluation",
+            target=sample_chat,  # Optional if provided evaluate will call target with data provided
+            data=data_location,
+            task_type="qa",
+            metrics_list=["gpt_groundedness", answer_length, custom_relevance],
+            data_mapping={
+                "questions": "question",
+                "contexts": "context",
+                "y_pred": "answer",
+                "y_test": "truth"
+            },
+            model_config={
+                "api_version": "2023-05-15",
+                "api_base": os.getenv("OPENAI_API_BASE"),
+                "api_type": "azure",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+                "deployment_id": os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT")
+            },
+            tracking_uri=client.tracking_uri,
+        )
+
+        # [END evaluate_custom_metrics]
+
 
 if __name__ == "__main__":
     sample = AIEvaluateSamples()
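The sample above keeps the dict form of model_config. With the change in _evaluate.py, the same evaluate call could instead be given the configuration object. The following is a hedged variant, not part of the commit, reusing the sample's environment variables; model_name reuses the deployment variable here, as the e2e test does, and may need adjusting:

from azure.ai.resources.entities import AzureOpenAIModelConfiguration

# Object form of the same Azure OpenAI settings used in the sample above.
aoai_config = AzureOpenAIModelConfiguration(
    api_version="2023-05-15",
    api_base=os.getenv("OPENAI_API_BASE"),
    api_key=os.getenv("OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT"),
    model_name=os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT"),
    model_kwargs=None,
)

# Drop-in replacement for the dict literal in the call above:
# evaluate(..., model_config=aoai_config, ...)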

sdk/ai/azure-ai-generative/tests/evaluate/e2etests/test_evaluate_e2e.py

Lines changed: 12 additions & 7 deletions
@@ -10,6 +10,7 @@
 import pytest
 from devtools_testutils import AzureRecordedTestCase, recorded_by_proxy
 from azure.ai.generative.evaluate import evaluate
+from azure.ai.resources.entities import AzureOpenAIModelConfiguration
 
 logger = logging.getLogger(__name__)
 
@@ -97,6 +98,16 @@ def test_duplicate_metrics_name(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
 
 
     def test_custom_metrics_name(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
+
+        aoai_configuration = AzureOpenAIModelConfiguration(
+            api_version="2023-03-15-preview",
+            api_base=e2e_openai_api_base,
+            api_key=e2e_openai_api_key,
+            deployment_name=e2e_openai_completion_deployment_name,
+            model_name=e2e_openai_completion_deployment_name,
+            model_kwargs=None
+        )
+
         test_data = [
             {"question": "How do you create a run?", "context": "AML API only",
              "answer": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment."},
@@ -124,13 +135,7 @@ async def answer_length(*, data, **kwargs):
             data=test_data,
             task_type="qa",
             metrics_list=[custom_prompt_metric, answer_length],
-            model_config={
-                "api_version": "2023-07-01-preview",
-                "api_base": e2e_openai_api_base,
-                "api_type": "azure",
-                "api_key": e2e_openai_api_key,
-                "deployment_id": e2e_openai_completion_deployment_name,
-            },
+            model_config=aoai_configuration,
             data_mapping={
                 "questions": "question",
                 "contexts": "context",

sdk/ai/azure-ai-generative/tests/evaluate/e2etests/test_template.jinja2

Lines changed: 0 additions & 15 deletions
@@ -21,21 +21,6 @@ question: Where were The Beatles formed?
 answer: The band The Beatles began their journey in London, England, and they changed the history of music.
 stars: 2
 
-context: The recent Mars rover, Perseverance, was launched in 2020 with the main goal of searching for signs of ancient life on Mars. The rover also carries an experiment called MOXIE, which aims to generate oxygen from the Martian atmosphere.
-question: What are the main goals of Perseverance Mars rover mission?
-answer: The Perseverance Mars rover mission focuses on searching for signs of ancient life on Mars.
-stars: 3
-
-context: The Mediterranean diet is a commonly recommended dietary plan that emphasizes fruits, vegetables, whole grains, legumes, lean proteins, and healthy fats. Studies have shown that it offers numerous health benefits, including a reduced risk of heart disease and improved cognitive health.
-question: What are the main components of the Mediterranean diet?
-answer: The Mediterranean diet primarily consists of fruits, vegetables, whole grains, and legumes.
-stars: 4
-
-context: The Queen's Royal Castle is a well-known tourist attraction in the United Kingdom. It spans over 500 acres and contains extensive gardens and parks. The castle was built in the 15th century and has been home to generations of royalty.
-question: What are the main attractions of the Queen's Royal Castle?
-answer: The main attractions of the Queen's Royal Castle are its expansive 500-acre grounds, extensive gardens, parks, and the historical castle itself, which dates back to the 15th century and has housed generations of royalty.
-stars: 5
-
 context: {{context}}
 question: {{question}}
 answer: {{answer}}
