Commit bb3b3e2

Change Model Configurations to TypedDict (#37429)
* remove public evaluators and evaluate modules
* rename synthetic to simulator and expose only one namespace
* clean up some references
* fix some broken imports
* add details on breaking change
* fix changelog grammar issue
* fix changelog grammar issue
* attempt at fixing tests
* change patch
* disable verifytypes
* start work on adding model config classes
* add model configuration classes
* change type hint for azure_ai_project to AzureAIProject typeddict
* convert model configs to dictionaries
* update readme and changelog
* fix spell check issue
1 parent bd2d9b2 commit bb3b3e2

33 files changed: +249 −154 lines


sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -7,10 +7,12 @@
 - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
 - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
 - The parameter name `project_scope` in content safety evaluators has been renamed to `azure_ai_project` for consistency with the evaluate API and simulators.
+- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
 - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.


 ### Features Added

 - First preview
 - This package is a port of `promptflow-evals`. New features will be added only to this package moving forward.
+- Added a `TypedDict` for `AzureAIProject` that allows for better IntelliSense and type checking when passing in project information
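To illustrate what this breaking change means in practice, here is a minimal sketch: the model configuration becomes a plain dictionary that can optionally be annotated with the new `TypedDict` class for IntelliSense and type checking. The environment variable names follow the README example in this commit; nothing else is assumed.

```python
# Minimal sketch of the new TypedDict-based model configuration.
import os

from azure.ai.evaluation import AzureOpenAIModelConfiguration

# A plain dict at runtime; the annotation only drives IntelliSense/type checking.
model_config: AzureOpenAIModelConfiguration = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}
```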

sdk/evaluation/azure-ai-evaluation/README.md

Lines changed: 8 additions & 11 deletions
@@ -8,7 +8,6 @@ Install the Azure AI Evaluation library for Python with:
 
 ```bash
 pip install azure-ai-evaluation
-pip install azure-identity
 ```
 
 ## Key concepts
@@ -23,8 +22,6 @@ Users can create evaluator runs on the local machine as shown in the example below:
 import os
 from pprint import pprint
 
-from promptflow.core import AzureOpenAIModelConfiguration
-
 from azure.ai.evaluation import evaluate, RelevanceEvaluator, ViolenceEvaluator
 
 
@@ -34,12 +31,12 @@ def response_length(response, **kwargs):
 
 if __name__ == "__main__":
     # Built-in evaluators
-    # Initialize Azure OpenAI Connection
-    model_config = AzureOpenAIModelConfiguration(
-        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
-        api_key=os.environ.get("AZURE_OPENAI_KEY"),
-        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
-    )
+    # Initialize Azure OpenAI Model Configuration
+    model_config = {
+        "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+        "api_key": os.environ.get("AZURE_OPENAI_KEY"),
+        "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
+    }
 
     # Initializing Relevance Evaluator
     relevance_eval = RelevanceEvaluator(model_config)
@@ -91,8 +88,8 @@ if __name__ == "__main__":
     pprint(result)
 ```
 
-Simulator expects the user to have a callback method that invokes their AI application.
-Here's a sample of a callback which invokes AsyncAzureOpenAI:
+Simulators allow users to generate synthetic data using their application. The simulator expects the user to have a callback method that invokes
+their AI application. Here's a sample of a callback which invokes AsyncAzureOpenAI:
 
 ```python
 from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
```
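The README's callback snippet is truncated in this hunk. The following is a hedged sketch of the kind of callback a simulator can drive, built on `openai.AsyncAzureOpenAI`; the parameter names and the `messages`/`stream`/`session_state`/`context` envelope are assumptions about the expected contract rather than something confirmed by this diff.

```python
# A hedged sketch of a simulator callback; the signature and return shape are assumptions.
import os
from typing import Any, Dict, List, Optional

from openai import AsyncAzureOpenAI


async def callback(
    messages: Dict[str, List[Dict[str, Any]]],
    stream: bool = False,
    session_state: Optional[Any] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    # Forward the conversation so far to the application under test.
    client = AsyncAzureOpenAI(
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        api_key=os.environ["AZURE_OPENAI_KEY"],
        api_version="2024-02-15-preview",
    )
    completion = await client.chat.completions.create(
        model=os.environ["AZURE_OPENAI_DEPLOYMENT"],
        messages=messages["messages"],
    )
    # Append the application's reply and hand everything back to the simulator.
    reply = {"role": "assistant", "content": completion.choices[0].message.content}
    messages["messages"].append(reply)
    return {
        "messages": messages["messages"],
        "stream": stream,
        "session_state": session_state,
        "context": context,
    }
```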

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py

Lines changed: 8 additions & 0 deletions
@@ -25,6 +25,11 @@
 from ._evaluators._rouge import RougeScoreEvaluator, RougeType
 from ._evaluators._similarity import SimilarityEvaluator
 from ._evaluators._xpia import IndirectAttackEvaluator
+from ._model_configurations import (
+    AzureAIProject,
+    AzureOpenAIModelConfiguration,
+    OpenAIModelConfiguration,
+)
 
 __all__ = [
     "evaluate",
@@ -49,4 +54,7 @@
     "RougeScoreEvaluator",
     "RougeType",
     "ProtectedMaterialEvaluator",
+    "AzureAIProject",
+    "AzureOpenAIModelConfiguration",
+    "OpenAIModelConfiguration",
 ]

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 4 additions & 3 deletions
@@ -15,6 +15,7 @@
 from azure.identity import DefaultAzureCredential
 
 from azure.ai.evaluation._http_utils import get_async_http_client
+from azure.ai.evaluation._model_configurations import AzureAIProject
 
 from .constants import (
     CommonConstants,
@@ -327,11 +328,11 @@ def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -
     return result
 
 
-async def _get_service_discovery_url(azure_ai_project: dict, token: str) -> str:
+async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: str) -> str:
     """Get the discovery service URL for the Azure AI project
 
     :param azure_ai_project: The Azure AI project details.
-    :type azure_ai_project: Dict
+    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
     :param token: The Azure authentication token.
     :type token: str
     :return: The discovery service URL.
@@ -410,7 +411,7 @@ async def fetch_or_reuse_token(credential: TokenCredential, token: str = None) -
 
 
 async def evaluate_with_rai_service(
-    query: str, response: str, metric_name: str, project_scope: dict, credential: TokenCredential
+    query: str, response: str, metric_name: str, project_scope: AzureAIProject, credential: TokenCredential
 ):
     """ "Evaluate the content safety of the response using Responsible AI service
 
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

Lines changed: 24 additions & 0 deletions
@@ -1,6 +1,11 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+
+from typing import Optional, Union
+
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+
 try:
     from . import constants
 except ImportError:
@@ -54,3 +59,22 @@ def nltk_tokenize(text: str) -> List[str]:
     tokens = nltk.word_tokenize(text)
 
     return list(tokens)
+
+
+def check_and_add_api_version_for_aoai_model_config(
+    model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+    default_api_version: str,
+) -> None:
+    if (
+        "azure_endpoint" in model_config or "azure_deployment" in model_config
+    ):
+        model_config["api_version"] = model_config.get("api_version", default_api_version)
+
+
+def check_and_add_user_agent_for_aoai_model_config(
+    model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+    prompty_model_config: dict,
+    user_agent: Optional[str] = None,
+) -> None:
+    if user_agent and ("azure_endpoint" in model_config or "azure_deployment" in model_config):
+        prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": user_agent})

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 9 additions & 9 deletions
@@ -12,6 +12,7 @@
 from promptflow._sdk._constants import LINE_NUMBER
 from promptflow.client import PFClient
 
+from .._model_configurations import AzureAIProject
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
     EvaluationMetrics,
@@ -360,7 +361,7 @@ def evaluate(
     data: Optional[str] = None,
     evaluators: Optional[Dict[str, Callable]] = None,
     evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
-    azure_ai_project: Optional[Dict] = None,
+    azure_ai_project: Optional[AzureAIProject] = None,
     output_path: Optional[str] = None,
     **kwargs,
 ):
@@ -386,7 +387,7 @@ def evaluate(
         the results will be saved to a file named `evaluation_results.json` in the folder.
     :paramtype output_path: Optional[str]
     :keyword azure_ai_project: Logs evaluation results to AI Studio if set.
-    :paramtype azure_ai_project: Optional[Dict]
+    :paramtype azure_ai_project: Optional[~azure.ai.evaluation.AzureAIProject]
     :return: Evaluation results.
     :rtype: dict
@@ -396,15 +397,14 @@ def evaluate(
 
     .. code-block:: python
 
-        from promptflow.core import AzureOpenAIModelConfiguration
         from azure.ai.evaluation import evaluate, RelevanceEvaluator, CoherenceEvaluator
 
 
-        model_config = AzureOpenAIModelConfiguration(
-            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
-            api_key=os.environ.get("AZURE_OPENAI_KEY"),
-            azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT")
-        )
+        model_config = {
+            "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+            "api_key": os.environ.get("AZURE_OPENAI_KEY"),
+            "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT")
+        }
 
         coherence_eval = CoherenceEvaluator(model_config=model_config)
         relevance_eval = RelevanceEvaluator(model_config=model_config)
@@ -466,7 +466,7 @@ def _evaluate( # pylint: disable=too-many-locals
     data: Optional[str] = None,
     evaluators: Optional[Dict[str, Callable]] = None,
     evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
-    azure_ai_project: Optional[Dict] = None,
+    azure_ai_project: Optional[AzureAIProject] = None,
     output_path: Optional[str] = None,
     **kwargs,
 ):
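Because `azure_ai_project` is now typed as the `AzureAIProject` TypedDict, a call site might look like the hedged sketch below. The project dict keys are assumptions inferred from the "subscription id, resource group, and project name" docstring later in this commit, and the data file is a placeholder.

```python
# Hedged sketch of an evaluate() call with the new TypedDict-shaped project scope.
# Project keys are assumptions; "data.jsonl" is a placeholder dataset.
import os

from azure.ai.evaluation import CoherenceEvaluator, evaluate

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP"),
    "project_name": os.environ.get("AZURE_PROJECT_NAME"),
}

result = evaluate(
    data="data.jsonl",
    evaluators={"coherence": CoherenceEvaluator(model_config=model_config)},
    azure_ai_project=azure_ai_project,  # logs results to AI Studio when set
)
```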

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_chat/_chat.py

Lines changed: 5 additions & 5 deletions
@@ -8,12 +8,12 @@
 
 import numpy as np
 
-from promptflow.core import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 
 from .._coherence import CoherenceEvaluator
 from .._fluency import FluencyEvaluator
 from .._groundedness import GroundednessEvaluator
+from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 from .._relevance import RelevanceEvaluator
 from .retrieval import RetrievalChatEvaluator
 
@@ -25,8 +25,8 @@ class ChatEvaluator:
     Initialize a chat evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
-        ~promptflow.core.OpenAIModelConfiguration]
+    :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+        ~azure.ai.evaluation.OpenAIModelConfiguration]
     :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
         focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
     :type eval_last_turn: bool
@@ -46,7 +46,7 @@ class ChatEvaluator:
             {"role": "assistant", "content": "2 + 2 = 4", "context": {
                 "citations": [
                     {"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
-                ]
+                    ]
             }
             }
         ]
@@ -74,7 +74,7 @@ class ChatEvaluator:
 
     def __init__(
         self,
-        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        model_config: dict,
         eval_last_turn: bool = False,
         parallel: bool = True,
     ):

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py

Lines changed: 17 additions & 12 deletions
@@ -11,7 +11,13 @@
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+from promptflow.core import AsyncPrompty
+
+from ...._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+from ...._common.utils import (
+    check_and_add_api_version_for_aoai_model_config,
+    check_and_add_user_agent_for_aoai_model_config,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -27,21 +33,20 @@ class _AsyncRetrievalChatEvaluator:
     LLM_CALL_TIMEOUT = 600
     DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
-        if (
-            isinstance(model_config, AzureOpenAIModelConfiguration)
-            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
-        ):
-            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
+    def __init__(self, model_config: dict):
+        check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
         # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
         # https://github.com/encode/httpx/discussions/2959
         prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
 
-        if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration):
-            prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": USER_AGENT})
+        check_and_add_user_agent_for_aoai_model_config(
+            model_config,
+            prompty_model_config,
+            USER_AGENT,
+        )
 
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self.PROMPTY_FILE)
@@ -107,8 +112,8 @@ class RetrievalChatEvaluator:
     Initialize an evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
-        ~promptflow.core.OpenAIModelConfiguration]
+    :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+        ~azure.ai.evaluation.OpenAIModelConfiguration]
     :return: A function that evaluates and generates metrics for "chat" scenario.
     :rtype: Callable
     **Usage**
@@ -141,7 +146,7 @@ class RetrievalChatEvaluator:
             }
         }
     """
 
-    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+    def __init__(self, model_config: dict):
        self._async_evaluator = _AsyncRetrievalChatEvaluator(model_config)
 
     def __call__(self, *, conversation, **kwargs):

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import numpy as np
1010

1111
from promptflow._utils.async_utils import async_run_allowing_running_loop
12-
from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
12+
from promptflow.core import AsyncPrompty
13+
14+
from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
15+
from ..._common.utils import (
16+
check_and_add_api_version_for_aoai_model_config,
17+
check_and_add_user_agent_for_aoai_model_config,
18+
)
1319

1420
try:
1521
from ..._user_agent import USER_AGENT
@@ -23,21 +29,20 @@ class _AsyncCoherenceEvaluator:
2329
LLM_CALL_TIMEOUT = 600
2430
DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
2531

26-
def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
27-
if (
28-
isinstance(model_config, AzureOpenAIModelConfiguration)
29-
and (not hasattr(model_config, "api_version") or model_config.api_version) is None
30-
):
31-
model_config.api_version = self.DEFAULT_OPEN_API_VERSION
32+
def __init__(self, model_config: dict):
33+
check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
3234

3335
prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
3436

3537
# Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
3638
# https://github.com/encode/httpx/discussions/2959
3739
prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
3840

39-
if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration):
40-
prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": USER_AGENT})
41+
check_and_add_user_agent_for_aoai_model_config(
42+
model_config,
43+
prompty_model_config,
44+
USER_AGENT,
45+
)
4146

4247
current_dir = os.path.dirname(__file__)
4348
prompty_path = os.path.join(current_dir, self.PROMPTY_FILE)
@@ -68,8 +73,8 @@ class CoherenceEvaluator:
6873
Initialize a coherence evaluator configured for a specific Azure OpenAI model.
6974
7075
:param model_config: Configuration for the Azure OpenAI model.
71-
:type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
72-
~promptflow.core.OpenAIModelConfiguration]
76+
:type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
77+
~azure.ai.evaluation.OpenAIModelConfiguration]
7378
7479
**Usage**
7580
@@ -89,7 +94,7 @@ class CoherenceEvaluator:
8994
}
9095
"""
9196

92-
def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
97+
def __init__(self, model_config: dict):
9398
self._async_evaluator = _AsyncCoherenceEvaluator(model_config)
9499

95100
def __call__(self, *, query: str, response: str, **kwargs):
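With this change the evaluator accepts a plain dictionary and is invoked with the renamed `query`/`response` keywords. A hedged usage sketch, reusing the environment variable names from the README example above:

```python
# Hedged usage sketch; env var names follow the README example in this commit.
import os

from azure.ai.evaluation import CoherenceEvaluator

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

coherence_eval = CoherenceEvaluator(model_config=model_config)
score = coherence_eval(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
)
print(score)
```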

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
77

8+
from azure.ai.evaluation._model_configurations import AzureAIProject
9+
810
try:
911
from ._hate_unfairness import HateUnfairnessEvaluator
1012
from ._self_harm import SelfHarmEvaluator
@@ -23,7 +25,7 @@ class ContentSafetyEvaluator:
2325
2426
:param azure_ai_project: The scope of the Azure AI project.
2527
It contains subscription id, resource group, and project name.
26-
:type azure_ai_project: dict
28+
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
2729
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
2830
Default is True.
2931
:param credential: The credential for connecting to Azure AI project.
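To round out the change, a hedged sketch of constructing the content safety evaluator with the new project-scope dictionary. The dict keys are assumptions inferred from the docstring above ("subscription id, resource group, and project name"), the root-level import of `ContentSafetyEvaluator` is assumed from the changelog's note that evaluators moved to `azure.ai.evaluation`, and `DefaultAzureCredential` is just one plausible credential.

```python
# Hedged sketch; project keys and the evaluator call signature are assumptions.
import os

from azure.identity import DefaultAzureCredential

from azure.ai.evaluation import ContentSafetyEvaluator

azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP"),
    "project_name": os.environ.get("AZURE_PROJECT_NAME"),
}

content_safety_eval = ContentSafetyEvaluator(
    azure_ai_project=azure_ai_project,
    credential=DefaultAzureCredential(),
)
result = content_safety_eval(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
)
print(result)
```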
