Skip to content

Commit b45d4cc

Browse files
Copilot and nagkumar91 authored
Add is_reasoning_model parameter support to evaluators (#43195)
* Initial plan
* Add is_reasoning_model parameter support to evaluators
  Co-authored-by: nagkumar91 <[email protected]>
* Add is_reasoning_model support to RetrievalEvaluator and RelevanceEvaluator
  Co-authored-by: nagkumar91 <[email protected]>

---------

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: nagkumar91 <[email protected]>
1 parent cd35ef3 commit b45d4cc

File tree

7 files changed

+42
-5
lines changed

7 files changed

+42
-5
lines changed

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Features Added
66

77
- AOAI Graders now accept a "credential" parameter that can be used for authentication with an AzureOpenAIModelConfiguration
8+
- Added `is_reasoning_model` parameter support to `CoherenceEvaluator`, `FluencyEvaluator`, `SimilarityEvaluator`, `GroundednessEvaluator`, `RetrievalEvaluator`, and `RelevanceEvaluator` to enable reasoning model configuration for o1/o3 models.
89

910
### Breaking Changes
1011

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
2323
~azure.ai.evaluation.OpenAIModelConfiguration]
2424
:param threshold: The threshold for the coherence evaluator. Default is 3.
2525
:type threshold: int
26+
:param credential: The credential for authenticating to Azure AI service.
27+
:type credential: ~azure.core.credentials.TokenCredential
28+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
29+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
30+
:paramtype is_reasoning_model: bool
2631
2732
.. admonition:: Example:
2833
@@ -66,7 +71,7 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
6671
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
6772

6873
@override
69-
def __init__(self, model_config, *, threshold=3, credential=None):
74+
def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
7075
current_dir = os.path.dirname(__file__)
7176
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
7277
self._threshold = threshold
@@ -78,6 +83,7 @@ def __init__(self, model_config, *, threshold=3, credential=None):
7883
threshold=threshold,
7984
credential=credential,
8085
_higher_is_better=self._higher_is_better,
86+
**kwargs,
8187
)
8288

8389
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_fluency/_fluency.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
2525
~azure.ai.evaluation.OpenAIModelConfiguration]
2626
:param threshold: The threshold for the fluency evaluator. Default is 3.
2727
:type threshold: int
28+
:param credential: The credential for authenticating to Azure AI service.
29+
:type credential: ~azure.core.credentials.TokenCredential
30+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
31+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
32+
:paramtype is_reasoning_model: bool
2833
2934
.. admonition:: Example:
3035
@@ -68,7 +73,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
6873
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
6974

7075
@override
71-
def __init__(self, model_config, *, credential=None, threshold=3):
76+
def __init__(self, model_config, *, credential=None, threshold=3, **kwargs):
7277
current_dir = os.path.dirname(__file__)
7378
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
7479
self._threshold = threshold
@@ -80,6 +85,7 @@ def __init__(self, model_config, *, credential=None, threshold=3):
8085
threshold=threshold,
8186
credential=credential,
8287
_higher_is_better=self._higher_is_better,
88+
**kwargs,
8389
)
8490

8591
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
4949
~azure.ai.evaluation.OpenAIModelConfiguration]
5050
:param threshold: The threshold for the groundedness evaluator. Default is 3.
5151
:type threshold: int
52+
:param credential: The credential for authenticating to Azure AI service.
53+
:type credential: ~azure.core.credentials.TokenCredential
54+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
55+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
56+
:paramtype is_reasoning_model: bool
5257
5358
.. admonition:: Example:
5459
@@ -106,6 +111,7 @@ def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
106111
threshold=threshold,
107112
credential=credential,
108113
_higher_is_better=self._higher_is_better,
114+
**kwargs,
109115
)
110116
self._model_config = model_config
111117
self.threshold = threshold

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ class RelevanceEvaluator(PromptyEvaluatorBase):
3535
~azure.ai.evaluation.OpenAIModelConfiguration]
3636
:param threshold: The threshold for the relevance evaluator. Default is 3.
3737
:type threshold: int
38+
:param credential: The credential for authenticating to Azure AI service.
39+
:type credential: ~azure.core.credentials.TokenCredential
40+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
41+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
42+
:paramtype is_reasoning_model: bool
3843
3944
.. admonition:: Example:
4045
@@ -79,7 +84,7 @@ class RelevanceEvaluator(PromptyEvaluatorBase):
7984
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
8085

8186
@override
82-
def __init__(self, model_config, *, credential=None, threshold=3):
87+
def __init__(self, model_config, *, credential=None, threshold=3, **kwargs):
8388
current_dir = os.path.dirname(__file__)
8489
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
8590
self._threshold = threshold
@@ -91,6 +96,7 @@ def __init__(self, model_config, *, credential=None, threshold=3):
9196
threshold=threshold,
9297
credential=credential,
9398
_higher_is_better=self._higher_is_better,
99+
**kwargs,
94100
)
95101

96102
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ class RetrievalEvaluator(PromptyEvaluatorBase[Union[str, float]]):
3333
~azure.ai.evaluation.OpenAIModelConfiguration]
3434
:param threshold: The threshold for the evaluation. Default is 3.
3535
:type threshold: float
36+
:param credential: The credential for authenticating to Azure AI service.
37+
:type credential: ~azure.core.credentials.TokenCredential
38+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
39+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
40+
:paramtype is_reasoning_model: bool
3641
:return: A function that evaluates and generates metrics for "chat" scenario.
3742
:rtype: Callable
3843
@@ -78,7 +83,7 @@ class RetrievalEvaluator(PromptyEvaluatorBase[Union[str, float]]):
7883
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
7984

8085
@override
81-
def __init__(self, model_config, *, threshold: float = 3, credential=None):
86+
def __init__(self, model_config, *, threshold: float = 3, credential=None, **kwargs):
8287
current_dir = os.path.dirname(__file__)
8388
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
8489
self._threshold = threshold
@@ -90,6 +95,7 @@ def __init__(self, model_config, *, threshold: float = 3, credential=None):
9095
threshold=threshold,
9196
credential=credential,
9297
_higher_is_better=self._higher_is_better,
98+
**kwargs,
9399
)
94100

95101
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ class SimilarityEvaluator(PromptyEvaluatorBase):
3030
~azure.ai.evaluation.OpenAIModelConfiguration]
3131
:param threshold: The threshold for the similarity evaluator. Default is 3.
3232
:type threshold: int
33+
:param credential: The credential for authenticating to Azure AI service.
34+
:type credential: ~azure.core.credentials.TokenCredential
35+
:keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
36+
This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
37+
:paramtype is_reasoning_model: bool
3338
3439
.. admonition:: Example:
3540
@@ -75,7 +80,7 @@ class SimilarityEvaluator(PromptyEvaluatorBase):
7580
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
7681

7782
@override
78-
def __init__(self, model_config, *, threshold=3, credential=None):
83+
def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
7984
current_dir = os.path.dirname(__file__)
8085
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
8186
self._threshold = threshold
@@ -87,6 +92,7 @@ def __init__(self, model_config, *, threshold=3, credential=None):
8792
threshold=threshold,
8893
credential=credential,
8994
_higher_is_better=self._higher_is_better,
95+
**kwargs,
9096
)
9197

9298
# Ignoring a mypy error about having only 1 overload function.

0 commit comments

Comments
 (0)