-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Reasoning support for evaluators #42482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4318329
192b980
758adb4
de09fd1
ef60fe6
8ca51d0
98bfc3a
a5f32e8
5fd88b6
51f2b44
a5be8b5
75965b7
d0c5e53
b790276
d5ca243
8d62e36
59a70f2
4d146d7
f7a4c83
79e3a40
588cbec
7514472
28b2513
8603e0e
895f226
b4b2daf
023f07f
45b5f5d
1ccb4db
6fd9aa5
f871855
59ac230
794a2c4
b33363c
464e2dd
4585b14
89c2988
6805018
aad48df
db75552
b8eebf3
2899ad4
c431563
79ed63c
a3be3fc
056ac4d
1779059
43fecff
b0c102b
7bf5f1f
3248ad0
d76f59b
4d60e43
98d1626
9248c38
74b760f
23dbc85
467ccb6
c2beee8
be9a19a
de3a1e1
f9faa61
69e783a
8ebea2a
3f9c818
3b3159c
d78b834
ae3fc52
19cce75
e59ca7f
98b4618
706c042
c418513
86f24ba
a1e55b4
bd6809f
3ae37cb
6b8d4ce
733ee1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,7 +1,9 @@ | ||||||
# --------------------------------------------------------- | ||||||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||||||
# --------------------------------------------------------- | ||||||
import os, logging | ||||||
import os | ||||||
import logging | ||||||
from inspect import signature | ||||||
from typing import Dict, List, Optional, Union | ||||||
|
||||||
from typing_extensions import overload, override | ||||||
|
@@ -49,6 +51,9 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]): | |||||
~azure.ai.evaluation.OpenAIModelConfiguration] | ||||||
:param threshold: The threshold for the groundedness evaluator. Default is 3. | ||||||
:type threshold: int | ||||||
:keyword is_reasoning_model: (Preview) config for chat completions is | ||||||
updated to use reasoning models | ||||||
:type is_reasoning_model: bool | ||||||
|
||||||
.. admonition:: Example: | ||||||
|
||||||
|
@@ -105,10 +110,16 @@ def __init__(self, model_config, *, threshold=3, **kwargs): | |||||
result_key=self._RESULT_KEY, | ||||||
threshold=threshold, | ||||||
_higher_is_better=self._higher_is_better, | ||||||
**kwargs, | ||||||
) | ||||||
self._model_config = model_config | ||||||
self.threshold = threshold | ||||||
# Needs to be set because it's used in call method to re-validate prompt if `query` is provided | ||||||
|
||||||
# Cache whether AsyncPrompty.load supports the is_reasoning_model parameter. | ||||||
try: | ||||||
self._has_is_reasoning_model_param: bool = "is_reasoning_model" in signature(AsyncPrompty.load).parameters | ||||||
except Exception: # Very defensive: if inspect fails, assume not supported | ||||||
self._has_is_reasoning_model_param = False | ||||||
|
||||||
@overload | ||||||
def __call__( | ||||||
|
@@ -202,7 +213,18 @@ def __call__( # pylint: disable=docstring-missing-param | |||||
self._DEFAULT_OPEN_API_VERSION, | ||||||
UserAgentSingleton().value, | ||||||
) | ||||||
self._flow = AsyncPrompty.load(source=self._prompty_file, model=prompty_model_config) | ||||||
|
||||||
if self._has_is_reasoning_model_param: | ||||||
self._flow = AsyncPrompty.load( | ||||||
source=self._prompty_file, | ||||||
model=prompty_model_config, | ||||||
is_reasoning_model=self._is_reasoning_model, | ||||||
) | ||||||
else: | ||||||
self._flow = AsyncPrompty.load( | ||||||
source=self._prompty_file, | ||||||
model=prompty_model_config, | ||||||
) | ||||||
|
||||||
return super().__call__(*args, **kwargs) | ||||||
|
||||||
|
@@ -282,4 +304,4 @@ def _get_context_from_agent_response(self, response, tool_definitions): | |||||
logger.debug(f"Error extracting context from agent response : {str(ex)}") | ||||||
context = "" | ||||||
|
||||||
return context if context else None | ||||||
return context | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The function
Suggested change
Copilot uses AI. Check for mistakes. Positive Feedback | Negative Feedback |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring is missing a comma. It should read 'Evaluator identifier, experimental, to be used only with cloud evaluation' or 'Evaluator identifier (experimental) to be used only with cloud evaluation'.
Copilot uses AI. Check for mistakes.