Commit 2765bd7

fix evaluation conversation handling (Azure#38043)
* fix evaluation conversation handling
* recordings
1 parent 1861870 · commit 2765bd7

2 files changed: +6 -7 lines changed


sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_73f2254a1c"
+  "Tag": "python/evaluation/azure-ai-evaluation_1390701e9d"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py

Lines changed: 5 additions & 6 deletions
@@ -172,16 +172,16 @@ def converter(conversation: Dict) -> List[DerivedEvalInput]:
             response_context = response.get("context", None)
             if global_context:
                 context["global_context"] = global_context
-            if query_context and not include_query:
+            if query_context and include_query:
                 context["query_context"] = query_context
-            if response_context and not include_response:
+            if response_context and include_response:
                 context["response_context"] = response_context

             eval_input: DerivedEvalInput = {}
             if include_query:
-                eval_input["query"] = query
+                eval_input["query"] = query.get("content", "")
             if include_response:
-                eval_input["response"] = response
+                eval_input["response"] = response.get("content", "")
             if include_context:
                 eval_input["context"] = str(context)
             eval_inputs.append(eval_input)
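
The condition flips ("and not include_query" becoming "and include_query", likewise for the response) attach per-turn context only when the corresponding input is actually requested, and the .get("content", "") changes pass each message's text rather than the whole message dict. A minimal sketch of the fixed behavior for a single turn, assuming the {"messages": [...]} conversation shape these evaluators consume (the literal values are illustrative, not from the SDK):

# Minimal sketch, not the SDK's converter. Conversation shape assumed
# from the diff; values are illustrative.
conversation = {
    "context": "shared background",  # optional global context
    "messages": [
        {"role": "user", "content": "What is 2 + 2?", "context": "math quiz"},
        {"role": "assistant", "content": "4", "context": "arithmetic"},
    ],
}

include_query = include_response = include_context = True
query, response = conversation["messages"]

context = {}
if conversation.get("context"):
    context["global_context"] = conversation["context"]
# Fixed conditions: per-turn context rides along only for included inputs.
if query.get("context") and include_query:
    context["query_context"] = query["context"]
if response.get("context") and include_response:
    context["response_context"] = response["context"]

eval_input = {}
if include_query:
    eval_input["query"] = query.get("content", "")  # the text, not the whole dict
if include_response:
    eval_input["response"] = response.get("content", "")
if include_context:
    eval_input["context"] = str(context)

print(eval_input["query"], "->", eval_input["response"])  # What is 2 + 2? -> 4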
@@ -274,7 +274,6 @@ def _aggregate_results(self, per_turn_results: List[DoEvalResult[T_EvalValue]])
            aggregated[metric] = list_mean(cast(List[Union[int, float]], values))
        # Slap the per-turn results back in.
        aggregated["evaluation_per_turn"] = evaluation_per_turn
-
        return aggregated

    async def _real_call(self, **kwargs) -> Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]:
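
For orientation (this hunk only drops a blank line), the surrounding method averages each numeric metric across turns with list_mean and then attaches the raw per-turn values. A rough sketch of that aggregation pattern, with a plain mean standing in for list_mean:

from typing import Dict, List, Union

def aggregate(per_turn_results: List[Dict[str, Union[int, float]]]) -> Dict[str, object]:
    # Group each metric's values across turns.
    evaluation_per_turn: Dict[str, List[Union[int, float]]] = {}
    for result in per_turn_results:
        for metric, value in result.items():
            evaluation_per_turn.setdefault(metric, []).append(value)
    # Average per metric (plain mean standing in for list_mean).
    aggregated: Dict[str, object] = {
        metric: sum(values) / len(values)
        for metric, values in evaluation_per_turn.items()
    }
    # Slap the per-turn results back in.
    aggregated["evaluation_per_turn"] = evaluation_per_turn
    return aggregated

print(aggregate([{"coherence": 4}, {"coherence": 5}]))
# {'coherence': 4.5, 'evaluation_per_turn': {'coherence': [4, 5]}}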
@@ -315,7 +314,7 @@ def __init__(self, real_call): # DO NOT ADD TYPEHINT PROMPT FLOW WILL SCREAM AT

        # Don't look at my shame. Nothing to see here....
        # Oh, you're still here? Ok, the reason this has such a gross call signature and behavior is due
-        # to our broken async code not properly handling inputs; keyword arguments that aren't in the signature#
+        # to our broken async code not properly handling inputs; keyword arguments that aren't in the signature
        # are just not passed into this function instead of ending up in kwargs.
        # Since we want this to be relatively call-agnostic, we just account for every input that any children
        # are known to throw at this, mash them into kwargs, and then pass them into the real call.
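
A hypothetical sketch of the workaround those comments describe: every argument a child evaluator is known to pass gets named explicitly in the signature (so the async plumbing forwards it), then everything is mashed back into kwargs for the real call. The class and parameter names below are illustrative, not the SDK's actual wrapper:

import asyncio

class AsyncEvaluatorWrapperSketch:
    # Illustrative only; mirrors the described pattern, not the SDK's class.

    def __init__(self, real_call):  # no typehint, per the original warning
        self._real_call = real_call

    async def __call__(self, *, query=None, response=None, conversation=None, **kwargs):
        # Mash the explicitly-named inputs back into kwargs so the wrapped
        # call stays signature-agnostic.
        named = (("query", query), ("response", response), ("conversation", conversation))
        for name, value in named:
            if value is not None:
                kwargs[name] = value
        return await self._real_call(**kwargs)

async def echo(**kwargs):
    return kwargs

print(asyncio.run(AsyncEvaluatorWrapperSketch(echo)(query="hi")))  # {'query': 'hi'}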
