class OAIEvalRunCreationInfo(TypedDict, total=True):
    """Information about an OAI evaluation run, used to retrieve its results later."""

+    client: Union[AzureOpenAI, OpenAI]
    eval_group_id: str
    eval_run_id: str
    grader_name_map: Dict[str, str]
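Since the run info now carries its own client, downstream polling no longer needs a shared client argument. A minimal sketch of constructing and consuming the updated TypedDict, with hypothetical IDs (real values come back from the evals API at creation time):

```python
from typing import Dict, TypedDict, Union

from openai import AzureOpenAI, OpenAI


class OAIEvalRunCreationInfo(TypedDict, total=True):
    """Information about an OAI evaluation run, used to retrieve its results later."""

    client: Union[AzureOpenAI, OpenAI]
    eval_group_id: str
    eval_run_id: str
    grader_name_map: Dict[str, str]


run_info = OAIEvalRunCreationInfo(
    client=OpenAI(),  # assumes OPENAI_API_KEY is set in the environment
    eval_group_id="eval_abc123",      # hypothetical ID
    eval_run_id="evalrun_def456",     # hypothetical ID
    grader_name_map={"internal_grader_name": "user_facing_name"},
)

# Polling uses the client bundled with the run, not a shared argument:
run = run_info["client"].evals.runs.retrieve(
    run_id=run_info["eval_run_id"], eval_id=run_info["eval_group_id"]
)
```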
@@ -55,7 +56,6 @@ def _split_evaluators_and_grader_configs(

@experimental
def _begin_aoai_evaluation(
-    client: Union[OpenAI, AzureOpenAI],
    graders: Dict[str, AzureOpenAIGrader],
    column_mappings: Optional[Dict[str, Dict[str, str]]],
    data: pd.DataFrame,
@@ -88,11 +88,8 @@ def _begin_aoai_evaluation(
    LOGGER.info("AOAI: Aoai graders detected among evaluator inputs. Preparing to create OAI eval group...")
    all_eval_run_info: List[OAIEvalRunCreationInfo] = []

-    if len(all_eval_run_info) > 1:
-        LOGGER.info("AOAI: Grader-specific column mappings detected. Splitting up evaluation runs to avoid conflicts...")
    for selected_graders, selected_column_mapping in _get_graders_and_column_mappings(graders, column_mappings):
        all_eval_run_info.append(_begin_single_aoai_evaluation(
-            client,
            selected_graders,
            data,
            selected_column_mapping,
@@ -102,7 +99,6 @@ def _begin_aoai_evaluation(
    return all_eval_run_info

def _begin_single_aoai_evaluation(
-    client: Union[OpenAI, AzureOpenAI],
    graders: Dict[str, AzureOpenAIGrader],
    data: pd.DataFrame,
    column_mapping: Dict[str, str],
@@ -113,8 +109,6 @@ def _begin_single_aoai_evaluation(
    AOAI evaluation runs must be queried for completion, so this returns a poller to accomplish that task
    at a later time.

-    :param client: The AOAI client to use for the evaluation.
-    :type client: Union[OpenAI, AzureOpenAI]
    :param graders: The graders to use for the evaluation. Should be a dictionary of string to AOAIGrader.
    :type graders: Dict[str, AoaiGrader]
    :param data_source_config: The data source configuration to apply to the
@@ -129,7 +123,10 @@ def _begin_single_aoai_evaluation(
    # Format data for eval group creation
    grader_name_list = []
    grader_list = []
-
+    # It's expected that all graders supplied for a single eval run use the same credentials,
+    # so grab a client from the first grader.
+    client = list(graders.values())[0].get_client()
+
    for name, grader in graders.items():
        grader_name_list.append(name)
        grader_list.append(grader.get_grader_config())
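The first-grader shortcut above leans on an undocumented invariant. If it ever needs enforcing, a guard along these lines could make it explicit; this is only a sketch, assuming graders expose `get_client()` as used above and that `base_url` plus `api_key` is an acceptable proxy for "same credentials":

```python
def _assert_graders_share_credentials(graders) -> None:
    # Sketch only: compares base_url + api_key as a rough proxy for
    # "same credentials"; token-based auth would need a different check.
    clients = [grader.get_client() for grader in graders.values()]
    first = clients[0]
    for other in clients[1:]:
        if (str(other.base_url), other.api_key) != (str(first.base_url), first.api_key):
            raise ValueError("All graders in a single AOAI eval run must share credentials.")
```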
@@ -163,7 +160,7 @@ def _begin_single_aoai_evaluation(
    LOGGER.info(f"AOAI: Eval run created with id {eval_run_id}." +
                " Results will be retrieved after normal evaluation is complete...")

-    return OAIEvalRunCreationInfo(eval_group_id=eval_group_info.id, eval_run_id=eval_run_id, grader_name_map=grader_name_map)
+    return OAIEvalRunCreationInfo(client=client, eval_group_id=eval_group_info.id, eval_run_id=eval_run_id, grader_name_map=grader_name_map)

def _get_evaluation_run_results(
    client: Union[OpenAI, AzureOpenAI],
@@ -174,8 +171,6 @@ def _get_evaluation_run_results(
    pipeline to consume. This method accepts a list of eval run information, and will combine the
    results into a single dataframe and metrics dictionary.

-    :param client: The AOAI client to use for the evaluation.
-    :type client: Union[OpenAI, AzureOpenAI]
    :param all_run_info: A list of evaluation run information that contains the needed values
        to retrieve the results of the evaluation run.
    :type all_run_info: List[OAIEvalRunCreationInfo]
@@ -188,25 +183,19 @@ def _get_evaluation_run_results(
    run_metrics = {}
    output_df = pd.DataFrame()
    for run_info in all_run_info:
-        cur_output_df, cur_run_metrics = _get_single_run_results(
-            client,
-            run_info
-        )
+        cur_output_df, cur_run_metrics = _get_single_run_results(run_info)
        output_df = pd.concat([output_df, cur_output_df], axis=1)
        run_metrics.update(cur_run_metrics)

    return output_df, run_metrics

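Each per-run DataFrame holds different grader columns for the same input rows, which is why the runs are joined column-wise. A toy illustration (the column names here are hypothetical):

```python
import pandas as pd

df_a = pd.DataFrame({"outputs.relevance.score": [0.9, 0.2]})
df_b = pd.DataFrame({"outputs.fluency.score": [0.7, 0.4]})

# axis=1 stacks grader columns side by side; row order is assumed to match
# because every run was created from the same input dataframe.
combined = pd.concat([df_a, df_b], axis=1)  # 2 rows x 2 columns
```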
def _get_single_run_results(
-    client: Union[OpenAI, AzureOpenAI],
    run_info: OAIEvalRunCreationInfo,
) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """
    Get the results of an OAI evaluation run, formatted in a way that is easy for the rest of the evaluation
    pipeline to consume.

-    :param client: The AOAI client to use for the evaluation.
-    :type client: Union[OpenAI, AzureOpenAI]
    :param run_info: The evaluation run information that contains the needed values
        to retrieve the results of the evaluation run.
    :type run_info: OAIEvalRunCreationInfo
@@ -216,7 +205,7 @@ def _get_single_run_results(
    :raises EvaluationException: If the evaluation run fails or is not completed before timing out.
    """
    # Wait for evaluation run to complete
-    run_results = _wait_for_run_conclusion(client, run_info["eval_group_id"], run_info["eval_run_id"])
+    run_results = _wait_for_run_conclusion(run_info["client"], run_info["eval_group_id"], run_info["eval_run_id"])
    if run_results.status != "completed":
        raise EvaluationException(
            message=f"AOAI evaluation run {run_info['eval_group_id']}/{run_info['eval_run_id']}"
@@ -248,7 +237,7 @@ def _get_single_run_results(
    # The passed and score values are then added to the results dictionary, prepended with the grader's name
    # as entered by the user in the inputted dictionary.
    # Other values, if they exist, are also added to the results dictionary.
-    raw_list_results = client.evals.runs.output_items.list(
+    raw_list_results = run_info["client"].evals.runs.output_items.list(
        eval_id=run_info["eval_group_id"],
        run_id=run_info["eval_run_id"]
    )
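For reference, a hedged sketch of the parsing that the comments above describe, continuing from `raw_list_results`. It assumes each listed output item exposes a `results` list of per-grader dicts keyed by `"name"`, `"passed"`, and `"score"`, and that columns are prefixed `outputs.<grader>.`; both are my reading of the schema, not something this diff guarantees:

```python
rows = []
for item in raw_list_results.data:
    row = {}
    for result in item.results:
        # Map the OAI-side grader name back to the user-supplied one.
        grader_name = run_info["grader_name_map"].get(result["name"], result["name"])
        for key, value in result.items():
            if key != "name":
                row[f"outputs.{grader_name}.{key}"] = value
    rows.append(row)
output_df = pd.DataFrame(rows)
```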
@@ -273,41 +262,6 @@ def _get_single_run_results(

    return output_df, run_metrics

-def _are_individual_runs_needed(
-    graders: Dict[str, AzureOpenAIGrader],
-    column_mapping: Optional[Dict[str, str]] = None
-) -> bool:
-    """
-    Given an input set of graders and their column mapping, determine if
-    the graders can be executed together under a single evaluation run,
-    or if they must be handled individually.
-
-    For simplicity's sake, the individual run condition is met if there are at least
-    two graders, and if any of them have a unique column mapping.
-
-    This is done to avoid the possibility of conflicting mappings, since OAI requires
-    unique input name assignments to each evaluation group.
-
-    :param graders: The graders to use for the evaluation. Should be a dictionary of string to AOAIGrader.
-    :type graders: Dict[str, AoaiGrader]
-    :param column_mapping: The column mapping to check.
-    :type column_mapping: Optional[Dict[str, str]]
-    :return: True if the graders require individual runs, False otherwise.
-    :rtype: bool
-    """
-    if len(graders) < 2:
-        # Only one grader, no need for individual runs.
-        return False
-    if column_mapping is None:
-        # No column mapping provided, no need for individual runs.
-        return False
-    # Check if any of the graders have a unique column mapping.
-    for name in graders.keys():
-        if name in column_mapping:
-            # Grader with a unique column mapping found. Individual runs are needed.
-            return True
-    return False
-

def _convert_remote_eval_params_to_grader(grader_id: str, init_params: Dict[str, Any]) -> AzureOpenAIGrader:
    """
@@ -380,6 +334,10 @@ def _get_graders_and_column_mappings(
    the OAI API can't. So, if there's a possibility that such a conflict might arise,
    we need to split the incoming data up.

+    Currently, this splits each grader into its own eval group/run so that each one
+    uses its own credentials. A planned fast-follow is to instead group graders
+    that share matching credentials into a single run.
+
    :param graders: The graders to use for the evaluation. Should be a dictionary of string to AOAIGrader.
    :type graders: Dict[str, AoaiGrader]
    :param column_mappings: The column mappings to use for the evaluation.
@@ -388,15 +346,9 @@ def _get_graders_and_column_mappings(
        and the column mapping they should use.
    :rtype: List[Tuple[Dict[str, AoaiGrader], Optional[Dict[str, str]]]]
    """
-    if column_mappings is None:
-        # No column mappings provided, no need to split.
-        return [(graders, None)]
+    column_mappings = column_mappings or {}
    default_mapping = column_mappings.get("default", None)
-    if not any(name in column_mappings for name in graders.keys()):
-        # No unique column mappings provided, no need to split.
-        return [(graders, default_mapping)]
-    # At least one grader has a unique column mapping, split graders up
-    return [({name: grader}, column_mappings.get(name, default_mapping)) for name, grader in graders.items()]
+    return [({name: grader}, column_mappings.get(name, default_mapping)) for name, grader in graders.items()]

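Concretely, the per-grader split now behaves like this (grader objects stubbed as strings for brevity; the `${data.*}` mapping syntax follows the rest of the SDK):

```python
graders = {"relevance": "<grader_a>", "fluency": "<grader_b>"}
column_mappings = {
    "default": {"query": "${data.query}"},
    "fluency": {"response": "${data.response}"},
}

default_mapping = column_mappings.get("default", None)
runs = [
    ({name: grader}, column_mappings.get(name, default_mapping))
    for name, grader in graders.items()
]
# runs == [
#     ({"relevance": "<grader_a>"}, {"query": "${data.query}"}),
#     ({"fluency": "<grader_b>"}, {"response": "${data.response}"}),
# ]
```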
def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]) -> Dict[str, Any]:
    """Produce a data source config that maps all columns from the supplied data source into
@@ -499,7 +451,6 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
        }
    }

-
def _begin_eval_run(
    client: Union[OpenAI, AzureOpenAI],
    eval_group_id: str,