1919 Prefixes ,
2020 _InternalEvaluationMetrics ,
2121)
22- from .._model_configurations import AzureAIProject
22+ from .._model_configurations import AzureAIProject , EvaluatorConfig
2323from .._user_agent import USER_AGENT
2424from ._batch_run_client import BatchRunContext , CodeClient , ProxyClient
2525from ._utils import (
@@ -273,7 +273,7 @@ def _validate_columns(
273273 df : pd .DataFrame ,
274274 evaluators : Dict [str , Any ],
275275 target : Optional [Callable ],
276- evaluator_config : Dict [str , Dict [str , str ]],
276+ column_mapping : Dict [str , Dict [str , str ]],
277277) -> None :
278278 """
279279 Check that all columns needed by evaluator or target function are present.
@@ -284,8 +284,8 @@ def _validate_columns(
284284 :type evaluators: Dict[str, Any]
285285 :param target: The callable to be applied to data set.
286286 :type target: Optional[Callable]
287- :param evaluator_config: The configuration for evaluators.
288- :type evaluator_config : Dict[str, Dict[str, str]]
287+ :param column_mapping: Dictionary mapping evaluator name to evaluator column mapping
288+ :type column_mapping : Dict[str, Dict[str, str]]
289289 :raises EvaluationException: If column starts with "__outputs." while target is defined.
290290 """
291291 if target :
@@ -306,7 +306,7 @@ def _validate_columns(
306306 else :
307307 for evaluator_name , evaluator in evaluators .items ():
308308 # Apply column mapping
309- mapping_config = evaluator_config .get (evaluator_name , evaluator_config .get ("default" , None ))
309+ mapping_config = column_mapping .get (evaluator_name , column_mapping .get ("default" , None ))
310310 new_df = _apply_column_mapping (df , mapping_config )
311311
312312 # Validate input data for evaluator
@@ -372,11 +372,11 @@ def _apply_target_to_data(
372372 return target_output , generated_columns , run
373373
374374
375- def _process_evaluator_config ( evaluator_config : Dict [str , Dict [str , str ]]) -> Dict [str , Dict [str , str ]]:
376- """Process evaluator_config to replace ${target.} with ${data.}
375+ def _process_column_mappings ( column_mapping : Dict [str , Dict [str , str ]]) -> Dict [str , Dict [str , str ]]:
376+ """Process column_mapping to replace ${target.} with ${data.}
377377
378- :param evaluator_config : The configuration for evaluators.
379- :type evaluator_config : Dict[str, Dict[str, str]]
378+ :param column_mapping : Dictionary mapping evaluator name to evaluator column mapping.
379+ :type column_mapping : Dict[str, Dict[str, str]]
380380 :return: The processed configuration.
381381 :rtype: Dict[str, Dict[str, str]]
382382 """
@@ -385,15 +385,15 @@ def _process_evaluator_config(evaluator_config: Dict[str, Dict[str, str]]) -> Di
385385
386386 unexpected_references = re .compile (r"\${(?!target\.|data\.).+?}" )
387387
388- if evaluator_config :
389- for evaluator , mapping_config in evaluator_config .items ():
388+ if column_mapping :
389+ for evaluator , mapping_config in column_mapping .items ():
390390 if isinstance (mapping_config , dict ):
391391 processed_config [evaluator ] = {}
392392
393393 for map_to_key , map_value in mapping_config .items ():
394394 # Check if there's any unexpected reference other than ${target.} or ${data.}
395395 if unexpected_references .search (map_value ):
396- msg = "Unexpected references detected in 'evaluator_config '. Ensure only ${target.} and ${data.} are used."
396+ msg = "Unexpected references detected in 'column_mapping '. Ensure only ${target.} and ${data.} are used."
397397 raise EvaluationException (
398398 message = msg ,
399399 internal_message = msg ,
@@ -439,7 +439,7 @@ def evaluate(
439439 evaluators : Dict [str , Callable ],
440440 evaluation_name : Optional [str ] = None ,
441441 target : Optional [Callable ] = None ,
442- evaluator_config : Optional [Dict [str , Dict [ str , str ] ]] = None ,
442+ evaluator_config : Optional [Dict [str , EvaluatorConfig ]] = None ,
443443 azure_ai_project : Optional [AzureAIProject ] = None ,
444444 output_path : Optional [str ] = None ,
445445 ** kwargs ,
@@ -458,10 +458,10 @@ def evaluate(
458458 :keyword target: Target to be evaluated. `target` and `data` both cannot be None
459459 :paramtype target: Optional[Callable]
460460 :keyword evaluator_config: Configuration for evaluators. The configuration should be a dictionary with evaluator
461- names as keys and a dictionary of column mappings as values . The column mappings should be a dictionary with
462- keys as the column names in the evaluator input and values as the column names in the input data or data
463- generated by target.
464- :paramtype evaluator_config: Optional[Dict[str, Dict[str, str ]]
461+ names as keys and values that are dictionaries containing the column mappings. The column mappings should
462+ be a dictionary with keys as the column names in the evaluator input and values as the column names in the
463+ input data or data generated by target.
464+ :paramtype evaluator_config: Optional[Dict[str, ~azure.ai.evaluation.EvaluatorConfig ]]
465465 :keyword output_path: The local folder or file path to save evaluation results to if set. If folder path is provided
466466 the results will be saved to a file named `evaluation_results.json` in the folder.
467467 :paramtype output_path: Optional[str]
@@ -482,7 +482,7 @@ def evaluate(
482482 model_config = {
483483 "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
484484 "api_key": os.environ.get("AZURE_OPENAI_KEY"),
485- "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT")
485+ "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
486486 }
487487
488488 coherence_eval = CoherenceEvaluator(model_config=model_config)
@@ -497,15 +497,19 @@ def evaluate(
497497 },
498498 evaluator_config={
499499 "coherence": {
500- "response": "${data.response}",
501- "query": "${data.query}"
500+ "column_mapping": {
501+ "response": "${data.response}",
502+ "query": "${data.query}",
503+ },
502504 },
503505 "relevance": {
504- "response": "${data.response}",
505- "context": "${data.context}",
506- "query": "${data.query}"
507- }
508- }
506+ "column_mapping": {
507+ "response": "${data.response}",
508+ "context": "${data.context}",
509+ "query": "${data.query}",
510+ },
511+ },
512+ },
509513 )
510514
511515 """
@@ -544,13 +548,13 @@ def evaluate(
544548 raise e
545549
546550
547- def _evaluate ( # pylint: disable=too-many-locals
551+ def _evaluate ( # pylint: disable=too-many-locals,too-many-statements
548552 * ,
549553 evaluation_name : Optional [str ] = None ,
550554 target : Optional [Callable ] = None ,
551555 data : Optional [str ] = None ,
552556 evaluators : Optional [Dict [str , Callable ]] = None ,
553- evaluator_config : Optional [Dict [str , Dict [ str , str ] ]] = None ,
557+ evaluator_config : Optional [Dict [str , EvaluatorConfig ]] = None ,
554558 azure_ai_project : Optional [AzureAIProject ] = None ,
555559 output_path : Optional [str ] = None ,
556560 ** kwargs ,
@@ -560,8 +564,13 @@ def _evaluate( # pylint: disable=too-many-locals
560564 # Process evaluator config to replace ${target.} with ${data.}
561565 if evaluator_config is None :
562566 evaluator_config = {}
563- evaluator_config = _process_evaluator_config (evaluator_config )
564- _validate_columns (input_data_df , evaluators , target , evaluator_config )
567+ # extract column mapping dicts into dictionary mapping evaluator name to column mapping
568+ column_mapping = {
569+ evaluator_name : evaluator_configuration .get ("column_mapping" , None )
570+ for evaluator_name , evaluator_configuration in evaluator_config .items ()
571+ }
572+ column_mapping = _process_column_mappings (column_mapping )
573+ _validate_columns (input_data_df , evaluators , target , column_mapping )
565574
566575 # Target Run
567576 pf_client = PFClient (
@@ -577,30 +586,30 @@ def _evaluate( # pylint: disable=too-many-locals
577586
578587 # Create default configuration for evaluators that directly maps
579588 # input data names to keyword inputs of the same name in the evaluators.
580- evaluator_config = evaluator_config or {}
581- evaluator_config .setdefault ("default" , {})
589+ column_mapping = column_mapping or {}
590+ column_mapping .setdefault ("default" , {})
582591
583592 # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
584593 if data is not None and target is not None :
585594 input_data_df , target_generated_columns , target_run = _apply_target_to_data (
586595 target , data , pf_client , input_data_df , evaluation_name , _run_name = kwargs .get ("_run_name" )
587596 )
588597
589- for evaluator_name , mapping in evaluator_config .items ():
598+ for evaluator_name , mapping in column_mapping .items ():
590599 mapped_to_values = set (mapping .values ())
591600 for col in target_generated_columns :
592601 # If user defined mapping differently, do not change it.
593602 # If it was mapped to target, we have already changed it
594- # in _process_evaluator_config
603+ # in _process_column_mappings
595604 run_output = f"${{run.outputs.{ col } }}"
596605 # We will add our mapping only if
597606 # customer did not mapped target output.
598607 if col not in mapping and run_output not in mapped_to_values :
599- evaluator_config [evaluator_name ][col ] = run_output # pylint: disable=unnecessary-dict-index-lookup
608+ column_mapping [evaluator_name ][col ] = run_output # pylint: disable=unnecessary-dict-index-lookup
600609
601610 # After we have generated all columns we can check if we have
602611 # everything we need for evaluators.
603- _validate_columns (input_data_df , evaluators , target = None , evaluator_config = evaluator_config )
612+ _validate_columns (input_data_df , evaluators , target = None , column_mapping = column_mapping )
604613
605614 # Apply 1-1 mapping from input data to evaluator inputs, excluding values already assigned
606615 # via target mapping.
@@ -610,8 +619,8 @@ def _evaluate( # pylint: disable=too-many-locals
610619 for col in input_data_df .columns :
611620 # Ignore columns added by target mapping. These are formatted as "__outputs.<column_name>"
612621 # Also ignore columns that are already in config, since they've been covered by target mapping.
613- if not col .startswith (Prefixes .TSG_OUTPUTS ) and col not in evaluator_config ["default" ].keys ():
614- evaluator_config ["default" ][col ] = f"${{data.{ col } }}"
622+ if not col .startswith (Prefixes .TSG_OUTPUTS ) and col not in column_mapping ["default" ].keys ():
623+ column_mapping ["default" ][col ] = f"${{data.{ col } }}"
615624 # Batch Run
616625 evaluators_info = {}
617626 use_pf_client = kwargs .get ("_use_pf_client" , True )
@@ -632,7 +641,7 @@ def _evaluate( # pylint: disable=too-many-locals
632641 flow = evaluator ,
633642 run = target_run ,
634643 evaluator_name = evaluator_name ,
635- column_mapping = evaluator_config .get (evaluator_name , evaluator_config .get ("default" , None )),
644+ column_mapping = column_mapping .get (evaluator_name , column_mapping .get ("default" , None )),
636645 data = data ,
637646 stream = True ,
638647 name = kwargs .get ("_run_name" ),
0 commit comments