
Commit 7ff7224

w-javed and Copilot authored
ScoreModel Grader - Audio support (Azure#43605)
* linkfix
* initial commit
* fix
* added new sample
* fix
* adding image sample
* remove audio files
* Apply suggestion from @Copilot
* Apply suggestion from @Copilot
* Apply suggestion from @Copilot
* Apply suggestion from @Copilot
* fix
* fix black

Co-authored-by: Copilot <[email protected]>
1 parent 584e975 commit 7ff7224

File tree

9 files changed, +890 -10 lines changed


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 1 addition & 1 deletion
@@ -963,7 +963,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
         if need_oai_run:
             try:
                 aoi_name = evaluation_name if evaluation_name else DEFAULT_OAI_EVAL_RUN_NAME
-                eval_run_info_list = _begin_aoai_evaluation(graders, column_mapping, input_data_df, aoi_name)
+                eval_run_info_list = _begin_aoai_evaluation(graders, column_mapping, input_data_df, aoi_name, **kwargs)
                 need_get_oai_results = len(eval_run_info_list) > 0
             except EvaluationException as e:
                 if need_local_run:
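With `**kwargs` now forwarded to `_begin_aoai_evaluation`, a caller can hand an explicit data source config through the public entry point instead of relying on the auto-generated schema. A minimal sketch of such an override, assuming the OpenAI Evals "custom" config shape; the property names are illustrative, not taken from this commit:

```python
# A sketch of a caller-supplied data_source_config that the forwarded **kwargs
# can carry down to the AOAI eval run. The "custom" shape follows the OpenAI
# Evals API; the property names are illustrative assumptions.
custom_data_source_config = {
    "type": "custom",
    "item_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
            "conversation": {"type": "array"},  # e.g., a list of audio message parts
        },
        "required": ["query", "conversation"],
    },
    "include_sample_schema": True,
}

# Hypothetical call: evaluate(..., data_source_config=custom_data_source_config)
```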

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py

Lines changed: 38 additions & 8 deletions
@@ -79,6 +79,7 @@ def _begin_aoai_evaluation(
     column_mappings: Optional[Dict[str, Dict[str, str]]],
     data: pd.DataFrame,
     run_name: str,
+    **kwargs: Any,
 ) -> List[OAIEvalRunCreationInfo]:
     """
     Use the AOAI SDK to start an evaluation of the inputted dataset against the supplied graders.
@@ -114,15 +115,19 @@ def _begin_aoai_evaluation(
             f"AOAI: Starting evaluation run {idx + 1}/{len(grader_mapping_list)} with {len(selected_graders)} grader(s)..."
         )
         all_eval_run_info.append(
-            _begin_single_aoai_evaluation(selected_graders, data, selected_column_mapping, run_name)
+            _begin_single_aoai_evaluation(selected_graders, data, selected_column_mapping, run_name, **kwargs)
         )

     LOGGER.info(f"AOAI: Successfully created {len(all_eval_run_info)} evaluation run(s).")
     return all_eval_run_info


 def _begin_single_aoai_evaluation(
-    graders: Dict[str, AzureOpenAIGrader], data: pd.DataFrame, column_mapping: Optional[Dict[str, str]], run_name: str
+    graders: Dict[str, AzureOpenAIGrader],
+    data: pd.DataFrame,
+    column_mapping: Optional[Dict[str, str]],
+    run_name: str,
+    **kwargs: Any,
 ) -> OAIEvalRunCreationInfo:
     """
     Use the AOAI SDK to start an evaluation of the inputted dataset against the supplied graders.
@@ -146,6 +151,16 @@ def _begin_single_aoai_evaluation(
     LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}")
     grader_name_list = []
     grader_list = []
+
+    data_source = {}
+    data_source_config = {}
+
+    if kwargs.get("data_source_config") is not None:
+        data_source_config = kwargs.get("data_source_config")
+
+    if kwargs.get("data_source") is not None:
+        data_source = kwargs.get("data_source")
+
     # It's expected that all graders supplied for a single eval run use the same credentials
     # so grab a client from the first grader.
     client = list(graders.values())[0].get_client()
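The overrides are pulled out of `**kwargs` with `dict.get`, so omitting them leaves the empty-dict defaults in place and the generated config and data source are used downstream. A runnable sketch of just this extraction step, with stand-in names:

```python
# Runnable sketch mirroring the extraction above: overrides arrive via **kwargs
# and default to empty dicts when the caller supplied nothing. The empty dict is
# what the later `if data_source_config == {}` check treats as "generate one".
def extract_overrides(**kwargs):
    data_source = {}
    data_source_config = {}
    if kwargs.get("data_source_config") is not None:
        data_source_config = kwargs.get("data_source_config")
    if kwargs.get("data_source") is not None:
        data_source = kwargs.get("data_source")
    return data_source, data_source_config

assert extract_overrides() == ({}, {})
ds, dsc = extract_overrides(data_source_config={"type": "custom"})
assert dsc == {"type": "custom"} and ds == {}
```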
@@ -155,7 +170,8 @@ def _begin_single_aoai_evaluation(
         grader_list.append(grader._grader_config)
     effective_column_mapping: Dict[str, str] = column_mapping or {}
     LOGGER.info(f"AOAI: Generating data source config with {len(effective_column_mapping)} column mapping(s)...")
-    data_source_config = _generate_data_source_config(data, effective_column_mapping)
+    if data_source_config == {}:
+        data_source_config = _generate_data_source_config(data, effective_column_mapping)
     LOGGER.info(f"AOAI: Data source config generated with schema type: {data_source_config.get('type')}")

     # Create eval group
@@ -181,7 +197,7 @@ def _begin_single_aoai_evaluation(

     # Create eval run
     LOGGER.info(f"AOAI: Creating eval run '{run_name}' with {len(data)} data rows...")
-    eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data, effective_column_mapping)
+    eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data, effective_column_mapping, data_source)
     LOGGER.info(
         f"AOAI: Eval run created with id {eval_run_id}."
         + " Results will be retrieved after normal evaluation is complete..."
@@ -654,7 +670,10 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
     props = data_source_config["item_schema"]["properties"]
     req = data_source_config["item_schema"]["required"]
     for key in column_mapping.keys():
-        props[key] = {"type": "string"}
+        if key in input_data_df and len(input_data_df[key]) > 0 and isinstance(input_data_df[key].iloc[0], list):
+            props[key] = {"type": "array"}
+        else:
+            props[key] = {"type": "string"}
         req.append(key)
     LOGGER.info(f"AOAI: Flat schema generated with {len(props)} properties: {list(props.keys())}")
     return data_source_config
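Schema inference now peeks at the first row of each mapped column: list-valued cells (for example, a conversation of audio message parts) are declared `"type": "array"` instead of being forced to `"string"`. A self-contained sketch of the same check, with an illustrative DataFrame:

```python
# Minimal sketch of the new type inference, mirroring the diff above.
# The DataFrame contents and column names are illustrative.
import pandas as pd

df = pd.DataFrame({
    "query": ["What was said in the clip?"],
    "conversation": [[{"role": "user", "content": [{"type": "input_audio"}]}]],
})

props = {}
for key in ["query", "conversation"]:
    if key in df and len(df[key]) > 0 and isinstance(df[key].iloc[0], list):
        props[key] = {"type": "array"}   # list-valued column (e.g., audio messages)
    else:
        props[key] = {"type": "string"}

print(props)
# {'query': {'type': 'string'}, 'conversation': {'type': 'array'}}
```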
@@ -821,8 +840,11 @@ def _convert_value_to_string(val: Any) -> str:
         # Safely fetch value
         val = row.get(df_col, None)

-        # Convert value to string to match schema's "type": "string" leaves.
-        str_val = _convert_value_to_string(val)
+        if isinstance(val, list):
+            str_val = val
+        else:
+            # Convert value to string to match schema's "type": "string" leaves.
+            str_val = _convert_value_to_string(val)

         # Insert into nested dict
         cursor = item_root
@@ -842,7 +864,10 @@ def _convert_value_to_string(val: Any) -> str:
     for col_name in input_data_df.columns:
         if col_name not in processed_cols:
             val = row.get(col_name, None)
-            str_val = _convert_value_to_string(val)
+            if isinstance(val, list):
+                str_val = val
+            else:
+                str_val = _convert_value_to_string(val)
             item_root[col_name] = str_val

     content.append({WRAPPER_KEY: item_root})
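Both row-serialization paths now apply the same rule: lists pass through untouched so they line up with the `"type": "array"` schema leaves, while everything else is still stringified. A runnable sketch of the rule, with `str()` standing in for the SDK's `_convert_value_to_string`:

```python
# Sketch of the pass-through rule applied in both serialization paths above.
def serialize_cell(val):
    if isinstance(val, list):
        return val   # keep arrays intact for "type": "array" schema leaves
    return str(val)  # stand-in for _convert_value_to_string

assert serialize_cell([{"type": "input_audio"}]) == [{"type": "input_audio"}]
assert serialize_cell(42) == "42"
```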
@@ -863,6 +888,7 @@ def _begin_eval_run(
     run_name: str,
     input_data_df: pd.DataFrame,
     column_mapping: Dict[str, str],
+    data_source_params: Optional[Dict[str, Any]] = None,
 ) -> str:
     """
     Given an eval group id and a dataset file path, use the AOAI API to
@@ -884,6 +910,10 @@ def _begin_eval_run(

     LOGGER.info(f"AOAI: Creating eval run '{run_name}' for eval group {eval_group_id}...")
     data_source = _get_data_source(input_data_df, column_mapping)
+
+    if data_source_params is not None:
+        data_source.update(data_source_params)
+
     eval_run = client.evals.runs.create(
         eval_id=eval_group_id,
         data_source=cast(Any, data_source),  # Cast for type checker: dynamic schema dict accepted by SDK at runtime
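`_begin_eval_run` layers any caller-supplied `data_source_params` over the generated data source with `dict.update`, so caller keys win on conflict while untouched keys survive. A runnable sketch with illustrative keys (not taken from the commit):

```python
# Sketch of the override merge performed above; key/value pairs are assumptions.
generated = {"type": "jsonl", "source": {"type": "file_content", "content": []}}
overrides = {"type": "completions", "sampling_params": {"max_completion_tokens": 256}}

generated.update(overrides)                       # caller keys win on conflict
assert generated["type"] == "completions"
assert generated["source"]["type"] == "file_content"  # untouched keys survive
```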

sdk/evaluation/azure-ai-evaluation/samples/aoai_score_model_grader_sample.py

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ def demonstrate_score_model_grader():
     # 1. Configure Azure OpenAI model using environment variables
     model_config = AzureOpenAIModelConfiguration(
         azure_endpoint=os.environ.get("endpoint"),
-        api_key=os.environ.get("key"),
+        api_key=os.environ.get("api_key"),
         azure_deployment=os.environ.get("deployment_name"),
         api_version="2024-12-01-preview",
     )
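The sample reads its connection settings from environment variables, and this fix aligns the lookup name with the `api_key` variable. For illustration only, the variables it expects could be set in-process like this (values are placeholders):

```python
import os

# Placeholder values; the names match what the sample reads via os.environ.get(...).
os.environ["endpoint"] = "https://<your-resource>.openai.azure.com/"
os.environ["api_key"] = "<your-api-key>"          # previously looked up as "key"
os.environ["deployment_name"] = "<your-deployment>"
```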
