
Commit b9824f7

[evaluation] Refactor exceptions to use custom exception class (#37436)

* Remove Optional type annotation from get() methods
* Remove duplicate overloads
* Include xpia in handled_metrics for evaluation aggregation
* Rename class from PromptflowEvalsException to EvaluationException
* Update exceptions to use custom exception class
* Revert "Include xpia in handled_metrics for evaluation aggregation" (reverts commit 8629de8)
* Revert irrelevant commits
* Update output.py
* Update tests
* Fix typo in error message
* Address bandit error

1 parent d5cd9d3 commit b9824f7
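The custom exception module itself, azure/ai/evaluation/_exceptions.py, is not among the files shown in this view. As a minimal sketch, reconstructed purely from the constructor arguments and enum members visible at the call sites below, it plausibly looks like this; the base class, enum string values, and defaults are assumptions, not what the commit actually contains:

    # Sketch only: inferred from call sites in this diff; the real
    # azure/ai/evaluation/_exceptions.py may differ in base class,
    # enum values, and defaults.
    from enum import Enum


    class ErrorCategory(Enum):
        # Only members that appear in this diff; the real enum may define more.
        INVALID_VALUE = "INVALID VALUE"
        MISSING_FIELD = "MISSING FIELD"
        FAILED_EXECUTION = "FAILED EXECUTION"
        SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
        UNKNOWN = "UNKNOWN"


    class ErrorBlame(Enum):
        USER_ERROR = "UserError"
        SYSTEM_ERROR = "SystemError"  # assumed counterpart; not used in this diff
        UNKNOWN = "Unknown"


    class ErrorTarget(Enum):
        EVALUATE = "Evaluate"
        EVAL_RUN = "EvalRun"
        CODE_CLIENT = "CodeClient"
        RAI_CLIENT = "RAIClient"
        UNKNOWN = "Unknown"


    class EvaluationException(Exception):
        """Custom exception that classifies evaluation failures.

        message is user-facing; internal_message is a static variant,
        plausibly intended to be free of user data (see the note after
        the code_client.py diff below).
        """

        def __init__(
            self,
            message: str,
            internal_message: str,
            *,
            target: ErrorTarget = ErrorTarget.UNKNOWN,
            category: ErrorCategory = ErrorCategory.UNKNOWN,
            blame: ErrorBlame = ErrorBlame.UNKNOWN,
        ) -> None:
            self.message = message
            self.internal_message = internal_message
            self.target = target
            self.category = category
            self.blame = blame
            super().__init__(message)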

34 files changed: +614 -132 lines changed


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 24 additions & 5 deletions
@@ -15,6 +15,7 @@
 from azure.identity import DefaultAzureCredential
 
 from azure.ai.evaluation._http_utils import get_async_http_client
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
 
 from .constants import (
@@ -72,15 +73,25 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     )
 
     if response.status_code != 200:
-        raise Exception(  # pylint: disable=broad-exception-raised
-            f"RAI service is not available in this region. Status Code: {response.status_code}"
+        msg = f"RAI service is not available in this region. Status Code: {response.status_code}"
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.UNKNOWN,
+            category=ErrorCategory.SERVICE_UNAVAILABLE,
+            blame=ErrorBlame.USER_ERROR,
         )
 
     capabilities = response.json()
 
     if capability and capability not in capabilities:
-        raise Exception(  # pylint: disable=broad-exception-raised
-            f"Capability '{capability}' is not available in this region"
+        msg = f"Capability '{capability}' is not available in this region"
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.SERVICE_UNAVAILABLE,
+            blame=ErrorBlame.USER_ERROR,
         )
@@ -337,7 +348,15 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
     )
 
     if response.status_code != 200:
-        raise Exception("Failed to retrieve the discovery service URL")  # pylint: disable=broad-exception-raised
+        msg = "Failed to retrieve the discovery service URL."
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.SERVICE_UNAVAILABLE,
+            blame=ErrorBlame.UNKNOWN,
+        )
+
     base_url = urlparse(response.json()["properties"]["discoveryUrl"])
     return f"{base_url.scheme}://{base_url.netloc}"

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py

Lines changed: 7 additions & 2 deletions
@@ -10,6 +10,7 @@
 from promptflow.contracts.types import AttrDict
 from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
 
@@ -119,8 +120,12 @@ def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
         try:
             json_data = load_jsonl(data)
         except json.JSONDecodeError as exc:
-            raise ValueError(
-                f"Failed to parse data as JSON: {data}. Please provide a valid json lines data."
+            raise EvaluationException(
+                message=f"Failed to parse data as JSON: {data}. Provide valid json lines data.",
+                internal_message="Failed to parse data as JSON",
+                target=ErrorTarget.CODE_CLIENT,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
             ) from exc
 
         input_df = pd.DataFrame(json_data)
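One detail worth noting in the hunk above: message interpolates the offending input ({data}) while internal_message stays a static string. A plausible reading, though the commit does not state it, is that internal_message is meant to be safe for shared telemetry because it never carries user data. A sketch of a consumer relying on that split (illustrative only; report_failure is not an SDK function, and attribute access on the exception is assumed):

    import logging

    from azure.ai.evaluation._exceptions import EvaluationException

    LOGGER = logging.getLogger(__name__)


    def report_failure(exc: EvaluationException) -> None:
        # Assumption: internal_message is static and free of user data,
        # so it can go to shared logs/telemetry without leaking inputs.
        LOGGER.error("Evaluation failed: %s", exc.internal_message)
        # The detailed, data-bearing message stays with the local user.
        print(exc.message)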

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py

Lines changed: 16 additions & 5 deletions
@@ -18,6 +18,7 @@
 from promptflow._sdk.entities import Run
 from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 LOGGER = logging.getLogger(__name__)
 
@@ -199,7 +200,7 @@ def _end_run(self, reason: str) -> None:
 
         :param reason: Reason for run termination. Possible values are "FINISHED", "FAILED", and "KILLED"
         :type reason: str
-        :raises ValueError: Raised if the run is not in ("FINISHED", "FAILED", "KILLED")
+        :raises EvaluationException: Raised if the run is not in ("FINISHED", "FAILED", "KILLED")
         """
         if not self._check_state_and_log(
             "stop run", {RunStatus.BROKEN, RunStatus.NOT_STARTED, RunStatus.TERMINATED}, False
@@ -210,8 +211,12 @@ def _end_run(self, reason: str) -> None:
             self._status = RunStatus.TERMINATED
             return
         if reason not in ("FINISHED", "FAILED", "KILLED"):
-            raise ValueError(
-                f"Incorrect terminal status {reason}. " 'Valid statuses are "FINISHED", "FAILED" and "KILLED".'
+            raise EvaluationException(
+                message=f"Incorrect terminal status {reason}. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'.",
+                internal_message="Incorrect terminal status. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'",
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.UNKNOWN,
             )
         url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/update"
         body = {
@@ -337,14 +342,20 @@ def _check_state_and_log(self, action: str, bad_states: Set[RunStatus], should_r
         :type bad_states: Set[RunStatus]
         :param should_raise: Should we raise an error if the bad state has been encountered
         :type should_raise: bool
-        :raises: RuntimeError if should_raise is True and invalid state was encountered.
+        :raises: ~azure.ai.evaluation._exceptions.EvaluationException if should_raise is True and invalid state was encountered.
         :return: Whether or not run is in the correct state.
        :rtype: bool
         """
         if self._status in bad_states:
             msg = f"Unable to {action} due to Run status={self._status}."
             if should_raise:
-                raise RuntimeError(msg)
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    blame=ErrorBlame.UNKNOWN,
+                )
             LOGGER.warning(msg)
             return False
         return True

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 103 additions & 18 deletions
@@ -28,6 +28,7 @@
     _trace_destination_from_project_scope,
     _write_output,
 )
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 
 # pylint: disable=line-too-long
@@ -160,44 +161,111 @@ def _validate_input_data_for_evaluator(evaluator, evaluator_name, df_data, is_ta
     missing_inputs = [col for col in required_inputs if col not in df_data.columns]
     if missing_inputs:
         if not is_target_fn:
-            raise ValueError(f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}.")
-        raise ValueError(f"Missing required inputs for target : {missing_inputs}.")
+            msg = f"Missing required inputs for evaluator {evaluator_name} : {missing_inputs}."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.MISSING_FIELD,
+                blame=ErrorBlame.USER_ERROR,
+            )
+        msg = f"Missing required inputs for target : {missing_inputs}."
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
 
 
 def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name):
     if data is None:
-        raise ValueError("data must be provided for evaluation.")
+        msg = "data parameter must be provided for evaluation."
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
 
     if target is not None:
         if not callable(target):
-            raise ValueError("target must be a callable function.")
+            msg = "target parameter must be a callable function."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     if data is not None:
         if not isinstance(data, str):
-            raise ValueError("data must be a string.")
+            msg = "data parameter must be a string."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     if evaluators is not None:
         if not isinstance(evaluators, dict):
-            raise ValueError("evaluators must be a dictionary.")
+            msg = "evaluators parameter must be a dictionary."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     if output_path is not None:
         if not isinstance(output_path, str):
-            raise ValueError("output_path must be a string.")
+            msg = "output_path parameter must be a string."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     if azure_ai_project is not None:
         if not isinstance(azure_ai_project, Dict):
-            raise ValueError("azure_ai_project must be a Dict.")
+            msg = "azure_ai_project parameter must be a dictionary."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     if evaluation_name is not None:
         if not isinstance(evaluation_name, str):
-            raise ValueError("evaluation_name must be a string.")
+            msg = "evaluation_name parameter must be a string."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
 
     try:
         initial_data_df = pd.read_json(data, lines=True)
     except Exception as e:
-        raise ValueError(
-            f"Failed to load data from {data}. Please validate it is a valid jsonl data. Error: {str(e)}."
-        ) from e
+        raise EvaluationException(
+            message=f"Failed to load data from {data}. Confirm that it is valid jsonl data. Error: {str(e)}.",
+            internal_message="Failed to load data. Confirm that it is valid jsonl data.",
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        ) from e
 
     return initial_data_df
@@ -219,11 +287,18 @@ def _validate_columns(
     :type target: Optional[Callable]
     :param evaluator_config: The configuration for evaluators.
     :type evaluator_config: Dict[str, Dict[str, str]]
-    :raises ValueError: If column starts from "__outputs." while target is defined.
+    :raises EvaluationException: If column starts from "__outputs." while target is defined.
     """
     if target:
         if any(c.startswith(Prefixes.TSG_OUTPUTS) for c in df.columns):
-            raise ValueError("The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.')
+            msg = "The column cannot start from " f'"{Prefixes.TSG_OUTPUTS}" if target was defined.'
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
     # If the target function is given, it may return
     # several columns and hence we cannot check the availability of columns
     # without knowing target function semantics.
@@ -319,9 +394,13 @@ def _process_evaluator_config(evaluator_config: Dict[str, Dict[str, str]]) -> Di
     for map_to_key, map_value in mapping_config.items():
         # Check if there's any unexpected reference other than ${target.} or ${data.}
         if unexpected_references.search(map_value):
-            raise ValueError(
-                "Unexpected references detected in 'evaluator_config'. "
-                "Ensure only ${target.} and ${data.} are used."
+            msg = "Unexpected references detected in 'evaluator_config'. Ensure only ${target.} and ${data.} are used."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
             )
 
     # Replace ${target.} with ${run.outputs.}
@@ -455,7 +534,13 @@ def evaluate(
             "    if __name__ == '__main__':\n"
             "        evaluate(...)"
         )
-        raise RuntimeError(error_message) from e
+        raise EvaluationException(
+            message=error_message,
+            internal_message=error_message,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.FAILED_EXECUTION,
+            blame=ErrorBlame.UNKNOWN,
+        ) from e
 
     raise e

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 12 additions & 5 deletions
@@ -12,6 +12,7 @@
 import pandas as pd
 
 from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, Prefixes
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._evaluate._eval_run import EvalRun
 
 LOGGER = logging.getLogger(__name__)
@@ -31,11 +32,17 @@ def is_none(value):
 def extract_workspace_triad_from_trace_provider(trace_provider: str):  # pylint: disable=name-too-long
     match = re.match(AZURE_WORKSPACE_REGEX_FORMAT, trace_provider)
     if not match or len(match.groups()) != 5:
-        raise ValueError(
-            "Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
-            "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
-            f"workspaces/<workspace_name>, got {trace_provider}"
-        )
+        raise EvaluationException(
+            message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
+            "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
+            f"workspaces/<workspace_name>, got {trace_provider}",
+            internal_message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
+            "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
+            "workspaces/<workspace_name>",
+            target=ErrorTarget.UNKNOWN,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.UNKNOWN,
+        )
     subscription_id = match.group(1)
     resource_group_name = match.group(3)
     workspace_name = match.group(5)
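Taken together, the refactor means callers can catch one exception type where they previously had to guard against a mix of bare Exception, ValueError, and RuntimeError. A hedged sketch of the caller-side effect (the evaluate import path and the attribute access on the exception are inferred from this diff rather than confirmed, and answer_length is a stand-in evaluator, not part of the SDK):

    from azure.ai.evaluation import evaluate
    from azure.ai.evaluation._exceptions import ErrorBlame, EvaluationException


    def answer_length(answer: str, **kwargs):
        # Stand-in evaluator so the example is self-contained.
        return {"length": len(answer)}


    try:
        result = evaluate(data="data.jsonl", evaluators={"answer_length": answer_length})
    except EvaluationException as exc:
        # One except clause now covers failures that used to surface as
        # bare Exception, ValueError, or RuntimeError across these modules.
        if exc.blame == ErrorBlame.USER_ERROR:
            print(f"Input problem ({exc.category}): {exc.message}")
        else:
            print(f"Service or SDK failure ({exc.category}): {exc.message}")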
