Commit 886139f

Adding properties to evaluation for UI rendering (Azure#37718)
* Adding properties needed by UI
* Updating tests
* Fixing linting issues
* Fixing formatting issues
1 parent 94396ed commit 886139f

4 files changed: +21 −4 lines changed


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py

Lines changed: 7 additions & 0 deletions
@@ -48,6 +48,13 @@ class DefaultOpenEncoding:
     """SDK Default Encoding when writing a file"""


+class EvaluationRunProperties:
+    """Defines properties used to identify an evaluation run by UI"""
+
+    RUN_TYPE = "runType"
+    EVALUATION_RUN = "_azureml.evaluation_run"
+
+
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

 CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
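The new EvaluationRunProperties class centralizes the property keys that the evaluation UI looks for on a run, so callers can reference the constants instead of repeating raw strings. A minimal usage sketch follows; the build_ui_properties helper is hypothetical and only for illustration, while the keys and values come from the hunks in this commit.

from azure.ai.evaluation._constants import EvaluationRunProperties

def build_ui_properties() -> dict:
    # Hypothetical helper: assembles the property bag the UI expects on an evaluation run.
    return {
        EvaluationRunProperties.RUN_TYPE: "eval_run",                          # key "runType"
        EvaluationRunProperties.EVALUATION_RUN: "azure-ai-generative-parent",  # key "_azureml.evaluation_run"
    }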

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 2 additions & 1 deletion
@@ -16,6 +16,7 @@
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
     EvaluationMetrics,
+    EvaluationRunProperties,
     Prefixes,
     _InternalEvaluationMetrics,
 )
@@ -352,7 +353,7 @@ def _apply_target_to_data(
         flow=target,
         display_name=evaluation_name,
         data=data,
-        properties={"runType": "eval_run", "isEvaluatorRun": "true"},
+        properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
         stream=True,
         name=_run_name,
     )
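For context, this properties bag is attached when the target flow is submitted as a batch run. A simplified sketch of the call shape, assuming a promptflow-style client object; the client variable and surrounding names stand in for code not shown in this hunk, and only the keyword arguments come from the diff above.

from azure.ai.evaluation._constants import EvaluationRunProperties

# Simplified sketch; "client", "target", "evaluation_name", "data", and "_run_name"
# are placeholders for the surrounding code in _apply_target_to_data.
run = client.run(
    flow=target,
    display_name=evaluation_name,
    data=data,
    properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
    stream=True,
    name=_run_name,
)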

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 8 additions & 2 deletions
@@ -12,7 +12,12 @@
 
 import pandas as pd
 
-from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, DefaultOpenEncoding, Prefixes
+from azure.ai.evaluation._constants import (
+    DEFAULT_EVALUATION_RESULTS_FILE_NAME,
+    DefaultOpenEncoding,
+    Prefixes,
+    EvaluationRunProperties,
+)
 from azure.ai.evaluation._evaluate._eval_run import EvalRun
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
@@ -112,7 +117,8 @@ def _log_metrics_and_instance_results(
     if run is None:
         ev_run.write_properties_to_run_history(
             properties={
-                "_azureml.evaluation_run": "azure-ai-generative-parent",
+                EvaluationRunProperties.RUN_TYPE: "eval_run",
+                EvaluationRunProperties.EVALUATION_RUN: "azure-ai-generative-parent",
                 "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
                 "isEvaluatorRun": "true",
             }
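With this change the run-history property bag carries both the run type and the parent-run marker under the new constants. A sketch of the resulting call, assuming ev_run is the EvalRun instance from the surrounding function; the artifact_name literal here is illustrative, since the real value comes from the caller.

import json

from azure.ai.evaluation._constants import EvaluationRunProperties

artifact_name = "instance_results.jsonl"  # illustrative placeholder
ev_run.write_properties_to_run_history(
    properties={
        EvaluationRunProperties.RUN_TYPE: "eval_run",
        EvaluationRunProperties.EVALUATION_RUN: "azure-ai-generative-parent",
        "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
        "isEvaluatorRun": "true",
    }
)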

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py

Lines changed: 4 additions & 1 deletion
@@ -15,6 +15,7 @@
     F1ScoreEvaluator,
     FluencyEvaluator,
     GroundednessEvaluator,
+    evaluate,
 )
 from azure.ai.evaluation._common.math import list_mean_nan_safe
 

@@ -401,6 +402,7 @@ def test_evaluate_track_in_cloud(
         # module named test_evaluate and it will be a different module in unit test
         # folder. By keeping function in separate file we guarantee, it will be loaded
         # from there.
+        # os.environ["AZURE_TEST_RUN_LIVE"] = "True"
         from .target_fn import target_fn
 
         f1_score_eval = F1ScoreEvaluator()
@@ -415,7 +417,6 @@ def test_evaluate_track_in_cloud(
         )
         row_result_df = pd.DataFrame(result["rows"])
 
-        assert "outputs.answer" in row_result_df.columns
         assert "outputs.answer.length" in row_result_df.columns
         assert list(row_result_df["outputs.answer.length"]) == [28, 76, 22]
         assert "outputs.f1.f1_score" in row_result_df.columns
@@ -429,6 +430,7 @@ def test_evaluate_track_in_cloud(
         assert remote_run is not None
         assert remote_run["runMetadata"]["properties"]["azureml.promptflow.local_to_cloud"] == "true"
         assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
+        assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "promptflow.BatchRun"
         assert remote_run["runMetadata"]["displayName"] == evaluation_name
 
     @pytest.mark.skipif(in_ci(), reason="This test fails in CI and needs to be investigate. Bug: 3458432")
@pytest.mark.skipif(in_ci(), reason="This test fails in CI and needs to be investigate. Bug: 3458432")
@@ -472,6 +474,7 @@ def test_evaluate_track_in_cloud_no_target(
         remote_run = _get_run_from_run_history(run_id, azure_ml_client, project_scope)
 
         assert remote_run is not None
+        assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
         assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "azure-ai-generative-parent"
         assert remote_run["runMetadata"]["displayName"] == evaluation_name
 
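The e2e tests now check that both properties surface in the run-history metadata returned for the cloud run. A condensed sketch of the checks, assuming remote_run is the payload returned by _get_run_from_run_history as in the tests above:

# Condensed sketch of the assertions added in this commit.
props = remote_run["runMetadata"]["properties"]
assert props["runType"] == "eval_run"
# Per the tests above, "_azureml.evaluation_run" is "promptflow.BatchRun" for the
# targeted run and "azure-ai-generative-parent" for the no-target run.
assert props["_azureml.evaluation_run"] in ("promptflow.BatchRun", "azure-ai-generative-parent")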
