
Commit fa38e21

Evaluation: Disable local and remote tracing (Azure#38372)
* Disable tracing for target run
* fix black issue
* fix linting issue
* update tests
* disable local tracing
* update changelog
* fix black issue
1 parent bd4383f commit fa38e21

File tree

8 files changed, +29 -50 lines changed


sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@
 
 ### Other Changes
 - Refined error messages for serviced-based evaluators and simulators.
+- Tracing has been disabled due to Cosmos DB initialization issue.
 - Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features.
 - Changed the randomization pattern for `AdversarialSimulator` such that there is an almost equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, user will see 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex.
 - For the `DirectAttackSimulator`, the prompt templates used to generate simulated outputs for each Adversarial harm category will no longer be in a randomized order by default. To override this behavior, pass `randomize_order=True` when you call the `DirectAttackSimulator`, for example:
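A hedged sketch of where `randomize_order=True` might be passed when invoking `DirectAttackSimulator`. Apart from that flag, which the changelog entry names, the import path, constructor arguments, scenario value, and callback contract shown here are assumptions rather than content of this commit.

import asyncio

from azure.ai.evaluation.simulator import AdversarialScenario, DirectAttackSimulator
from azure.identity import DefaultAzureCredential

azure_ai_project = {
    "subscription_id": "<subscription-id>",        # placeholder values
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}


async def target_callback(messages, stream=False, session_state=None, context=None):
    # Stand-in target: a real callback would invoke the application under test.
    messages["messages"].append({"role": "assistant", "content": "placeholder response"})
    return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}


async def main():
    simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
    outputs = await simulator(
        scenario=AdversarialScenario.ADVERSARIAL_QA,
        target=target_callback,
        max_simulation_results=10,
        randomize_order=True,  # restores the randomized prompt-template order described in the changelog
    )
    print(outputs)


asyncio.run(main())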

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ class EvaluationRunProperties:
 
 PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600
 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC"
+PF_DISABLE_TRACING = "PF_DISABLE_TRACING"
 
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT = "OTEL_EXPORTER_OTLP_TRACES_TIMEOUT"
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT = 60
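For context, a minimal stdlib-only sketch (not SDK code) of how an environment-variable name constant such as `PF_DISABLE_TRACING` is typically consumed. The `tracing_disabled` helper is hypothetical; promptflow's own check may differ.

import os

PF_DISABLE_TRACING = "PF_DISABLE_TRACING"  # the constant added by this commit


def tracing_disabled() -> bool:
    # Hypothetical helper: treat the flag as a boolean-ish string toggle.
    return os.environ.get(PF_DISABLE_TRACING, "false").lower() == "true"


os.environ[PF_DISABLE_TRACING] = "true"
print(tracing_disabled())  # True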

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py

Lines changed: 3 additions & 0 deletions
@@ -14,6 +14,7 @@
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT,
     PF_BATCH_TIMEOUT_SEC,
     PF_BATCH_TIMEOUT_SEC_DEFAULT,
+    PF_DISABLE_TRACING,
 )
 
 from ..._user_agent import USER_AGENT
@@ -49,6 +50,7 @@ def __enter__(self) -> None:
         if isinstance(self.client, ProxyClient):
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
             os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
+            os.environ[PF_DISABLE_TRACING] = "true"
 
             if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None:
                 os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT)
@@ -76,6 +78,7 @@ def __exit__(
         if isinstance(self.client, ProxyClient):
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
             os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
+            os.environ.pop(PF_DISABLE_TRACING, None)
 
             if self._is_batch_timeout_set_by_system:
                 os.environ.pop(PF_BATCH_TIMEOUT_SEC, None)
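A simplified, stdlib-only sketch of the enter/exit pattern `EvalRunContext` follows after this change: set process-wide flags (now including `PF_DISABLE_TRACING`) before the batch run and remove them afterwards. The `BatchRunEnv` class name is illustrative; only the environment-variable keys come from the diff.

import os
from types import TracebackType
from typing import Optional, Type

PF_DISABLE_TRACING = "PF_DISABLE_TRACING"
PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP"


class BatchRunEnv:
    def __enter__(self) -> None:
        # Flags are set for the whole process while the batch run is active.
        os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
        os.environ[PF_DISABLE_TRACING] = "true"

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        # Remove the flags again so later code is unaffected.
        os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
        os.environ.pop(PF_DISABLE_TRACING, None)


with BatchRunEnv():
    assert os.environ[PF_DISABLE_TRACING] == "true"
assert PF_DISABLE_TRACING not in os.environ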

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py

Lines changed: 5 additions & 0 deletions
@@ -6,6 +6,7 @@
 from typing import Optional, Type
 
 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._constants import PF_DISABLE_TRACING
 
 
 class TargetRunContext:
@@ -29,6 +30,8 @@ def __enter__(self) -> None:
         if not self._upload_snapshot:
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
 
+        os.environ[PF_DISABLE_TRACING] = "true"
+
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
@@ -39,3 +42,5 @@ def __exit__(
 
         if not self._upload_snapshot:
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
+
+        os.environ.pop(PF_DISABLE_TRACING, None)
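An illustrative pytest-style check, not taken from the repo's test suite, of the contract the hunks above establish: `PF_DISABLE_TRACING` is set for the duration of `TargetRunContext` and removed on exit, independently of the snapshot-upload branch. The `upload_snapshot` keyword argument is an assumption about the constructor.

import os

from azure.ai.evaluation._constants import PF_DISABLE_TRACING
from azure.ai.evaluation._evaluate._batch_run.target_run_context import TargetRunContext


def test_target_run_context_toggles_tracing_flag():
    # Assumes the flag is not already set in the test environment.
    assert PF_DISABLE_TRACING not in os.environ
    with TargetRunContext(upload_snapshot=False):
        assert os.environ[PF_DISABLE_TRACING] == "true"
    assert PF_DISABLE_TRACING not in os.environ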

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py

Lines changed: 9 additions & 9 deletions
@@ -34,14 +34,15 @@
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
 except (ModuleNotFoundError, ImportError):
-    # If the above mentioned modules cannot be imported, we are running
-    # in local mode and MLClient in the constructor will be None, so
-    # we will not arrive to Azure-dependent code.
-
-    # We are logging the import failure only if debug logging level is set because:
-    # - If the project configuration was not provided this import is not needed.
-    # - If the project configuration was provided, the error will be raised by PFClient.
-    LOGGER.debug("promptflow.azure is not installed.")
+    raise EvaluationException(  # pylint: disable=raise-missing-from
+        message=(
+            "The required packages for remote tracking are missing.\n"
+            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
+        ),
+        target=ErrorTarget.EVALUATE,
+        category=ErrorCategory.MISSING_PACKAGE,
+        blame=ErrorBlame.USER_ERROR,
+    )
 
 
 @dataclasses.dataclass
@@ -103,7 +104,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
     _SCOPE = "https://management.azure.com/.default"
 
     EVALUATION_ARTIFACT = "instance_results.jsonl"
-    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"
 
     def __init__(
         self,
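The first hunk replaces a debug-level log with a hard failure when the optional Azure packages cannot be imported. A generic, stdlib-only sketch of that import-guard pattern follows; `RemoteTrackingError` is an illustrative stand-in for the SDK's `EvaluationException`, and the guarded import is reduced to a single module.

class RemoteTrackingError(Exception):
    pass


try:
    from azure.ai.ml import MLClient  # provided by the optional "remote" extra
except (ModuleNotFoundError, ImportError):
    # Fail fast with an actionable message instead of logging at debug level.
    raise RemoteTrackingError(
        "The required packages for remote tracking are missing.\n"
        'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
    ) from None

Because the guard sits at module level, a missing extra now surfaces as an exception when the module is imported rather than as a debug log entry.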

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 7 additions & 36 deletions
@@ -9,7 +9,7 @@
 
 import pandas as pd
 from promptflow._sdk._constants import LINE_NUMBER
-from promptflow._sdk._errors import MissingAzurePackage, UserAuthenticationError, UploadInternalError
+from promptflow._sdk._errors import UserAuthenticationError, UploadInternalError
 from promptflow.client import PFClient
 from promptflow.entities import Run
 
@@ -700,36 +700,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     if target is not None:
         _validate_columns_for_target(input_data_df, target)
 
-    # Target Run
-    try:
-        pf_client = PFClient(
-            config=(
-                {"trace.destination": _trace_destination_from_project_scope(azure_ai_project)}
-                if azure_ai_project
-                else None
-            ),
-            user_agent=USER_AGENT,
-        )
-    # pylint: disable=raise-missing-from
-    except MissingAzurePackage:
-        msg = (
-            "The required packages for remote tracking are missing.\n"
-            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
-        )
-
-        raise EvaluationException(  # pylint: disable=raise-missing-from
-            message=msg,
-            target=ErrorTarget.EVALUATE,
-            category=ErrorCategory.MISSING_PACKAGE,
-            blame=ErrorBlame.USER_ERROR,
-        )
-
-    trace_destination: Optional[str] = pf_client._config.get_trace_destination()  # pylint: disable=protected-access
-
-    # Handle the case where the customer manually run "pf config set trace.destination=none"
-    if trace_destination and trace_destination.lower() == "none":
-        trace_destination = None
-
+    pf_client = PFClient(user_agent=USER_AGENT)
     target_run: Optional[Run] = None
 
     # Create default configuration for evaluators that directly maps
@@ -803,11 +774,7 @@ def eval_batch_run(
            # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
            # multiple evaluators. If the path is already absolute, abspath will return the original path.
            data = os.path.abspath(data)
-
-            # A user reported intermittent errors when PFClient uploads evaluation runs to the cloud.
-            # The root cause is still unclear, but it seems related to a conflict between the async run uploader
-            # and the async batch run. As a quick mitigation, use a PFClient without a trace destination for batch runs.
-            per_evaluator_results = eval_batch_run(ProxyClient(PFClient(user_agent=USER_AGENT)), data=data)
+            per_evaluator_results = eval_batch_run(ProxyClient(pf_client), data=data)
        else:
            data = input_data_df
            per_evaluator_results = eval_batch_run(CodeClient(), data=input_data_df)
@@ -849,6 +816,10 @@ def eval_batch_run(
     result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
     metrics = _aggregate_metrics(evaluators_result_df, evaluators)
     metrics.update(evaluators_metric)
+
+    # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
+    target_run = None
+    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
     studio_url = _log_metrics_and_instance_results(
         metrics,
         result_df,
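After this change, `_evaluate` builds a plain `PFClient` and derives `trace_destination` from the project scope only when logging results. Below is a hedged sketch of what such a derivation could look like; the exact URI format produced by `_trace_destination_from_project_scope` is an assumption here, and only the "derive from `azure_ai_project`, else `None`" behavior comes from the diff.

from typing import Optional


def trace_destination_from_project_scope(azure_ai_project: Optional[dict]) -> Optional[str]:
    # Mirror the conditional in the last hunk: no project scope means no trace destination.
    if not azure_ai_project:
        return None
    # Assumed azureml workspace URI format.
    return (
        "azureml://subscriptions/{subscription_id}/resourceGroups/{resource_group_name}"
        "/providers/Microsoft.MachineLearningServices/workspaces/{project_name}"
    ).format(**azure_ai_project)


print(
    trace_destination_from_project_scope(
        {
            "subscription_id": "00000000-0000-0000-0000-000000000000",
            "resource_group_name": "my-rg",
            "project_name": "my-project",
        }
    )
)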

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 2 additions & 3 deletions
@@ -137,7 +137,7 @@ def _log_metrics_and_instance_results(
         ml_client=azure_pf_client.ml_client,
         promptflow_run=run,
     ) as ev_run:
-        artifact_name = EvalRun.EVALUATION_ARTIFACT if run else EvalRun.EVALUATION_ARTIFACT_DUMMY_RUN
+        artifact_name = EvalRun.EVALUATION_ARTIFACT
 
         with tempfile.TemporaryDirectory() as tmpdir:
             # storing multi_modal images if exists
@@ -164,9 +164,8 @@ def _log_metrics_and_instance_results(
         ev_run.write_properties_to_run_history(
             properties={
                 EvaluationRunProperties.RUN_TYPE: "eval_run",
-                EvaluationRunProperties.EVALUATION_RUN: "azure-ai-generative-parent",
+                EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
                 "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
-                "isEvaluatorRun": "true",
             }
         )
 
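A stdlib-only sketch of the run-history properties payload after this change, mirroring the keys in the hunk above and the values asserted in the updated e2e tests; the literal property names stand in for the `EvaluationRunProperties` attributes.

import json

artifact_name = "instance_results.jsonl"  # EvalRun.EVALUATION_ARTIFACT

properties = {
    "runType": "eval_run",                              # EvaluationRunProperties.RUN_TYPE
    "_azureml.evaluation_run": "promptflow.BatchRun",   # EvaluationRunProperties.EVALUATION_RUN
    "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
}

print(json.dumps(properties, indent=2))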

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py

Lines changed: 1 addition & 2 deletions
@@ -631,7 +631,6 @@ def test_evaluate_track_in_cloud(
        remote_run = _get_run_from_run_history(run_id, azure_ml_client, project_scope)

        assert remote_run is not None
-        assert remote_run["runMetadata"]["properties"]["azureml.promptflow.local_to_cloud"] == "true"
        assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
        assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "promptflow.BatchRun"
        assert remote_run["runMetadata"]["displayName"] == evaluation_name
@@ -678,7 +677,7 @@ def test_evaluate_track_in_cloud_no_target(

        assert remote_run is not None
        assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
-        assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "azure-ai-generative-parent"
+        assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "promptflow.BatchRun"
        assert remote_run["runMetadata"]["displayName"] == evaluation_name

    @pytest.mark.parametrize(
