
Commit 3a43719

Evaluation: Fix file being used by another process error during target function execution (Azure#38021)
* Evaluation: Disable target snapshot upload by default
* Update changelog
1 parent 6564d25 commit 3a43719

8 files changed (+69, -32 lines)

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 
 ### Bugs Fixed
 - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format
+- Fixed an issue where the `evaluate` API would fail with "[WinError 32] The process cannot access the file because it is being used by another process" when venv folder and target function file are in the same directory.
 - Fix evaluate API failure when `trace.destination` is set to `none`
 
 ### Other Changes
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/__init__.py

@@ -1,8 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from .batch_run_context import BatchRunContext
+from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
+from .target_run_context import TargetRunContext
 
-__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"]
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py

Lines changed: 4 additions & 4 deletions

@@ -22,13 +22,13 @@
 from .proxy_client import ProxyClient
 
 
-class BatchRunContext:
-    """Context manager for batch run clients.
+class EvalRunContext:
+    """Context manager for eval batch run.
 
     :param client: The client to run in the context.
     :type client: Union[
-        ~azure.ai.evaluation._evaluate._batch_run_client.code_client.CodeClient,
-        ~azure.ai.evaluation._evaluate._batch_run_client.proxy_client.ProxyClient
+        ~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
+        ~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
     ]
     """
 
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+
+
+class TargetRunContext:
+    """Context manager for target batch run.
+
+    :param upload_snapshot: Whether to upload target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, upload_snapshot: bool) -> None:
+        self._upload_snapshot = upload_snapshot
+
+    def __enter__(self) -> None:
+        # Address "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
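
For orientation, here is a minimal sketch of what this context manager does to the process environment. It assumes the module is importable as azure.ai.evaluation._evaluate._batch_run (the path used by the imports elsewhere in this commit) and that PF_FLOW_ENTRY_IN_TMP is not already set before entering; the asserts are illustrative only.

import os

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
from azure.ai.evaluation._evaluate._batch_run import TargetRunContext

# Default path taken by the evaluate API: snapshot upload disabled, so promptflow
# is told (via PF_FLOW_ENTRY_IN_TMP) to upload only the flex entry file.
with TargetRunContext(upload_snapshot=False):
    assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) == "true"
assert PF_FLOW_ENTRY_IN_TMP not in os.environ  # cleaned up on exit

# Opt-in path: the environment is left untouched and the full snapshot is uploaded.
with TargetRunContext(upload_snapshot=True):
    assert PF_FLOW_ENTRY_IN_TMP not in os.environ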

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 18 additions & 18 deletions
@@ -26,7 +26,7 @@
 )
 from .._model_configurations import AzureAIProject, EvaluationResult, EvaluatorConfig
 from .._user_agent import USER_AGENT
-from ._batch_run_client import BatchRunContext, CodeClient, ProxyClient
+from ._batch_run import EvalRunContext, CodeClient, ProxyClient, TargetRunContext
 from ._utils import (
     _apply_column_mapping,
     _log_metrics_and_instance_results,
@@ -395,7 +395,7 @@ def _apply_target_to_data(
     pf_client: PFClient,
     initial_data: pd.DataFrame,
     evaluation_name: Optional[str] = None,
-    _run_name: Optional[str] = None,
+    **kwargs,
 ) -> Tuple[pd.DataFrame, Set[str], Run]:
     """
     Apply the target function to the data set and return updated data and generated columns.
@@ -410,22 +410,22 @@ def _apply_target_to_data(
     :type initial_data: pd.DataFrame
     :param evaluation_name: The name of the evaluation.
     :type evaluation_name: Optional[str]
-    :param _run_name: The name of target run. Used for testing only.
-    :type _run_name: Optional[str]
     :return: The tuple, containing data frame and the list of added columns.
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
-    # We are manually creating the temporary directory for the flow
-    # because the way tempdir remove temporary directories will
-    # hang the debugger, because promptflow will keep flow directory.
-    run: Run = pf_client.run(
-        flow=target,
-        display_name=evaluation_name,
-        data=data,
-        properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
-        stream=True,
-        name=_run_name,
-    )
+    _run_name = kwargs.get("_run_name")
+    upload_target_snapshot = kwargs.get("_upload_target_snapshot", False)
+
+    with TargetRunContext(upload_target_snapshot):
+        run: Run = pf_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
+            stream=True,
+            name=_run_name,
+        )
+
     target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)
     # Remove input and output prefix
     generated_columns = {
@@ -706,7 +706,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
-            target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
+            target, data, pf_client, input_data_df, evaluation_name, **kwargs
         )
 
     for evaluator_name, mapping in column_mapping.items():
@@ -738,7 +738,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     def eval_batch_run(
         batch_run_client: TClient, *, data=Union[str, os.PathLike, pd.DataFrame]
     ) -> Dict[str, __EvaluatorInfo]:
-        with BatchRunContext(batch_run_client):
+        with EvalRunContext(batch_run_client):
             runs = {
                 evaluator_name: batch_run_client.run(
                     flow=evaluator,
@@ -752,7 +752,7 @@ def eval_batch_run(
                 for evaluator_name, evaluator in evaluators.items()
             }
 
-            # get_details needs to be called within BatchRunContext scope in order to have user agent populated
+            # get_details needs to be called within EvalRunContext scope in order to have user agent populated
             return {
                 evaluator_name: {
                     "result": batch_run_client.get_details(run, all_results=True),

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py

Lines changed: 8 additions & 8 deletions
@@ -6,7 +6,7 @@
 
 from azure.ai.evaluation._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
 from azure.ai.evaluation._user_agent import USER_AGENT
-from azure.ai.evaluation._evaluate._batch_run_client import BatchRunContext, CodeClient, ProxyClient
+from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, CodeClient, ProxyClient
 
 
 @pytest.fixture
@@ -20,15 +20,15 @@ def pf_client_mock():
 
 
 @pytest.mark.unittest
-class TestBatchRunContext:
+class TestEvalRunContext:
     def test_with_codeclient(self, mocker, code_client_mock):
         mock_append_user_agent = mocker.patch(
             "promptflow._utils.user_agent_utils.ClientUserAgentUtil.append_user_agent"
         )
         mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
         mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")
 
-        with BatchRunContext(code_client_mock):
+        with EvalRunContext(code_client_mock):
             # TODO: Failed to mock inject_openai_api and recover_openai_api for some reason.
             # Need to investigate further.
             # mock_inject_openai_api.assert_called_once()
@@ -46,7 +46,7 @@ def test_with_pfclient(self, mocker, pf_client_mock):
         mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
         mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")
 
-        with BatchRunContext(code_client_mock):
+        with EvalRunContext(code_client_mock):
             mock_append_user_agent.assert_not_called()
             mock_inject_openai_api.assert_not_called()
             pass
@@ -57,22 +57,22 @@ def test_batch_timeout_default(self):
         before_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
         assert before_timeout is None
 
-        with BatchRunContext(ProxyClient(PFClient)):
+        with EvalRunContext(ProxyClient(PFClient)):
             during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
             assert during_timeout == PF_BATCH_TIMEOUT_SEC_DEFAULT
 
-        # Default timeout should be reset after exiting BatchRunContext
+        # Default timeout should be reset after exiting EvalRunContext
         after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
         assert after_timeout is None
 
     def test_batch_timeout_custom(self):
         custom_timeout = 1000
         os.environ[PF_BATCH_TIMEOUT_SEC] = str(custom_timeout)
 
-        with BatchRunContext(ProxyClient(PFClient)):
+        with EvalRunContext(ProxyClient(PFClient)):
             during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
             assert during_timeout == custom_timeout
 
-        # Custom timeouts should not be reset after exiting BatchRunContext
+        # Custom timeouts should not be reset after exiting EvalRunContext
         after_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
         assert after_timeout == custom_timeout
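
The commit does not show a dedicated test for the new context manager, so here is a hedged sketch of one, written in the same style as the tests above. It checks the exception path: __exit__ should remove PF_FLOW_ENTRY_IN_TMP even when the wrapped target run raises. It assumes the variable is not set before the test runs.

import os

import pytest
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext


@pytest.mark.unittest
class TestTargetRunContext:
    def test_env_var_removed_even_when_target_run_raises(self):
        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None

        with pytest.raises(RuntimeError):
            with TargetRunContext(upload_snapshot=False):
                assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) == "true"
                raise RuntimeError("simulated target failure")

        # __exit__ runs on the exception path too, so the variable is cleaned up.
        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None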
