
Commit 4d8426b
Port of Batch Engine code (Part II) (Azure#40411)

- Adds support for target function calls
- Adds error handling and retries for the OpenAI prompty requests
- Moves to async-first methods in the implementation, simplifying it and letting the caller of the code (`RunSubmitterClient` in this case) decide which thread the async event loop runs on
1 parent 8125ca3 commit 4d8426b

23 files changed (+685 -210 lines)
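The headline change is the async-first execution model described above: `RunSubmitterClient` now builds the coroutine on the caller's thread and hands `asyncio.run` to a thread pool, so the event loop lives on a worker thread and the caller only holds a `Future`. A minimal sketch of that pattern (illustrative names, not the SDK's exact code):

import asyncio
from concurrent.futures import ThreadPoolExecutor


async def submit(inputs: list) -> list:
    # Stand-in for RunSubmitter.submit: do async work per input row.
    await asyncio.sleep(0)
    return [f"processed {item}" for item in inputs]


pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
# The coroutine is created here, but its event loop runs on the pool thread,
# not on the caller's thread; the caller just holds a Future.
future = pool.submit(asyncio.run, submit([1, 2, 3]))
print(future.result())  # ['processed 1', 'processed 2', 'processed 3']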

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py

Lines changed: 30 additions & 16 deletions
@@ -2,18 +2,21 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import asyncio
 import logging
 import pandas as pd
 import sys
 from collections import defaultdict
-from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import Future
 from os import PathLike
 from typing import Any, Callable, Dict, Final, List, Mapping, Optional, Sequence, Union, cast
 
 from .batch_clients import BatchClientRun, HasAsyncCallable
 from ..._legacy._batch_engine._run_submitter import RunSubmitter
 from ..._legacy._batch_engine._config import BatchEngineConfig
 from ..._legacy._batch_engine._run import Run
+from ..._legacy._adapters._constants import LINE_NUMBER
+from ..._legacy._common._thread_pool_executor_with_context import ThreadPoolExecutorWithContext
 
 
 LOGGER = logging.getLogger(__name__)
@@ -22,7 +25,9 @@
 class RunSubmitterClient:
     def __init__(self, config: Optional[BatchEngineConfig] = None) -> None:
         self._config = config or BatchEngineConfig(LOGGER, use_async=True)
-        self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
+        self._thread_pool = ThreadPoolExecutorWithContext(
+            thread_name_prefix="evaluators_thread",
+            max_workers=self._config.max_concurrency)
 
     def run(
         self,
@@ -33,30 +38,36 @@ def run(
         **kwargs: Any,
     ) -> BatchClientRun:
         if not isinstance(data, pd.DataFrame):
-            # Should never get here
             raise ValueError("Data must be a pandas DataFrame")
-        if not column_mapping:
-            raise ValueError("Column mapping must be provided")
 
-        # The column mappings are index by data to indicate they come from the data
+        # The column mappings are indexed by data to indicate they come from the data
         # input. Update the inputs so that each entry is a dictionary with a data key
         # that contains the original input data.
         inputs = [{"data": input_data} for input_data in data.to_dict(orient="records")]
 
-        # always uses async behind the scenes
+        # Pass the correct previous run to the evaluator
+        run: Optional[BatchClientRun] = kwargs.pop("run", None)
+        if run:
+            kwargs["run"] = self._get_run(run)
+
+        # Try to get async function to use
         if isinstance(flow, HasAsyncCallable):
            flow = flow._to_async()  # pylint: disable=protected-access
 
-        run_submitter = RunSubmitter(self._config)
+        # Start an event loop for async execution on a thread pool thread to separate it
+        # from the caller's thread.
+        run_submitter = RunSubmitter(self._config, self._thread_pool)
         run_future = self._thread_pool.submit(
-            run_submitter.submit,
-            dynamic_callable=flow,
-            inputs=inputs,
-            column_mapping=column_mapping,
-            name_prefix=evaluator_name,
-            created_on=kwargs.pop("created_on", None),
-            storage_creator=kwargs.pop("storage_creator", None),
-            **kwargs,
+            asyncio.run,
+            run_submitter.submit(
+                dynamic_callable=flow,
+                inputs=inputs,
+                column_mapping=column_mapping,
+                name_prefix=evaluator_name,
+                created_on=kwargs.pop("created_on", None),
+                storage_creator=kwargs.pop("storage_creator", None),
+                **kwargs,
+            )
         )
 
         return run_future
@@ -75,7 +86,10 @@ def _update(prefix: str, items: Sequence[Mapping[str, Any]]) -> None:
                 key = f"{prefix}.{k}"
                 data[key].append(value)
 
+        # Go from a list of dictionaries (i.e. a row view of the data) to a dictionary of lists
+        # (i.e. a column view of the data)
         _update("inputs", run.inputs)
+        _update("inputs", [{ LINE_NUMBER: i } for i in range(len(run.inputs)) ])
         _update("outputs", run.outputs)
 
         df = pd.DataFrame(data).reindex(columns=[k for k in data.keys()])
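The `get_details` changes above pivot results from a row view (list of dictionaries) to a column view (dictionary of lists), adding a `LINE_NUMBER` input column. A hedged sketch of that reshaping with made-up data:

from collections import defaultdict

rows = [{"question": "q1", "answer": "a1"}, {"question": "q2", "answer": "a2"}]
columns = defaultdict(list)
for index, row in enumerate(rows):
    columns["inputs.line_number"].append(index)  # mirrors the added LINE_NUMBER column
    for key, value in row.items():
        columns[f"inputs.{key}"].append(value)
# columns == {'inputs.line_number': [0, 1],
#             'inputs.question': ['q1', 'q2'],
#             'inputs.answer': ['a1', 'a2']}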

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py

Lines changed: 8 additions & 0 deletions
@@ -8,6 +8,10 @@
 from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
 from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api
+from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
+    inject_openai_api as ported_inject_openai_api,
+    recover_openai_api as ported_recover_openai_api,
+)
 
 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
@@ -68,6 +72,7 @@ def __enter__(self) -> None:
 
         if isinstance(self.client, RunSubmitterClient):
             set_event_loop_policy()
+            ported_inject_openai_api()
 
     def __exit__(
         self,
@@ -92,3 +97,6 @@ def __exit__(
         if self._is_otel_timeout_set_by_system:
             os.environ.pop(OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, None)
             self._is_otel_timeout_set_by_system = False
+
+        if isinstance(self.client, RunSubmitterClient):
+            ported_recover_openai_api()
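The context manager now pairs the ported injector with its recovery call: inject on `__enter__`, recover on `__exit__`, both guarded by the client type. A standalone sketch of that pairing (the imports come from the diff above; the class name is hypothetical):

from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
    inject_openai_api as ported_inject_openai_api,
    recover_openai_api as ported_recover_openai_api,
)


class OpenAIInjectionScope:
    """Hypothetical illustration of inject-on-enter / recover-on-exit."""

    def __init__(self, client) -> None:
        self._client = client
        self._injected = False

    def __enter__(self) -> None:
        if isinstance(self._client, RunSubmitterClient):
            ported_inject_openai_api()
            self._injected = True

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Recover even if the body raised, so the patching never leaks.
        if self._injected:
            ported_recover_openai_api()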

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py

Lines changed: 5 additions & 0 deletions
@@ -58,6 +58,11 @@ def run(
         if not name:
             name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
 
+        # Pass the correct previous run to the evaluator
+        run: Optional[BatchClientRun] = kwargs.pop("run", None)
+        if run:
+            kwargs["run"] = self.get_result(run)
+
         batch_use_async = self._should_batch_use_async(flow_to_run)
         eval_future = self._thread_pool.submit(
             self._pf_client.run,

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py

Lines changed: 17 additions & 1 deletion
@@ -5,8 +5,15 @@
 import types
 from typing import Optional, Type
 
+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClient
+from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
 from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
+    inject_openai_api as ported_inject_openai_api,
+    recover_openai_api as ported_recover_openai_api,
+)
 from azure.ai.evaluation._constants import PF_DISABLE_TRACING
+from azure.ai.evaluation._evaluate._utils import set_event_loop_policy
 
 
 class TargetRunContext:
@@ -16,7 +23,8 @@ class TargetRunContext:
     :type upload_snapshot: bool
     """
 
-    def __init__(self, upload_snapshot: bool = False) -> None:
+    def __init__(self, client: BatchClient, upload_snapshot: bool = False) -> None:
+        self._client = client
         self._upload_snapshot = upload_snapshot
         self._original_cwd = os.getcwd()
 
@@ -32,6 +40,11 @@ def __enter__(self) -> None:
 
         os.environ[PF_DISABLE_TRACING] = "true"
 
+        if isinstance(self._client, RunSubmitterClient):
+            ported_inject_openai_api()
+            # For addressing the issue of asyncio event loop closed on Windows
+            set_event_loop_policy()
+
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
@@ -44,3 +57,6 @@ def __exit__(
         os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
 
         os.environ.pop(PF_DISABLE_TRACING, None)
+
+        if isinstance(self._client, RunSubmitterClient):
+            ported_recover_openai_api()
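The `set_event_loop_policy()` call above addresses the well-known "event loop is closed" teardown failure on Windows. A sketch of the standard workaround, assuming that is what the helper in `azure.ai.evaluation._evaluate._utils` applies (the actual implementation is not shown in this commit):

import asyncio
import sys


def set_event_loop_policy() -> None:
    # Assumed implementation: the default proactor loop on Windows can raise
    # "Event loop is closed" during teardown; the selector policy avoids it.
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())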

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 29 additions & 38 deletions
@@ -9,6 +9,7 @@
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict, Union, cast
 
 from azure.ai.evaluation._legacy._adapters._constants import LINE_NUMBER
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._legacy._adapters.entities import Run
 import pandas as pd
 
@@ -40,7 +41,7 @@
     _write_output,
     DataLoaderFactory,
 )
-from ._batch_run.batch_clients import BatchClient
+from ._batch_run.batch_clients import BatchClient, BatchClientRun
 
 LOGGER = logging.getLogger(__name__)
 
@@ -486,12 +487,12 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
 
 def _apply_target_to_data(
     target: Callable,
-    data: Union[str, os.PathLike],
+    data: Union[str, os.PathLike, pd.DataFrame],
     batch_client: BatchClient,
     initial_data: pd.DataFrame,
     evaluation_name: Optional[str] = None,
     **kwargs,
-) -> Tuple[pd.DataFrame, Set[str], Run]:
+) -> Tuple[pd.DataFrame, Set[str], BatchClientRun]:
     """
     Apply the target function to the data set and return updated data and generated columns.
 
@@ -509,24 +510,18 @@ def _apply_target_to_data(
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
 
-    if not isinstance(batch_client, ProxyClient):
-        raise ValueError("Only ProxyClient supports target runs for now.")
-
     _run_name = kwargs.get("_run_name")
-    with TargetRunContext():
-        run = cast(
-            ProxyRun,
-            batch_client.run(
-                flow=target,
-                display_name=evaluation_name,
-                data=data,
-                stream=True,
-                name=_run_name,
-            ),
+    with TargetRunContext(batch_client):
+        run: BatchClientRun = batch_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            stream=True,
+            name=_run_name,
+            evaluator_name=getattr(target, "__qualname__", "TARGET"),
         )
-
-        target_output: pd.DataFrame = batch_client.get_details(run, all_results=True)
-        run_summary = batch_client.get_run_summary(run)
+    target_output: pd.DataFrame = batch_client.get_details(run, all_results=True)
+    run_summary = batch_client.get_run_summary(run)
 
     if run_summary["completed_lines"] == 0:
         msg = (
@@ -557,7 +552,7 @@ def _apply_target_to_data(
     # Concatenate output to input
     target_output = pd.concat([target_output, initial_data], axis=1)
 
-    return target_output, generated_columns, run.run.result()
+    return target_output, generated_columns, run
 
 
 def _process_column_mappings(
@@ -777,19 +772,27 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     column_mapping = column_mapping or {}
     column_mapping.setdefault("default", {})
 
-    target_run: Optional[Run] = None
+    target_run: Optional[BatchClientRun] = None
     target_generated_columns: Set[str] = set()
     batch_run_client: BatchClient
     batch_run_data: Union[str, os.PathLike, pd.DataFrame] = data
 
-    # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
-    if data is not None and target is not None:
-        # Right now, only the ProxyClient that uses Promptflow supports a target function
+    if kwargs.pop("_use_run_submitter_client", False):
+        batch_run_client = RunSubmitterClient()
+        batch_run_data = input_data_df
+    elif kwargs.pop("_use_pf_client", True):
         batch_run_client = ProxyClient(user_agent=USER_AGENT)
+        # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
+        # multiple evaluators. If the path is already absolute, abspath will return the original path.
         batch_run_data = os.path.abspath(data)
+    else:
+        batch_run_client = CodeClient()
+        batch_run_data = input_data_df
 
+    # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
+    if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
-            target, data, batch_run_client, input_data_df, evaluation_name, **kwargs
+            target, batch_run_data, batch_run_client, input_data_df, evaluation_name, **kwargs
         )
 
     for evaluator_name, mapping in column_mapping.items():
@@ -803,17 +806,6 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
             # customer did not mapped target output.
             if col not in mapping and run_output not in mapped_to_values:
                 column_mapping[evaluator_name][col] = run_output  # pylint: disable=unnecessary-dict-index-lookup
-    elif kwargs.pop("_use_run_submitter_client", False):
-        batch_run_client = RunSubmitterClient()
-        batch_run_data = input_data_df
-    elif kwargs.pop("_use_pf_client", True):
-        batch_run_client = ProxyClient(user_agent=USER_AGENT)
-        # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
-        # multiple evaluators. If the path is already absolute, abspath will return the original path.
-        batch_run_data = os.path.abspath(data)
-    else:
-        batch_run_client = CodeClient()
-        batch_run_data = input_data_df
 
     # After we have generated all columns, we can check if we have everything we need for evaluators.
     _validate_columns_for_evaluators(input_data_df, evaluators, target, target_generated_columns, column_mapping)
@@ -896,12 +888,11 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     metrics.update(evaluators_metric)
 
     # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
-    target_run: Optional[Run] = None
     trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
     studio_url = None
     if trace_destination:
         studio_url = _log_metrics_and_instance_results(
-            metrics, result_df, trace_destination, target_run, evaluation_name, **kwargs
+            metrics, result_df, trace_destination, None, evaluation_name, **kwargs
         )
 
     result_df_dict = result_df.to_dict("records")
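The key structural change above is ordering: the batch client is now chosen before `_apply_target_to_data` runs, so any client (not only `ProxyClient`) can execute a target. A simplified sketch of the new selection logic (illustrative, not the SDK's exact code):

def choose_batch_client(kwargs: dict) -> str:
    # Checked first, so the ported engine wins when explicitly requested.
    if kwargs.pop("_use_run_submitter_client", False):
        return "RunSubmitterClient"  # takes the in-memory DataFrame
    if kwargs.pop("_use_pf_client", True):
        return "ProxyClient"  # Promptflow-backed; needs an absolute file path
    return "CodeClient"  # plain in-process execution


assert choose_batch_client({"_use_run_submitter_client": True}) == "RunSubmitterClient"
assert choose_batch_client({}) == "ProxyClient"
assert choose_batch_client({"_use_pf_client": False}) == "CodeClient"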

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_legacy/_adapters/__init__.py

Lines changed: 0 additions & 14 deletions
@@ -5,17 +5,3 @@
 # NOTE: This contains adapters that make the Promptflow dependency optional. In the first phase,
 # Promptflow will still be installed as part of the azure-ai-evaluation dependencies. This
 # will be removed in the future once the code migration is complete.
-
-from typing import Final
-
-
-_has_legacy = False
-try:
-    from promptflow.client import PFClient
-
-    _has_legacy = True
-except ImportError:
-    pass
-
-HAS_LEGACY_SDK: Final[bool] = _has_legacy
-MISSING_LEGACY_SDK: Final[bool] = not _has_legacy
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from typing import Final
+
+
+_has_legacy = False
+try:
+    from promptflow._constants import FlowType
+
+    _has_legacy = True
+except ImportError:
+    pass
+
+HAS_LEGACY_SDK: Final[bool] = _has_legacy
+MISSING_LEGACY_SDK: Final[bool] = not _has_legacy
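The flags removed from `_adapters/__init__.py` above are re-created in this new module (its path is not shown in this view), now probing `promptflow._constants.FlowType` rather than `promptflow.client.PFClient`, presumably a lighter-weight import. A self-contained usage sketch; the guard itself is an assumption, not code from this commit, though it matches the `MissingRequiredPackage` import added to `_evaluate.py` above:

from typing import Final

# Re-probing here only for illustration; the real flags live in the new module.
try:
    from promptflow._constants import FlowType  # noqa: F401
    _has_legacy = True
except ImportError:
    _has_legacy = False

MISSING_LEGACY_SDK: Final[bool] = not _has_legacy

if MISSING_LEGACY_SDK:
    # In the SDK this would raise MissingRequiredPackage instead of printing.
    print("promptflow is not installed; legacy-only code paths are unavailable")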
