
Commit 3fe7515

Enable use of the new leaner ported BatchEngine code path (Azure#40241)
- Added a new kwargs argument, `_use_run_submitter_client`, which when set to true in a call to evaluate() enables the new, faster and leaner ported code path without the promptflow dependency. Please note that this is still in a "happy path" working state with some features missing (e.g. handling target function calls, which will be added in a future PR). The goal here is to enable testing of this code path sooner so we can start finding bugs/issues. A hedged usage sketch follows this list.
- Created a `BatchClient` protocol to standardize the existing `CodeClient` and `ProxyClient`, as well as the newly added `RunSubmitterClient`. This makes the evaluate logic simpler in the rest of the code.
- Though still installed by default, made promptflow an optional dependency by:
  - Creating some adapters to handle the case where promptflow is not installed, replacing it with either stub code or the closest ported version of the code as needed
  - Adding some dependencies to setup.py that were implicitly brought in by the promptflow dependency and are still needed by the ported legacy code
- Removed some code that is no longer needed now that tracing support has been deprecated/disabled, as well as some unneeded test code
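A minimal sketch of opting in to the new code path, assuming the public `evaluate()` entry point from `azure.ai.evaluation`; the dataset path and the evaluator here are hypothetical, and the leading underscore marks the flag as private/experimental:

```python
from azure.ai.evaluation import evaluate


def answer_length(*, response: str, **kwargs):
    # Hypothetical row-level evaluator: scores each row by response length.
    return {"answer_length": len(response)}


result = evaluate(
    data="data.jsonl",  # hypothetical JSONL dataset with a "response" column
    evaluators={"answer_length": answer_length},
    # Opt in to the leaner ported BatchEngine path (no promptflow dependency).
    _use_run_submitter_client=True,
)
print(result["metrics"])
```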
1 parent f6a9d07 commit 3fe7515


42 files changed: +506 −582 lines

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_f423a24efa"
+  "Tag": "python/evaluation/azure-ai-evaluation_e33b6c53d7"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 
 import jwt
 
-from promptflow.core._errors import MissingRequiredPackage
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
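The `azure.ai.evaluation._legacy._adapters` modules this import now points to are not shown in this commit view. A minimal sketch of what the `_errors` adapter might look like, assuming the try-import-with-stub-fallback approach the commit message describes (the stub body is illustrative, not the actual file):

```python
# azure/ai/evaluation/_legacy/_adapters/_errors.py (illustrative sketch)
try:
    # Re-export the real type when the optional promptflow package is present.
    from promptflow.core._errors import MissingRequiredPackage
except ImportError:

    class MissingRequiredPackage(Exception):  # type: ignore[no-redef]
        """Stub raised when an optional package required for a feature is missing."""
```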

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 
 import nltk
 from typing_extensions import NotRequired, Required, TypeGuard
-from promptflow.core._errors import MissingRequiredPackage
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+# ---------------------------------------------------------

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/code_client.py

Lines changed: 18 additions & 12 deletions
@@ -6,17 +6,17 @@
 import logging
 import os
 from concurrent.futures import Future
-from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Union, cast
+from typing import Any, Callable, Dict, Optional, Sequence, Union, cast
 
 import pandas as pd
-from promptflow.contracts.types import AttrDict
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
+from azure.ai.evaluation._legacy._adapters.types import AttrDict
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 
 from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
+from .batch_clients import BatchClientRun
 
 LOGGER = logging.getLogger(__name__)
 
@@ -84,7 +84,7 @@ def _calculate_metric(
             for param in inspect.signature(evaluator).parameters.values()
             if param.name not in ["args", "kwargs"]
         }
-        for value in input_df.to_dict("records"):
+        for value in cast(Sequence[Dict[str, Any]], input_df.to_dict("records")):
             # Filter out only the parameters that are present in the input data
             # if no parameters then pass data as is
             filtered_values = {k: v for k, v in value.items() if k in parameters} if len(parameters) > 0 else value
@@ -133,10 +133,10 @@ def _calculate_aggregations(evaluator: Callable, run: CodeRun) -> Any:
     def run(
         self,  # pylint: disable=unused-argument
         flow: Callable,
-        data: Union[os.PathLike, Path, pd.DataFrame],
-        evaluator_name: Optional[str] = None,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-        **kwargs,
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> CodeRun:
         input_df = data
         if not isinstance(input_df, pd.DataFrame):
@@ -157,7 +157,7 @@ def run(
             evaluator=flow,
             input_df=input_df,
             column_mapping=column_mapping,
-            evaluator_name=evaluator_name,
+            evaluator_name=evaluator_name or "",
         )
 
         return CodeRun(
@@ -169,11 +169,13 @@
             ),
         )
 
-    def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run = self._get_result(client_run)
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df
 
-    def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run = self._get_result(client_run)
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
@@ -183,6 +185,10 @@ def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
             return {}
         return aggregated_metrics
 
-    def get_run_summary(self, run: CodeRun) -> Any:  # pylint: disable=unused-argument
+    def get_run_summary(self, client_run: BatchClientRun) -> Any:  # pylint: disable=unused-argument
         # Not implemented
         return None
+
+    @staticmethod
+    def _get_result(run: BatchClientRun) -> CodeRun:
+        return cast(CodeRun, run)
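The `batch_clients` module introduced here is hidden in this view. Judging from the signatures that `CodeClient`, `ProxyClient`, and `RunSubmitterClient` converge on, the `BatchClient` protocol plausibly looks something like the sketch below; anything beyond the names visible in this diff is an assumption, not the actual file:

```python
import os
from typing import Any, Callable, Dict, Optional, Protocol, Union

import pandas as pd

# Opaque, client-specific run handle (CodeRun, ProxyRun, ...); assumed alias.
BatchClientRun = Any


class BatchClient(Protocol):
    """Shared surface of CodeClient, ProxyClient, and RunSubmitterClient."""

    def run(
        self,
        flow: Callable,
        data: Union[str, os.PathLike, pd.DataFrame],
        column_mapping: Optional[Dict[str, str]] = None,
        evaluator_name: Optional[str] = None,
        **kwargs: Any,
    ) -> BatchClientRun: ...

    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame: ...

    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]: ...

    def get_run_summary(self, client_run: BatchClientRun) -> Any: ...
```

Typing each client's public methods against the opaque `BatchClientRun` handle (with a private `_get_result`/`get_result` cast back to the concrete run type) is what lets the evaluate logic treat all three clients uniformly.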

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py

Lines changed: 9 additions & 4 deletions
@@ -5,9 +5,9 @@
 import types
 from typing import Optional, Type, Union
 
-from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
-from promptflow._utils.user_agent_utils import ClientUserAgentUtil
-from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
+from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
+from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api
 
 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
@@ -19,6 +19,8 @@
 
 from ..._user_agent import USER_AGENT
 from .._utils import set_event_loop_policy
+from .batch_clients import BatchClient
+from ._run_submitter_client import RunSubmitterClient
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
 
@@ -33,7 +35,7 @@ class EvalRunContext:
     ]
     """
 
-    def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
+    def __init__(self, client: BatchClient) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
@@ -64,6 +66,9 @@ def __enter__(self) -> None:
             # For addressing the issue of asyncio event loop closed on Windows
             set_event_loop_policy()
 
+        if isinstance(self.client, RunSubmitterClient):
+            set_event_loop_policy()
+
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
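For orientation, `EvalRunContext` is a context manager that applies environment tweaks (batch timeouts, user agent, OpenAI API injection, event loop policy) around whichever `BatchClient` is in use. A hedged usage sketch; the import paths are guesses from the file layout shown here, and the evaluator and data file are hypothetical:

```python
from azure.ai.evaluation._evaluate._batch_run.eval_run_context import EvalRunContext
from azure.ai.evaluation._evaluate._batch_run.proxy_client import ProxyClient


def my_evaluator(*, response: str, **kwargs):
    # Hypothetical evaluator used only for illustration.
    return {"length": len(response)}


client = ProxyClient()  # any BatchClient implementation works the same way
with EvalRunContext(client):
    run = client.run(flow=my_evaluator, data="data.jsonl", evaluator_name="my_evaluator")
    details = client.get_details(run)
    metrics = client.get_metrics(run)
```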

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py

Lines changed: 42 additions & 22 deletions
@@ -8,15 +8,21 @@
 import logging
 import math
 import os
+from datetime import datetime
 from collections import OrderedDict
 from concurrent.futures import Future
-from typing import Any, Callable, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union, cast
 
+from azure.ai.evaluation._legacy._adapters.entities import Run
+from azure.ai.evaluation._legacy._adapters._configuration import Configuration
+from azure.ai.evaluation._legacy._adapters.client import PFClient
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext
 import pandas as pd
-from promptflow.client import PFClient
-from promptflow.entities import Run
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 
+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClientRun, HasAsyncCallable
+
+
+Configuration.get_instance().set_config("trace.destination", "none")
 LOGGER = logging.getLogger(__name__)
 
 
@@ -26,46 +32,56 @@ def __init__(self, run: Future, **kwargs) -> None:  # pylint: disable=unused-argument
 
 
 class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
-    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
-        self, pf_client: PFClient
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential
+        self,
+        **kwargs: Any,
     ) -> None:
-        self._pf_client = pf_client
-        self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
+        self._pf_client = PFClient(**kwargs)
+        self._thread_pool = ThreadPoolExecutorWithContext(thread_name_prefix="evaluators_thread")
 
     def run(
         self,
-        flow: Union[str, os.PathLike, Callable],
-        data: Union[str, os.PathLike],
+        flow: Callable,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-        **kwargs
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> ProxyRun:
-        flow_to_run = flow
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
+        if isinstance(data, pd.DataFrame):
+            raise ValueError("Data cannot be a pandas DataFrame")
+
+        flow_to_run: Callable = flow
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and isinstance(flow, HasAsyncCallable):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access
 
+        name: str = kwargs.pop("name", "")
+        if not name:
+            name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
+
         batch_use_async = self._should_batch_use_async(flow_to_run)
         eval_future = self._thread_pool.submit(
             self._pf_client.run,
             flow_to_run,
             data=data,
-            column_mapping=column_mapping,
+            column_mapping=column_mapping,  # type: ignore
             batch_use_async=batch_use_async,
-            **kwargs
+            name=name,
+            **kwargs,
         )
         return ProxyRun(run=eval_future)
 
-    def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
-        run: Run = proxy_run.run.result()
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run: Run = self.get_result(client_run)
         result_df = self._pf_client.get_details(run, all_results=all_results)
         result_df.replace("(Failed)", math.nan, inplace=True)
         return result_df
 
-    def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run: Run = proxy_run.run.result()
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)
         return self._pf_client.get_metrics(run)
 
-    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run = proxy_run.run.result()
+    def get_run_summary(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)
 
         # pylint: disable=protected-access
         completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
@@ -81,13 +97,17 @@ def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         return OrderedDict(
             [
                 ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
+                ("duration", str((run._end_time or run._created_on) - run._created_on)),
                 ("completed_lines", completed_lines),
                 ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )
 
+    @staticmethod
+    def get_result(run: BatchClientRun) -> Run:
+        return cast(ProxyRun, run).run.result()
+
     @staticmethod
     def _should_batch_use_async(flow):
         if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
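`HasAsyncCallable` also lives in the hidden `batch_clients` module. Because it replaces the old `hasattr(flow, "_to_async")` check with `isinstance`, it is presumably a runtime-checkable protocol, roughly as sketched here (an assumption, not the actual definition):

```python
from typing import Any, Callable, Protocol, runtime_checkable


@runtime_checkable
class HasAsyncCallable(Protocol):
    """Matches evaluators that can hand back an async version of themselves."""

    def _to_async(self) -> Callable[..., Any]: ...
```

A runtime-checkable protocol keeps the duck-typed behavior of the old `hasattr` check while giving the type checker something concrete to verify.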

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 import types
 from typing import Optional, Type
 
-from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
 from azure.ai.evaluation._constants import PF_DISABLE_TRACING
 
 

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 from typing import Any, Dict, List, Optional, Set, Type
 from urllib.parse import urlparse
 
-from promptflow._sdk.entities import Run
+from azure.ai.evaluation._legacy._adapters.entities import Run
 from typing_extensions import Self
 
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
