azure-sdk
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py‎
Lines changed: 1 addition & 5 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py‎
Lines changed: 4 additions & 4 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py‎
Lines changed: 16 additions & 15 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py‎
Lines changed: 16 additions & 15 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py‎
Lines changed: 9 additions & 0 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py‎
Lines changed: 2 additions & 1 deletion b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py‎
Lines changed: 38 additions & 16 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py‎
Lines changed: 38 additions & 16 deletions
diff --git a/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py‎
Lines changed: 23 additions & 13 deletions b/‎sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py‎
Lines changed: 23 additions & 13 deletions
@@ -25,11 +25,7 @@
 from ._evaluators._rouge import RougeScoreEvaluator, RougeType
 from ._evaluators._similarity import SimilarityEvaluator
 from ._evaluators._xpia import IndirectAttackEvaluator
-from ._model_configurations import (
-    AzureAIProject,
-    AzureOpenAIModelConfiguration,
-    OpenAIModelConfiguration,
-)
+from ._model_configurations import AzureAIProject, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 __all__ = [
     "evaluate",
 
@@ -11,12 +11,12 @@
 
 import jwt
 import numpy as np
-from azure.core.credentials import TokenCredential
-from azure.identity import DefaultAzureCredential
 
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.core.credentials import TokenCredential
+from azure.identity import DefaultAzureCredential
 
 from .constants import (
     CommonConstants,
@@ -348,7 +348,7 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
         )
 
     if response.status_code != 200:
-        msg = f"Failed to retrieve the discovery service URL."
+        msg = "Failed to retrieve the discovery service URL."
         raise EvaluationException(
             message=msg,
             internal_message=msg,
 
@@ -2,20 +2,15 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from typing import Optional, Union
+import threading
+from typing import List, Optional, Union
 
-from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+import nltk
+import numpy as np
 
-try:
-    from . import constants
-except ImportError:
-    import constants
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
-from typing import List
-
-import threading
-import numpy as np
-import nltk
+from . import constants
 
 _nltk_data_download_lock = threading.Lock()
 
@@ -46,15 +41,21 @@ def ensure_nltk_data_downloaded():
     """Download NLTK data packages if not already downloaded."""
     with _nltk_data_download_lock:
         try:
-            from nltk.tokenize.nist import NISTTokenizer
+            from nltk.tokenize.nist import NISTTokenizer  # pylint: disable=unused-import
         except LookupError:
             nltk.download("perluniprops")
             nltk.download("punkt")
             nltk.download("punkt_tab")
 
 
 def nltk_tokenize(text: str) -> List[str]:
-    """Tokenize the input text using the NLTK tokenizer."""
+    """Tokenize the input text using the NLTK tokenizer.
+
+    :param text: The text to tokenize
+    :type text: str
+    :return: A list of tokens
+    :rtype: list[str]
+    """
     ensure_nltk_data_downloaded()
 
     if not text.isascii():
@@ -69,15 +70,15 @@ def nltk_tokenize(text: str) -> List[str]:
     return list(tokens)
 
 
-def check_and_add_api_version_for_aoai_model_config(
+def ensure_api_version_in_aoai_model_config(
     model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
     default_api_version: str,
 ) -> None:
+    """Set ``api_version`` on an Azure OpenAI model configuration when it is absent.
+
+    The configuration is modified in place, and only when it contains an
+    ``azure_endpoint`` or an ``azure_deployment`` key. An ``api_version`` that is
+    already present keeps its value.
+
+    :param model_config: The model configuration to update in place.
+    :type model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
+    :param default_api_version: The API version stored when the configuration has none.
+    :type default_api_version: str
+    """
     if "azure_endpoint" in model_config or "azure_deployment" in model_config:
         model_config["api_version"] = model_config.get("api_version", default_api_version)
 
 
-def check_and_add_user_agent_for_aoai_model_config(
+def ensure_user_agent_in_aoai_model_config(
     model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
     prompty_model_config: dict,
     user_agent: Optional[str] = None,
 
@@ -39,6 +39,15 @@ class Prefixes:
     TSG_OUTPUTS = "__outputs."
 
 
+class DefaultOpenEncoding:
+    """Constants holding the SDK's default values for the encoding param of open(...).
+
+    This is a plain class used as a namespace for string constants; it does not
+    derive from ``enum.Enum``.
+    """
+
+    # Python's "utf-8-sig" codec skips a leading byte-order mark, if present, when decoding.
+    READ = "utf-8-sig"
+    """SDK Default Encoding when reading a file"""
+    WRITE = "utf-8"
+    """SDK Default Encoding when writing a file"""
+
+
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
 
 CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
 
@@ -5,13 +5,14 @@
 
 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.user_agent_utils import ClientUserAgentUtil
+from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
+
 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT,
     PF_BATCH_TIMEOUT_SEC,
     PF_BATCH_TIMEOUT_SEC_DEFAULT,
 )
-from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
 
 from ..._user_agent import USER_AGENT
 from .._utils import set_event_loop_policy
 
@@ -4,21 +4,26 @@
 import inspect
 import json
 import logging
+import os
+from pathlib import Path
+from typing import Callable, Dict, Optional, Union
 
 import pandas as pd
-
 from promptflow.contracts.types import AttrDict
-from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+
+from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
 
 LOGGER = logging.getLogger(__name__)
 
 
 class CodeRun:
-    def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs):
+    def __init__(
+        self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs  # pylint: disable=unused-argument
+    ):
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
@@ -40,25 +45,29 @@ def get_aggregated_metrics(self):
                 else None
             )
         except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug(f"Error calculating metrics for evaluator {self.evaluator_name}, failed with error {str(ex)}")
+            LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", self.evaluator_name, ex)
             aggregated_metrics = None
 
         if not isinstance(aggregated_metrics, dict):
             LOGGER.warning(
-                f"Aggregated metrics for evaluator {self.evaluator_name}"
-                f" is not a dictionary will not be logged as metrics"
+                "Aggregated metrics for evaluator %s is not a dictionary will not be logged as metrics",
+                self.evaluator_name,
             )
 
         aggregated_metrics = aggregated_metrics if isinstance(aggregated_metrics, dict) else {}
 
         return aggregated_metrics
 
 
-class CodeClient:
-    def __init__(self):
+class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
+        self,
+    ) -> None:
         self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
 
-    def _calculate_metric(self, evaluator, input_df, column_mapping, evaluator_name):
+    def _calculate_metric(
+        self, evaluator: Callable, input_df: pd.DataFrame, column_mapping: Optional[Dict[str, str]], evaluator_name: str
+    ) -> pd.DataFrame:
         row_metric_futures = []
         row_metric_results = []
         input_df = _apply_column_mapping(input_df, column_mapping)
@@ -110,11 +119,18 @@ def _calculate_aggregations(self, evaluator, run):
                 return aggregated_output
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.warning(
-                f"Error calculating aggregations for evaluator {run.evaluator_name}," f" failed with error {str(ex)}"
+                "Error calculating aggregations for evaluator %s, failed with error %s", run.evaluator_name, ex
             )
         return None
 
-    def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
+    def run(
+        self,  # pylint: disable=unused-argument
+        flow: Callable,
+        data: Union[os.PathLike, Path, pd.DataFrame],
+        evaluator_name: Optional[str] = None,
+        column_mapping: Optional[Dict[str, str]] = None,
+        **kwargs,
+    ) -> CodeRun:
         input_df = data
         if not isinstance(input_df, pd.DataFrame):
             try:
@@ -129,22 +145,28 @@ def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
                 ) from exc
 
             input_df = pd.DataFrame(json_data)
-        eval_future = self._thread_pool.submit(self._calculate_metric, flow, input_df, column_mapping, evaluator_name)
+        eval_future = self._thread_pool.submit(
+            self._calculate_metric,
+            evaluator=flow,
+            input_df=input_df,
+            column_mapping=column_mapping,
+            evaluator_name=evaluator_name,
+        )
         run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
         aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
         run.aggregated_metrics = aggregation_future
         return run
 
-    def get_details(self, run, all_results=False):
+    def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
+        """Return the result data frame of a code run.
+
+        :param run: The run whose results are requested.
+        :type run: CodeRun
+        :param all_results: When False, ``exclude_inputs=True`` is passed to
+            ``run.get_result_df``; when True, ``exclude_inputs=False`` is passed.
+        :type all_results: bool
+        :return: The data frame returned by ``run.get_result_df``.
+        :rtype: pd.DataFrame
+        """
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df
 
-    def get_metrics(self, run):
+    def get_metrics(self, run: CodeRun) -> Optional[dict]:
+        """Return the aggregated metrics of a code run, printing them to stdout first.
+
+        :param run: The run whose aggregated metrics are requested.
+        :type run: CodeRun
+        :return: The value of ``run.get_aggregated_metrics()``, or None when that call
+            (or the printing) raises; the error is then logged at debug level.
+        :rtype: Optional[dict]
+        """
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug(f"Error calculating metrics for evaluator {run.evaluator_name}, failed with error {str(ex)}")
+            LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
             return None
         return aggregated_metrics
@@ -4,29 +4,40 @@
 import inspect
 import logging
 import os
+from concurrent.futures import Future
+from typing import Any, Callable, Dict, Optional, Union
 
 import numpy as np
-
+import pandas as pd
 from promptflow.client import PFClient
+from promptflow.entities import Run
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 
 LOGGER = logging.getLogger(__name__)
 
 
 class ProxyRun:
+    """Holder for the future that ``ProxyClient.run`` creates when it submits a batch run.
+
+    The future is stored on the ``run`` attribute; extra keyword arguments are accepted
+    and ignored.
+    """
+
-    def __init__(self, run, **kwargs):
+    def __init__(self, run: Future, **kwargs) -> None:  # pylint: disable=unused-argument
         self.run = run
 
 
-class ProxyClient:
-    def __init__(self, pf_client: PFClient):
+class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
+        self, pf_client: PFClient
+    ) -> None:
         self._pf_client = pf_client
         self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
 
-    def run(self, flow, data, column_mapping=None, **kwargs):
+    def run(
+        self,
+        flow: Union[str, os.PathLike, Callable],
+        data: Union[str, os.PathLike],
+        column_mapping: Optional[Dict[str, str]] = None,
+        **kwargs
+    ) -> ProxyRun:
         flow_to_run = flow
         if hasattr(flow, "_to_async"):
-            flow_to_run = flow._to_async()
+            flow_to_run = flow._to_async()  # pylint: disable=protected-access
 
         batch_use_async = self._should_batch_use_async(flow_to_run)
         eval_future = self._thread_pool.submit(
@@ -39,23 +50,22 @@ def run(self, flow, data, column_mapping=None, **kwargs):
         )
         return ProxyRun(run=eval_future)
 
-    def get_details(self, proxy_run, all_results=False):
-        run = proxy_run.run.result()
+    def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
+        """Return the details data frame of a proxied run.
+
+        Blocks until the future held by ``proxy_run`` has a result, then asks the
+        promptflow client for that run's details.
+
+        :param proxy_run: The wrapper returned by :meth:`run`.
+        :type proxy_run: ProxyRun
+        :param all_results: Forwarded unchanged to ``PFClient.get_details``.
+        :type all_results: bool
+        :return: The details data frame, with every ``"(Failed)"`` value replaced by NaN.
+        :rtype: pd.DataFrame
+        """
+        run: Run = proxy_run.run.result()
         result_df = self._pf_client.get_details(run, all_results=all_results)
+        # Swap the "(Failed)" marker for NaN, in place, before handing the frame back.
         result_df.replace("(Failed)", np.nan, inplace=True)
         return result_df
 
-    def get_metrics(self, proxy_run):
-        run = proxy_run.run.result()
+    def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
+        """Return the metrics that the promptflow client reports for a proxied run.
+
+        Blocks until the future held by ``proxy_run`` has a result.
+
+        :param proxy_run: The wrapper returned by :meth:`run`.
+        :type proxy_run: ProxyRun
+        :return: The value of ``PFClient.get_metrics`` for the completed run.
+        :rtype: Dict[str, Any]
+        """
+        run: Run = proxy_run.run.result()
         return self._pf_client.get_metrics(run)
 
     @staticmethod
     def _should_batch_use_async(flow):
+        """Tell whether ``flow`` is a coroutine callable and async use is not switched off.
+
+        Returns True only when the ``PF_EVALS_BATCH_USE_ASYNC`` environment variable
+        (default ``"true"``, compared case-insensitively) equals ``"true"`` and either
+        ``flow.__call__`` or ``flow`` itself is a coroutine function. Returns False in
+        every other case.
+
+        :param flow: The flow object or callable to inspect.
+        :return: True when both conditions above hold, otherwise False.
+        :rtype: bool
+        """
         if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
-            elif inspect.iscoroutinefunction(flow):
+            if inspect.iscoroutinefunction(flow):
                 return True
-            else:
-                return False
+            return False
         return False