Skip to content

Commit e27be71

Browse files
Copilot authored and slister1001 committed
Refactor _evaluate_query to public evaluate_query parameter with backward compatibility
Co-authored-by: slister1001 <[email protected]>
1 parent a9a6f06 commit e27be71

File tree

12 files changed

+47
-31
lines changed

12 files changed

+47
-31
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,13 +89,13 @@ def __init__(
8989
credential,
9090
azure_ai_project,
9191
*,
92-
_evaluate_query: bool = True,
92+
evaluate_query: bool = True,
9393
):
9494
super().__init__(
9595
eval_metric=EvaluationMetrics.CODE_VULNERABILITY,
9696
azure_ai_project=azure_ai_project,
9797
credential=credential,
98-
_evaluate_query=_evaluate_query,
98+
evaluate_query=evaluate_query,
9999
)
100100

101101
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,9 @@ class RaiServiceEvaluatorBase(EvaluatorBase[T]):
4141
:type conversation_aggregation_type: ~azure.ai.evaluation._AggregationType
4242
:param threshold: The threshold for the evaluation. Default is 3.
4343
:type threshold: Optional[int]
44-
:param _evaluate_query: If True, the query will be included in the evaluation data when evaluating
44+
:param evaluate_query: If True, the query will be included in the evaluation data when evaluating
4545
query-response pairs. If False, only the response will be evaluated. Default is False.
46-
:type _evaluate_query: bool
46+
:type evaluate_query: bool
4747
:param _higher_is_better: If True, higher scores are better. Default is True.
4848
:type _higher_is_better: Optional[bool]
4949
"""
@@ -57,8 +57,10 @@ def __init__(
5757
eval_last_turn: bool = False,
5858
conversation_aggregation_type: _AggregationType = _AggregationType.MEAN,
5959
threshold: int = 3,
60-
_evaluate_query: bool = False,
60+
evaluate_query: bool = False,
6161
_higher_is_better: Optional[bool] = False,
62+
*,
63+
_evaluate_query: Optional[bool] = None,
6264
):
6365
super().__init__(
6466
eval_last_turn=eval_last_turn,
@@ -70,7 +72,21 @@ def __init__(
7072
self._azure_ai_project = validate_azure_ai_project(azure_ai_project)
7173
self._credential = credential
7274
self._threshold = threshold
73-
self._evaluate_query = _evaluate_query
75+
76+
# Handle backward compatibility with _evaluate_query parameter
77+
if _evaluate_query is not None:
78+
import warnings
79+
warnings.warn(
80+
"The '_evaluate_query' parameter is deprecated. Use 'evaluate_query' instead.",
81+
DeprecationWarning,
82+
stacklevel=2
83+
)
84+
# If both are provided, that's an error
85+
if evaluate_query != False: # evaluate_query was explicitly set to something other than default
86+
raise ValueError("Cannot specify both 'evaluate_query' and '_evaluate_query'. Use 'evaluate_query'.")
87+
evaluate_query = _evaluate_query
88+
89+
self._evaluate_query = evaluate_query
7490
self._higher_is_better = _higher_is_better
7591

7692
@override

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
3333
:type self_harm_threshold: int
3434
:param hate_unfairness_threshold: The threshold for hate/unfairness evaluation. Default is 3.
3535
:type hate_unfairness_threshold: int
36-
:param _evaluate_query: Whether to also evaluate the query in addition to the response. Default is False.
37-
:type _evaluate_query: bool
36+
:param evaluate_query: Whether to also evaluate the query in addition to the response. Default is False.
37+
:type evaluate_query: bool
3838
:param kwargs: Additional arguments to pass to the evaluator.
3939
:type kwargs: Any
4040
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
@@ -81,7 +81,7 @@ def __init__(
8181
sexual_threshold: int = 3,
8282
self_harm_threshold: int = 3,
8383
hate_unfairness_threshold: int = 3,
84-
_evaluate_query: bool = False,
84+
evaluate_query: bool = False,
8585
**kwargs,
8686
):
8787
# Type checking
@@ -96,14 +96,14 @@ def __init__(
9696

9797
evaluators = [
9898
ViolenceEvaluator(
99-
credential, azure_ai_project, threshold=violence_threshold, _evaluate_query=_evaluate_query
99+
credential, azure_ai_project, threshold=violence_threshold, evaluate_query=evaluate_query
100100
),
101-
SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold, _evaluate_query=_evaluate_query),
101+
SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold, evaluate_query=evaluate_query),
102102
SelfHarmEvaluator(
103-
credential, azure_ai_project, threshold=self_harm_threshold, _evaluate_query=_evaluate_query
103+
credential, azure_ai_project, threshold=self_harm_threshold, evaluate_query=evaluate_query
104104
),
105105
HateUnfairnessEvaluator(
106-
credential, azure_ai_project, threshold=hate_unfairness_threshold, _evaluate_query=_evaluate_query
106+
credential, azure_ai_project, threshold=hate_unfairness_threshold, evaluate_query=evaluate_query
107107
),
108108
]
109109
super().__init__(evaluators=evaluators, **kwargs)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def __init__(
9191
azure_ai_project,
9292
*,
9393
threshold: int = 3,
94-
_evaluate_query: bool = False,
94+
evaluate_query: bool = False,
9595
):
9696
super().__init__(
9797
eval_metric=EvaluationMetrics.HATE_FAIRNESS,
@@ -100,7 +100,7 @@ def __init__(
100100
conversation_aggregation_type=_AggregationType.MAX,
101101
threshold=threshold,
102102
_higher_is_better=False,
103-
_evaluate_query=_evaluate_query,
103+
evaluate_query=evaluate_query,
104104
)
105105

106106
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def __init__(
7676
azure_ai_project,
7777
*,
7878
threshold: int = 3,
79-
_evaluate_query: bool = False,
79+
evaluate_query: bool = False,
8080
):
8181
super().__init__(
8282
eval_metric=EvaluationMetrics.SELF_HARM,
@@ -85,7 +85,7 @@ def __init__(
8585
conversation_aggregation_type=_AggregationType.MAX,
8686
threshold=threshold,
8787
_higher_is_better=False,
88-
_evaluate_query=_evaluate_query,
88+
evaluate_query=evaluate_query,
8989
)
9090

9191
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def __init__(
8787
azure_ai_project,
8888
*,
8989
threshold: int = 3,
90-
_evaluate_query: bool = False,
90+
evaluate_query: bool = False,
9191
):
9292
super().__init__(
9393
eval_metric=EvaluationMetrics.SEXUAL,
@@ -96,7 +96,7 @@ def __init__(
9696
conversation_aggregation_type=_AggregationType.MAX,
9797
threshold=threshold,
9898
_higher_is_better=False,
99-
_evaluate_query=_evaluate_query,
99+
evaluate_query=evaluate_query,
100100
)
101101

102102
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def __init__(
8787
azure_ai_project,
8888
*,
8989
threshold: int = 3,
90-
_evaluate_query: bool = False,
90+
evaluate_query: bool = False,
9191
):
9292
super().__init__(
9393
eval_metric=EvaluationMetrics.VIOLENCE,
@@ -96,7 +96,7 @@ def __init__(
9696
conversation_aggregation_type=_AggregationType.MAX,
9797
threshold=threshold,
9898
_higher_is_better=False,
99-
_evaluate_query=_evaluate_query,
99+
evaluate_query=evaluate_query,
100100
)
101101

102102
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,13 +60,13 @@ def __init__(
6060
credential,
6161
azure_ai_project,
6262
*,
63-
_evaluate_query: bool = False,
63+
evaluate_query: bool = False,
6464
):
6565
super().__init__(
6666
eval_metric=_InternalEvaluationMetrics.ECI,
6767
azure_ai_project=azure_ai_project,
6868
credential=credential,
69-
_evaluate_query=_evaluate_query,
69+
evaluate_query=evaluate_query,
7070
)
7171

7272
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,13 +60,13 @@ def __init__(
6060
credential,
6161
azure_ai_project,
6262
*,
63-
_evaluate_query: bool = True,
63+
evaluate_query: bool = True,
6464
):
6565
super().__init__(
6666
eval_metric=EvaluationMetrics.PROTECTED_MATERIAL,
6767
azure_ai_project=azure_ai_project,
6868
credential=credential,
69-
_evaluate_query=_evaluate_query,
69+
evaluate_query=evaluate_query,
7070
)
7171

7272
@overload

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ def __init__(
7777
azure_ai_project,
7878
*,
7979
threshold: int = 5,
80-
_evaluate_query: bool = False,
80+
evaluate_query: bool = False,
8181
**kwargs,
8282
):
8383
self.threshold = threshold
@@ -88,7 +88,7 @@ def __init__(
8888
azure_ai_project=azure_ai_project,
8989
credential=credential,
9090
threshold=self.threshold,
91-
_evaluate_query=_evaluate_query,
91+
evaluate_query=evaluate_query,
9292
**kwargs,
9393
)
9494

0 commit comments

Comments (0)