diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 75670e2c3849..4a999adb7a18 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -5,11 +5,19 @@ ### Breaking Changes ### Features Added + - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter. - Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used. ### Bugs Fixed +- [Bug](https://github.com/Azure/azure-sdk-for-python/issues/39909): Added `is_reasoning_model` keyword parameter to all evaluators + (`SimilarityEvaluator`, `RelevanceEvaluator`, `CoherenceEvaluator`, `FluencyEvaluator`, + `RetrievalEvaluator`, `GroundednessEvaluator`, `IntentResolutionEvaluator`, + `ResponseCompletenessEvaluator`, `TaskAdherenceEvaluator`, `ToolCallAccuracyEvaluator`). + When set to `True`, the evaluator's chat completions configuration is adjusted for reasoning models. + `QAEvaluator` now propagates this parameter to its child evaluators. + ### Other Changes ## 1.10.0 (2025-07-31) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py index 66cc593452fb..6644bceaa263 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_coherence/_coherence.py @@ -12,17 +12,22 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]): """ - Evaluates coherence score for a given query and response or a multi-turn conversation, including reasoning. + Evaluates coherence for a given query and response or a multi-turn + conversation, including reasoning. - The coherence measure assesses the ability of the language model to generate text that reads naturally, - flows smoothly, and resembles human-like language in its responses. Use it when assessing the readability - and user-friendliness of a model's generated responses in real-world applications. + The coherence measure assesses the model's ability to generate text that + reads naturally, flows smoothly, and resembles human-like language. Use it + when assessing the readability and user-friendliness of responses. :param model_config: Configuration for the Azure OpenAI model. - :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, + :type model_config: + Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the coherence evaluator. Default is 3. :type threshold: int + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool ..
admonition:: Example: @@ -31,7 +36,8 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]): :end-before: [END coherence_evaluator] :language: python :dedent: 8 - :caption: Initialize and call CoherenceEvaluator using azure.ai.evaluation.AzureAIProject + :caption: Initialize and call CoherenceEvaluator using + azure.ai.evaluation.AzureAIProject .. admonition:: Example using Azure AI Project URL: @@ -40,7 +46,8 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]): :end-before: [END coherence_evaluator] :language: python :dedent: 8 - :caption: Initialize and call CoherenceEvaluator using Azure AI Project URL in following format + :caption: Initialize and call CoherenceEvaluator using Azure AI + Project URL in the following format https://{resource_name}.services.ai.azure.com/api/projects/{project_name} .. admonition:: Example with Threshold: @@ -50,23 +57,24 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]): :end-before: [END threshold_coherence_evaluator] :language: python :dedent: 8 - :caption: Initialize with threshold and call a CoherenceEvaluator with a query and response. + :caption: Initialize with threshold and call a CoherenceEvaluator + with a query and response. .. note:: - To align with our support of a diverse set of models, an output key without the `gpt_` prefix has been added. - To maintain backwards compatibility, the old key with the `gpt_` prefix is still be present in the output; - however, it is recommended to use the new key moving forward as the old key will be deprecated in the future. + To align with support of diverse models, an output key without the + `gpt_` prefix has been added. The old key with the `gpt_` prefix is + still present for compatibility; however, it will be deprecated. """ _PROMPTY_FILE = "coherence.prompty" _RESULT_KEY = "coherence" id = "azureai://built-in/evaluators/coherence" - """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" + """Evaluator identifier, experimental and to be used only with cloud evaluation.""" @override - def __init__(self, model_config, *, threshold=3): + def __init__(self, model_config, *, threshold=3, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self._threshold = threshold @@ -77,6 +85,7 @@ def __init__(self, model_config, *, threshold=3): result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) @overload @@ -104,9 +113,11 @@ def __call__( ) -> Dict[str, Union[float, Dict[str, List[Union[str, float]]]]]: """Evaluate coherence for a conversation - :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the - key "messages", and potentially a global context under the key "context". Conversation turns are expected - to be dictionaries with keys "content", "role", and possibly "context". + :keyword conversation: The conversation to evaluate. Expected to + contain a list of conversation turns under the key "messages", + and optionally a global context under the key "context". Turns are + dictionaries with keys "content", "role", and possibly + "context". :paramtype conversation: Optional[~azure.ai.evaluation.Conversation] :return: The coherence score. :rtype: Dict[str, Union[float, Dict[str, List[float]]]] @@ -118,19 +129,22 @@ def __call__( # pylint: disable=docstring-missing-param *args, **kwargs, ): - """Evaluate coherence. 
Accepts either a query and response for a single evaluation, - or a conversation for a potentially multi-turn evaluation. If the conversation has more than one pair of - turns, the evaluator will aggregate the results of each turn. + """Evaluate coherence. + + Accepts a query/response for a single evaluation, or a conversation + for a multi-turn evaluation. If the conversation has more than one + pair of turns, results are aggregated. :keyword query: The query to be evaluated. :paramtype query: str :keyword response: The response to be evaluated. :paramtype response: Optional[str] - :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the - key "messages". Conversation turns are expected - to be dictionaries with keys "content" and "role". + :keyword conversation: The conversation to evaluate. Expected to + contain conversation turns under the key "messages" as + dictionaries with keys "content" and "role". :paramtype conversation: Optional[~azure.ai.evaluation.Conversation] :return: The relevance score. - :rtype: Union[Dict[str, float], Dict[str, Union[float, Dict[str, List[float]]]]] + :rtype: Union[Dict[str, float], Dict[str, Union[float, Dict[str, + List[float]]]]] """ return super().__call__(*args, **kwargs) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py index 7eafa42a2926..544336fb4adf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py @@ -4,7 +4,9 @@ from concurrent.futures import as_completed from typing import TypeVar, Dict, List -from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor +from azure.ai.evaluation._legacy._adapters.tracing import ( + ThreadPoolExecutorWithContext as ThreadPoolExecutor, +) from typing_extensions import override from azure.ai.evaluation._evaluators._common import EvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py index 1e0dfe9d5ce1..6cf26e352e2d 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py @@ -15,8 +15,17 @@ from azure.ai.evaluation._common.constants import PROMPT_BASED_REASON_EVALUATORS from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING -from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget -from ..._common.utils import construct_prompty_model_config, validate_model_config, parse_quality_evaluator_reason_score +from azure.ai.evaluation._exceptions import ( + EvaluationException, + ErrorBlame, + ErrorCategory, + ErrorTarget, +) +from ..._common.utils import ( + construct_prompty_model_config, + validate_model_config, + parse_quality_evaluator_reason_score, +) from . 
import EvaluatorBase try: @@ -71,7 +80,11 @@ def __init__( self._prompty_file = prompty_file self._threshold = threshold self._higher_is_better = _higher_is_better - super().__init__(eval_last_turn=eval_last_turn, threshold=threshold, _higher_is_better=_higher_is_better) + super().__init__( + eval_last_turn=eval_last_turn, + threshold=threshold, + _higher_is_better=_higher_is_better, + ) subclass_name = self.__class__.__name__ user_agent = f"{UserAgentSingleton().value} (type=evaluator subtype={subclass_name})" @@ -82,7 +95,9 @@ def __init__( ) self._flow = AsyncPrompty.load( - source=self._prompty_file, model=prompty_model_config, is_reasoning_model=self._is_reasoning_model + source=self._prompty_file, + model=prompty_model_config, + is_reasoning_model=self._is_reasoning_model, ) # __call__ not overridden here because child classes have such varied signatures that there's no point diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_fluency/_fluency.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_fluency/_fluency.py index 989f9e06b4af..47e63787e218 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_fluency/_fluency.py @@ -13,18 +13,24 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]): """ - Evaluates the fluency of a given response or a multi-turn conversation, including reasoning. + Evaluates the fluency of a given response or a multi-turn conversation, + including reasoning. - The fluency measure assesses the extent to which the generated text conforms to grammatical rules, syntactic - structures, and appropriate vocabulary usage, resulting in linguistically correct responses. + The fluency measure assesses the extent to which generated text conforms + to grammar, syntax, and appropriate vocabulary, resulting in linguistically + correct responses. - Fluency scores range from 1 to 5, with 1 being the least fluent and 5 being the most fluent. + Fluency scores range from 1 to 5 (1 = least fluent, 5 = most fluent). :param model_config: Configuration for the Azure OpenAI model. - :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, + :type model_config: + Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the fluency evaluator. Default is 3. :type threshold: int + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool .. admonition:: Example: @@ -51,24 +57,25 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]): :end-before: [END fluency_evaluator] :language: python :dedent: 8 - :caption: Initialize and call FluencyEvaluator using Azure AI Project URL in the following format + :caption: Initialize and call FluencyEvaluator using Azure AI + Project URL in the following format https://{resource_name}.services.ai.azure.com/api/projects/{project_name} .. note:: - To align with our support of a diverse set of models, an output key without the `gpt_` prefix has been added. - To maintain backwards compatibility, the old key with the `gpt_` prefix is still be present in the output; - however, it is recommended to use the new key moving forward as the old key will be deprecated in the future. + To align with support of diverse models, an output key without the + `gpt_` prefix has been added. 
The old key with the `gpt_` prefix is + still present for compatibility and will be deprecated. """ _PROMPTY_FILE = "fluency.prompty" _RESULT_KEY = "fluency" id = "azureai://built-in/evaluators/fluency" - """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" + """Evaluator identifier for cloud evaluation.""" @override - def __init__(self, model_config, *, threshold=3): + def __init__(self, model_config, *, threshold=3, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self._threshold = threshold @@ -79,6 +86,7 @@ def __init__(self, model_config, *, threshold=3): result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) @overload @@ -103,9 +111,10 @@ def __call__( ) -> Dict[str, Union[float, Dict[str, List[Union[str, float]]]]]: """Evaluate fluency for a conversation - :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the - key "messages", and potentially a global context under the key "context". Conversation turns are expected - to be dictionaries with keys "content", "role", and possibly "context". + :keyword conversation: The conversation to evaluate. Expected to + contain turns under the key "messages", and optionally a global + context under the key "context". Turns are dictionaries with + keys "content", "role", and possibly "context". :paramtype conversation: Optional[~azure.ai.evaluation.Conversation] :return: The fluency score :rtype: Dict[str, Union[float, Dict[str, List[float]]]] @@ -118,16 +127,19 @@ def __call__( # pylint: disable=docstring-missing-param **kwargs, ): """ - Evaluate fluency. Accepts either a response for a single evaluation, - or a conversation for a multi-turn evaluation. If the conversation has more than one turn, - the evaluator will aggregate the results of each turn. - - :keyword response: The response to be evaluated. Mutually exclusive with the "conversation" parameter. - :paramtype response: Optional[str] - :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the - key "messages". Conversation turns are expected to be dictionaries with keys "content" and "role". - :paramtype conversation: Optional[~azure.ai.evaluation.Conversation] - :return: The fluency score. - :rtype: Union[Dict[str, float], Dict[str, Union[float, Dict[str, List[float]]]]] + Evaluate fluency. Accepts either a response for a single evaluation, + or a conversation for a multi-turn evaluation. If the conversation has + more than one turn, the evaluator will aggregate per-turn results. + + :keyword response: The response to be evaluated. Mutually exclusive + with the "conversation" parameter. + :paramtype response: Optional[str] + :keyword conversation: The conversation to evaluate. Expected to + contain turns under the key "messages" as dictionaries with + keys "content" and "role". + :paramtype conversation: Optional[~azure.ai.evaluation.Conversation] + :return: The fluency score. 
+ :rtype: Union[Dict[str, float], Dict[str, Union[float, Dict[str, + List[float]]]]] """ return super().__call__(*args, **kwargs) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py index 9aa8520630fc..09c3d3c7263b 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py @@ -1,7 +1,9 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- -import os, logging +import os +import logging +from inspect import signature from typing import Dict, List, Optional, Union from typing_extensions import overload, override @@ -49,6 +51,9 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]): ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the groundedness evaluator. Default is 3. :type threshold: int + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool .. admonition:: Example: @@ -105,10 +110,16 @@ def __init__(self, model_config, *, threshold=3, **kwargs): result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) self._model_config = model_config self.threshold = threshold - # Needs to be set because it's used in call method to re-validate prompt if `query` is provided + + # Cache whether AsyncPrompty.load supports the is_reasoning_model parameter. + try: + self._has_is_reasoning_model_param: bool = "is_reasoning_model" in signature(AsyncPrompty.load).parameters + except Exception: # Very defensive: if inspect fails, assume not supported + self._has_is_reasoning_model_param = False @overload def __call__( @@ -202,7 +213,18 @@ def __call__( # pylint: disable=docstring-missing-param self._DEFAULT_OPEN_API_VERSION, UserAgentSingleton().value, ) - self._flow = AsyncPrompty.load(source=self._prompty_file, model=prompty_model_config) + + if self._has_is_reasoning_model_param: + self._flow = AsyncPrompty.load( + source=self._prompty_file, + model=prompty_model_config, + is_reasoning_model=self._is_reasoning_model, + ) + else: + self._flow = AsyncPrompty.load( + source=self._prompty_file, + model=prompty_model_config, + ) return super().__call__(*args, **kwargs) @@ -282,4 +304,4 @@ def _get_context_from_agent_response(self, response, tool_definitions): logger.debug(f"Error extracting context from agent response : {str(ex)}") context = "" - return context if context else None + return context diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py index df095f67ba97..d4c943c59282 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_qa/_qa.py @@ -35,6 +35,9 @@ class QAEvaluator(MultiEvaluatorBase[Union[str, float]]): :type similarity_threshold: int :param f1_score_threshold: The threshold for F1 score evaluation. Default is 0.5. 
:type f1_score_threshold: float + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool :return: A callable class that evaluates and generates metrics for "question-answering" scenario. :param kwargs: Additional arguments to pass to the evaluator. :type kwargs: Any @@ -102,11 +105,31 @@ def __init__( raise TypeError(f"{name} must be an int or float, got {type(value)}") evaluators = [ - GroundednessEvaluator(model_config, threshold=groundedness_threshold), - RelevanceEvaluator(model_config, threshold=relevance_threshold), - CoherenceEvaluator(model_config, threshold=coherence_threshold), - FluencyEvaluator(model_config, threshold=fluency_threshold), - SimilarityEvaluator(model_config, threshold=similarity_threshold), + GroundednessEvaluator( + model_config, + threshold=groundedness_threshold, + **kwargs, + ), + RelevanceEvaluator( + model_config, + threshold=relevance_threshold, + **kwargs, + ), + CoherenceEvaluator( + model_config, + threshold=coherence_threshold, + **kwargs, + ), + FluencyEvaluator( + model_config, + threshold=fluency_threshold, + **kwargs, + ), + SimilarityEvaluator( + model_config, + threshold=similarity_threshold, + **kwargs, + ), F1ScoreEvaluator(threshold=f1_score_threshold), ] super().__init__(evaluators=evaluators, **kwargs) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py index bac157ab2623..e9a548303290 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py @@ -8,7 +8,12 @@ from typing_extensions import overload, override -from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget +from azure.ai.evaluation._exceptions import ( + EvaluationException, + ErrorBlame, + ErrorCategory, + ErrorTarget, +) from ..._common.utils import reformat_conversation_history, reformat_agent_response from azure.ai.evaluation._model_configurations import Conversation @@ -35,6 +40,9 @@ class RelevanceEvaluator(PromptyEvaluatorBase): ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the relevance evaluator. Default is 3. :type threshold: int + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool ..
admonition:: Example: @@ -79,7 +87,7 @@ class RelevanceEvaluator(PromptyEvaluatorBase): """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" @override - def __init__(self, model_config, *, threshold=3): + def __init__(self, model_config, *, threshold=3, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self._threshold = threshold @@ -90,6 +98,7 @@ def __init__(self, model_config, *, threshold=3): result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) @overload diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py index 1f0a886f944f..138278a0d7d8 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py @@ -8,7 +8,12 @@ from typing_extensions import overload, override -from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget +from azure.ai.evaluation._exceptions import ( + EvaluationException, + ErrorBlame, + ErrorCategory, + ErrorTarget, +) from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase from azure.ai.evaluation._common.utils import parse_quality_evaluator_reason_score from azure.ai.evaluation._model_configurations import Conversation, Message @@ -37,6 +42,9 @@ class ResponseCompletenessEvaluator(PromptyEvaluatorBase[Union[str, float]]): :param model_config: Configuration for the Azure OpenAI model. :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool ..
admonition:: Example: @@ -73,11 +81,22 @@ class ResponseCompletenessEvaluator(PromptyEvaluatorBase[Union[str, float]]): """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" @override - def __init__(self, model_config, *, threshold: Optional[float] = _DEFAULT_COMPLETENESS_THRESHOLD, **kwargs): + def __init__( + self, + model_config, + *, + threshold: Optional[float] = _DEFAULT_COMPLETENESS_THRESHOLD, + **kwargs, + ): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self.threshold = threshold - super().__init__(model_config=model_config, prompty_file=prompty_path, result_key=self._RESULT_KEY, **kwargs) + super().__init__( + model_config=model_config, + prompty_file=prompty_path, + result_key=self._RESULT_KEY, + **kwargs, + ) @overload def __call__( diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py index eea0cd516154..951faba0a305 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py @@ -7,7 +7,9 @@ from typing import Dict, List, Union from typing_extensions import overload, override -from azure.ai.evaluation._evaluators._common._base_prompty_eval import PromptyEvaluatorBase +from azure.ai.evaluation._evaluators._common._base_prompty_eval import ( + PromptyEvaluatorBase, +) from azure.ai.evaluation._model_configurations import Conversation logger = logging.getLogger(__name__) @@ -33,6 +35,9 @@ class RetrievalEvaluator(PromptyEvaluatorBase[Union[str, float]]): ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the evaluation. Default is 3. :type threshold: float + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool :return: A function that evaluates and generates metrics for "chat" scenario. :rtype: Callable @@ -78,7 +83,8 @@ class RetrievalEvaluator(PromptyEvaluatorBase[Union[str, float]]): """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" @override - def __init__(self, model_config, *, threshold: float = 3): # pylint: disable=super-init-not-called + # pylint: disable=super-init-not-called + def __init__(self, model_config, *, threshold: float = 3, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self._threshold = threshold @@ -89,6 +95,7 @@ def __init__(self, model_config, *, threshold: float = 3): # pylint: disable=su result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) @overload diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py index dd4043944a88..1ac08ef527b5 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py @@ -14,22 +14,28 @@ class SimilarityEvaluator(PromptyEvaluatorBase): """ Evaluates similarity score for a given query, response, and ground truth. 
- The similarity measure evaluates the likeness between a ground truth sentence (or document) and the - AI model's generated prediction. This calculation involves creating sentence-level embeddings for both - the ground truth and the model's prediction, which are high-dimensional vector representations capturing - the semantic meaning and context of the sentences. + The similarity measure evaluates the likeness between a ground truth + sentence (or document) and the AI model's generated prediction. This + involves creating sentence-level embeddings for both the ground truth and + the model's prediction. These are high-dimensional vectors capturing the + semantic meaning and context of the sentences. - Use it when you want an objective evaluation of an AI model's performance, particularly in text generation - tasks where you have access to ground truth responses. Similarity enables you to assess the generated - text's semantic alignment with the desired content, helping to gauge the model's quality and accuracy. + Use it when you need an objective evaluation of an AI model's performance, + especially for text generation with ground truth responses. Similarity + assesses semantic alignment with the desired content and helps gauge model + quality and accuracy. - Similarity scores range from 1 to 5, with 1 being the least similar and 5 being the most similar. + Similarity scores range from 1 to 5 (1 = least similar, 5 = most similar). :param model_config: Configuration for the Azure OpenAI model. - :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, + :type model_config: + Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] :param threshold: The threshold for the similarity evaluator. Default is 3. :type threshold: int + :keyword is_reasoning_model: (Preview) If True, the chat completions + configuration is adjusted for reasoning models + :type is_reasoning_model: bool .. admonition:: Example: @@ -38,7 +44,8 @@ class SimilarityEvaluator(PromptyEvaluatorBase): :end-before: [END similarity_evaluator] :language: python :dedent: 8 - :caption: Initialize and call a SimilarityEvaluator with a four-gram rouge type. + :caption: Initialize and call a SimilarityEvaluator with a query, + response, and ground truth. .. admonition:: Example using Azure AI Project URL: @@ -47,7 +54,8 @@ class SimilarityEvaluator(PromptyEvaluatorBase): :end-before: [END similarity_evaluator] :language: python :dedent: 8 - :caption: Initialize and call SimilarityEvaluator using Azure AI Project URL in the following format + :caption: Initialize and call SimilarityEvaluator using Azure AI + Project URL in the following format https://{resource_name}.services.ai.azure.com/api/projects/{project_name} .. admonition:: Example: @@ -57,13 +65,16 @@ class SimilarityEvaluator(PromptyEvaluatorBase): :end-before: [END threshold_similarity_evaluator] :language: python :dedent: 8 - :caption: Initialize with a threshold and call a SimilarityEvaluator. + :caption: Initialize with a threshold and call a + SimilarityEvaluator. .. note:: - To align with our support of a diverse set of models, an output key without the `gpt_` prefix has been added. - To maintain backwards compatibility, the old key with the `gpt_` prefix is still be present in the output; - however, it is recommended to use the new key moving forward as the old key will be deprecated in the future. + To align with our support of diverse models, an output key without the + `gpt_` prefix has been added. 
To maintain backwards compatibility, the + old key with the `gpt_` prefix is still present in the output; however, + it is recommended to use the new key moving forward as the old key will + be deprecated in the future. """ # Constants must be defined within eval's directory to be save/loadable @@ -72,10 +83,10 @@ class SimilarityEvaluator(PromptyEvaluatorBase): _RESULT_KEY = "similarity" id = "azureai://built-in/evaluators/similarity" - """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" + """Evaluator identifier for cloud evaluation.""" @override - def __init__(self, model_config, *, threshold=3): + def __init__(self, model_config, *, threshold=3, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self._threshold = threshold @@ -86,13 +97,14 @@ def __init__(self, model_config, *, threshold=3): result_key=self._RESULT_KEY, threshold=threshold, _higher_is_better=self._higher_is_better, + **kwargs, ) # Ignoring a mypy error about having only 1 overload function. - # We want to use the overload style for all evals, even single-inputs. This is both to make - # refactoring to multi-input styles easier, stylistic consistency consistency across evals, - # and due to the fact that non-overloaded syntax now causes various parsing issues that - # we don't want to deal with. + # We want to use the overload style for all evals, even single-inputs. + # This makes refactoring to multi-input styles easier, keeps stylistic + # consistency across evals, and avoids parsing issues with non-overloaded + # syntax. @overload # type: ignore def __call__(self, *, query: str, response: str, ground_truth: str) -> Dict[str, float]: """