Add support for skipping trustworthiness scoring (#102)

elisno · huiwengoh · web-flow · commit b9763f94543c · 2025-08-06T01:25:52.000Z
Co-authored-by: Hui Wen <45724323+huiwengoh@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.1.23] - 2025-08-06
+
+### Changed
+
+- Updated `TLMOptions` to support `disable_trustworthiness` parameter
+    - Skips trustworthiness scoring when `disable_trustworthiness` is True, assuming either custom evaluation criteria (TLM) or RAG Evals (TrustworthyRAG) are provided
+
+
 ## [1.1.22] - 2025-07-29
 
 ### Added
@@ -291,7 +299,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Release of the Cleanlab TLM Python client.
 
 
-[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.22...HEAD
+[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.23...HEAD
+[1.1.23]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.22...v1.1.23
 [1.1.22]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.21...v1.1.22
 [1.1.21]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.20...v1.1.21
 [1.1.20]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.19...v1.1.20
diff --git a/src/cleanlab_tlm/__about__.py b/src/cleanlab_tlm/__about__.py
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: MIT
-__version__ = "1.1.22"
+__version__ = "1.1.23"
diff --git a/src/cleanlab_tlm/internal/validation.py b/src/cleanlab_tlm/internal/validation.py
@@ -1,7 +1,9 @@
+from __future__ import annotations
+
 import os
 import warnings
 from collections.abc import Sequence
-from typing import Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 from cleanlab_tlm.errors import ValidationError
 from cleanlab_tlm.internal.constants import (
@@ -24,6 +26,10 @@
 )
 from cleanlab_tlm.internal.types import Task
 
+if TYPE_CHECKING:
+    from cleanlab_tlm.tlm import TLMOptions
+    from cleanlab_tlm.utils.rag import Eval
+
 SKIP_VALIDATE_TLM_OPTIONS: bool = os.environ.get("CLEANLAB_TLM_SKIP_VALIDATE_TLM_OPTIONS", "false").lower() == "true"
 
 
@@ -198,6 +204,27 @@ def validate_tlm_options(
 
                 if not isinstance(criteria.get("criteria"), str):
                     raise ValidationError(f"'criteria' in custom_eval_criteria item {i} must be a string.")
+        elif option == "disable_trustworthiness":
+            if not isinstance(val, bool):
+                raise ValidationError(f"Invalid type {type(val)}, disable_trustworthiness must be a boolean")
+            if val and support_custom_eval_criteria and not options.get("custom_eval_criteria"):
+                raise ValidationError("disable_trustworthiness is only supported when custom_eval_criteria is provided")
+
+
+def _validate_trustworthy_rag_options(options: Optional[TLMOptions], initialized_evals: list[Eval]) -> None:
+    """Validate that TLMOptions are consistent with the other parameters passed to TrustworthyRAG.
+
+    options: TLMOptions
+    initialized_evals: list[Eval]
+        The evals field configured in TrustworthyRAG.__init__. Required to validate disable_trustworthiness option.
+    """
+    disable_trustworthiness = options and options.get("disable_trustworthiness", False)
+
+    if disable_trustworthiness and not initialized_evals:
+        raise ValidationError(
+            "When disable_trustworthiness=True in TrustworthyRAG, at least one evaluation must be provided. "
+            "Either provide evaluations via the 'evals' parameter or set disable_trustworthiness=False."
+        )
 
 
 def process_and_validate_kwargs_constrain_outputs(
diff --git a/src/cleanlab_tlm/tlm.py b/src/cleanlab_tlm/tlm.py
@@ -613,12 +613,14 @@ class TLMOptions(TypedDict):
         num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring.
         The maximum number currently supported is 3. Lower values can reduce runtimes.
         Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis.
+        This parameter has no effect when `disable_trustworthiness` is True.
 
         num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring.
         Must be between 0 and 20. Lower values can reduce runtimes.
         Measuring consistency helps quantify the epistemic uncertainty associated with
         strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
         TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible.
+        This parameter has no effect when `disable_trustworthiness` is True.
 
         similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the
         trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model.
@@ -633,6 +635,11 @@ class TLMOptions(TypedDict):
         You can auto-improve responses by increasing this parameter, but at higher runtimes/costs.
         This parameter must be between 1 and 20. It has no effect on `TLM.score()`.
         When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it.
+        This parameter has no effect when `disable_trustworthiness` is True.
+
+        disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses.
+        This is useful when you only want scores from custom evaluation criteria (or from `evals` in TrustworthyRAG) and want to avoid the computational overhead of trust scoring; at least one such evaluation must be configured.
+        The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`.
     """
 
     model: NotRequired[str]
@@ -645,3 +652,4 @@ class TLMOptions(TypedDict):
     reasoning_effort: NotRequired[str]
     log: NotRequired[list[str]]
     custom_eval_criteria: NotRequired[list[dict[str, Any]]]
+    disable_trustworthiness: NotRequired[bool]
diff --git a/src/cleanlab_tlm/utils/rag.py b/src/cleanlab_tlm/utils/rag.py
@@ -42,6 +42,7 @@
 )
 from cleanlab_tlm.internal.exception_handling import handle_tlm_exceptions
 from cleanlab_tlm.internal.validation import (
+    _validate_trustworthy_rag_options,
     tlm_score_process_response_and_kwargs,
     validate_rag_inputs,
 )
@@ -75,6 +76,7 @@ class TrustworthyRAG(BaseTLM):
 
         options ([TLMOptions](../tlm/#class-tlmoptions), optional): a typed dict of advanced configurations you can optionally specify.
             The "custom_eval_criteria" key for [TLM](../tlm/#class-tlm) is not supported for `TrustworthyRAG`, you can instead specify `evals`.
+            The "disable_trustworthiness" key is only supported for `TrustworthyRAG` when it's set to run `Evals`. See the `evals` argument description below for how evaluations are determined.
 
         timeout (float, optional): timeout (in seconds) to apply to each request.
 
@@ -131,6 +133,8 @@ def __init__(
 
             self._evals = evals
 
+        _validate_trustworthy_rag_options(options=options, initialized_evals=self._evals)
+
     def score(
         self,
         *,
diff --git a/tests/test_get_trustworthiness_score.py b/tests/test_get_trustworthiness_score.py
@@ -203,3 +203,26 @@ def reset_tlm(tlm: TLM) -> Generator[None, None, None]:
     original_timeout = tlm._timeout
     yield
     tlm._timeout = original_timeout
+
+
+def test_get_trustworthiness_score_with_disable_trustworthiness(tlm_api_key: str) -> None:
+    """Tests get_trustworthiness_score with disable_trustworthiness option.
+
+    When disable_trustworthiness is enabled (along with custom_eval_criteria),
+    the trustworthiness_score should be None in the response.
+
+    Expected:
+    - TLM should return a response
+    - trustworthiness_score should be None
+    - No exceptions are raised
+    """
+    tlm = TLM(
+        api_key=tlm_api_key,
+        options={
+            "disable_trustworthiness": True,
+            "custom_eval_criteria": [{"name": "test", "criteria": "test criteria"}],
+        },
+    )
+    response = tlm.get_trustworthiness_score(test_prompt, TEST_RESPONSE)
+    assert not isinstance(response, list)
+    assert response["trustworthiness_score"] is None
diff --git a/tests/test_tlm_rag.py b/tests/test_tlm_rag.py
@@ -962,3 +962,30 @@ def reset_rag_timeout(trustworthy_rag: TrustworthyRAG) -> Generator[None, None,
     old_timeout = trustworthy_rag._timeout
     yield
     trustworthy_rag._timeout = old_timeout
+
+
+def test_score_with_disable_trustworthiness(trustworthy_rag_api_key: str) -> None:
+    """Tests score with disable_trustworthiness option.
+
+    When disable_trustworthiness is enabled (along with valid evals),
+    the trustworthiness score should be None in the response.
+
+    Expected:
+    - TrustworthyRAG should return a response
+    - response should have the trustworthiness key
+    - trustworthiness score should be None
+    - No exceptions are raised
+    """
+    trustworthy_rag = TrustworthyRAG(
+        api_key=trustworthy_rag_api_key,
+        options={"disable_trustworthiness": True},
+    )
+    response = trustworthy_rag.score(
+        query=test_query,
+        context=test_context,
+        response=test_response,
+        prompt=test_prompt,
+    )
+    assert not isinstance(response, list)
+    assert "trustworthiness" in response
+    assert response["trustworthiness"]["score"] is None
diff --git a/tests/test_validation.py b/tests/test_validation.py
@@ -687,6 +687,22 @@ def test_validate_tlm_options_support_custom_eval_criteria() -> None:
     ):
         validate_tlm_options(options, support_custom_eval_criteria=False)
 
+    # Valid with disable_trustworthiness=True and custom_eval_criteria
+    validate_tlm_options({**options, "disable_trustworthiness": True}, support_custom_eval_criteria=True)
+
+    # Invalid: disable_trustworthiness=True without custom_eval_criteria
+    with pytest.raises(
+        ValidationError, match="^disable_trustworthiness is only supported when custom_eval_criteria is provided"
+    ):
+        validate_tlm_options({"disable_trustworthiness": True}, support_custom_eval_criteria=True)
+
+    with pytest.raises(
+        ValidationError, match="^disable_trustworthiness is only supported when custom_eval_criteria is provided"
+    ):
+        validate_tlm_options(
+            {"disable_trustworthiness": True, "custom_eval_criteria": None}, support_custom_eval_criteria=True
+        )
+
 
 def test_validate_rag_inputs_mixed_string_and_sequence() -> None:
     """Tests that validate_rag_inputs rejects mixed inputs where some are strings and others are sequences."""
@@ -798,3 +814,37 @@ def test_validate_rag_inputs_matching_lists() -> None:
     assert len(result) == list_length
     assert result[0] == "Q: query 1 C: context 1"
     assert result[1] == "Q: query 2 C: context 2"
+
+
+def test_disable_trustworthiness_without_custom_criteria_raises_error(tlm_api_key: str) -> None:
+    """Test that disable_trustworthiness=True without custom_eval_criteria raises ValidationError."""
+    with pytest.raises(
+        ValidationError, match="^disable_trustworthiness is only supported when custom_eval_criteria is provided"
+    ):
+        TLM(api_key=tlm_api_key, options={"disable_trustworthiness": True})
+
+
+def test_disable_trustworthiness_with_custom_criteria_works(tlm_api_key: str) -> None:
+    """Test that disable_trustworthiness=True with custom_eval_criteria works normally."""
+    TLM(
+        api_key=tlm_api_key,
+        options={
+            "disable_trustworthiness": True,
+            "custom_eval_criteria": [{"name": "test", "criteria": "test criteria"}],
+        },
+    )
+
+
+def test_disable_trustworthiness_without_custom_criteria_raises_error_rag(tlm_api_key: str) -> None:
+    """Test that disable_trustworthiness=True with an empty evals list raises ValidationError for TrustworthyRAG."""
+    from cleanlab_tlm.utils.rag import TrustworthyRAG
+
+    with pytest.raises(ValidationError, match="^When disable_trustworthiness=True in TrustworthyRAG"):
+        TrustworthyRAG(evals=[], api_key=tlm_api_key, options={"disable_trustworthiness": True})
+
+
+def test_disable_trustworthiness_with_custom_criteria_works_rag(tlm_api_key: str) -> None:
+    """Test that disable_trustworthiness=True works for TrustworthyRAG when `evals` is not passed explicitly."""
+    from cleanlab_tlm.utils.rag import TrustworthyRAG
+
+    TrustworthyRAG(api_key=tlm_api_key, options={"disable_trustworthiness": True})

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`# SPDX-License-Identifier: MIT`
`2`		`-__version__ = "1.1.22"`
	`2`	`+__version__ = "1.1.23"`