@@ -1,5 +1,6 @@
+import asyncio
 import json
-from typing import Callable
+from typing import Any, Callable
 
 import pytest
 from openai.types.chat import ChatCompletion, ChatCompletionMessage
@@ -23,6 +24,18 @@
 test_response = make_text_unique(TEST_RESPONSE)
 
 
+def _run_score_sync_or_async(
+    tlm_chat: TLMChatCompletion,
+    response: ChatCompletion,
+    is_async: bool,
+    **openai_kwargs: Any,
+) -> TLMScore:
+    """Run either the sync or the async score method, depending on is_async."""
+    if is_async:
+        return asyncio.run(tlm_chat.score_async(response=response, **openai_kwargs))
+    return tlm_chat.score(response=response, **openai_kwargs)
+
+
 def test_get_model_name() -> None:
     tlm = TLMChatCompletion()
     model_name = tlm.get_model_name()
@@ -35,7 +48,8 @@ def test_get_model_name() -> None:
3548 "quality_preset",
3649 ["base", "low", "medium", "high", "best"],
3750)
38- def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
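+# Runs twice via the is_async parameter: the "sync" case calls score() directly,
+# the "async" case drives score_async() through _run_score_sync_or_async().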
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset, is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(quality_preset=quality_preset)
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -55,13 +69,14 @@ def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_options() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_options(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["explanation", "perplexity"]})
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -81,13 +96,14 @@ def test_tlm_chat_completion_score_with_options() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_tools() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_tools(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -126,13 +142,14 @@ def test_tlm_chat_completion_score_with_tools() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_structured_output() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_structured_output(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -200,13 +217,14 @@ def test_tlm_chat_completion_score_with_structured_output() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_structured_output_per_field_scoring(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["per_field_score"]})
 
     openai_kwargs = {
@@ -275,7 +293,7 @@ def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
@@ -339,7 +357,10 @@ def test_tlm_chat_completion_score_missing_messages() -> None:
     ],
     ids=["bad_arguments", "good_arguments"],
 )
-def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callable[[TLMScore], bool]) -> None:
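+# Crossed with the arguments/condition cases above, this yields four variants in total.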
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_tool_calls(
+    arguments: str, condition: Callable[[TLMScore], bool], is_async: bool
+) -> None:
     tlm_chat = TLMChatCompletion()
 
     openai_kwargs = {
@@ -390,7 +411,7 @@ def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callabl
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert condition(score)