4242from openjudge .graders .llm_grader import LLMGrader
4343from openjudge .graders .schema import GraderError
4444from openjudge .models .openai_chat_model import OpenAIChatModel
45+ from openjudge .models .schema .prompt_template import LanguageEnum
4546from openjudge .runner .grading_runner import GraderConfig , GradingRunner
4647
4748# ==================== UNIT TESTS ====================
@@ -60,12 +61,18 @@ def test_initialization_failure_without_template(self):
6061 model = AsyncMock (),
6162 name = "foo" ,
6263 )
64+ assert "Missing template argument value" in str (error_obj .value )
65+
66+ def test_initialization_failure_with_invalid_template_type (self ):
 67+         """Test initialization failure with an invalid template type"""
68+ with pytest .raises (ValueError ) as error_obj :
69+ LLMGrader (model = AsyncMock (), name = "foo" , template = AsyncMock ())
6370 assert "Template must be a str, list, dict or PromptTemplate object" in str (error_obj .value )
6471
6572 def test_initialization_with_string_template (self ):
6673 """Test successful initialization with string template"""
6774 mock_model = AsyncMock ()
68- template_str = """You're a LLM query answer relevance grader, you'll received Query/Response:
75+ template_str = """You're a LLM query answer relevance grader, you'll receive Query/Response:
6976 Query: {query}
7077 Response: {response}
7178 Please read query/response, if the Response answers the Query, return 1, return 0 if no.
@@ -98,7 +105,7 @@ def test_initialization_with_dict_template(self):
98105 },
99106 {
100107 "role" : "user" ,
101- "content" : """You'll received Query/Response:
108+ "content" : """You'll receive Query/Response:
102109 Query: {query}
103110 Response: {response}
104111 Please read query/response, if the Response answers the Query, return 1, return 0 if no.
@@ -139,7 +146,7 @@ def test_initialization_with_model_dict(self):
139146 "api_key" : "test-key" ,
140147 }
141148
142- template_str = """You're a LLM query answer relevance grader, you'll received Query/Response:
149+ template_str = """You're a LLM query answer relevance grader, you'll receive Query/Response:
143150 Query: {query}
144151 Response: {response}
145152 Please read query/response, if the Response answers the Query, return 1, return 0 if no.
@@ -158,8 +165,29 @@ def test_initialization_with_model_dict(self):
158165 )
159166
160167 assert grader .name == "test_llm_grader"
161- assert isinstance (grader .model , OpenAIChatModel )
162168 # Note: We can't easily check the model config since it's private
169+ assert isinstance (grader .model , OpenAIChatModel )
170+
171+ language_template = grader .get_template ()
172+ assert len (language_template ) == 1
173+ assert LanguageEnum .EN in language_template
174+ templates = language_template [LanguageEnum .EN ]
175+ assert len (templates ) == 2
176+ for t in templates :
177+ assert len (t ) == 2
178+ assert "role" in t
179+ assert "content" in t
180+
181+ if t ["role" ] == "system" :
182+ assert (
183+ "You are a professional evaluation assistant. Please evaluate according to the user's requirements."
184+ in t ["content" ]
185+ )
186+ elif t ["role" ] == "user" :
187+ assert "You're a LLM query answer relevance grader, you'll receive Query/Response" in t ["content" ]
188+
189+ default_template = grader .get_default_template ()
190+ assert len (default_template ) == 0
163191
164192 @pytest .mark .asyncio
165193 async def test_pointwise_evaluation_success (self ):
@@ -217,7 +245,7 @@ async def test_listwise_evaluation_success(self):
217245 mock_model .achat = AsyncMock (return_value = mock_response )
218246
219247 # Create grader with template that follows the specification in docs
220- template = """You're a LLM query answer ranking grader, you'll received Query and multiple Responses:
248+ template = """You're a LLM query answer ranking grader, you'll receive Query and multiple Responses:
221249 Query: {query}
222250 Responses:
223251 1. {response_1}
@@ -308,9 +336,8 @@ def test_serialization_methods(self):
308336OPENAI_BASE_URL = os .getenv ("OPENAI_BASE_URL" )
309337RUN_QUALITY_TESTS = bool (OPENAI_API_KEY and OPENAI_BASE_URL )
310338
311- pytestmark = pytest .mark .skipif (not RUN_QUALITY_TESTS , reason = "Requires API keys and base URL to run quality tests" )
312-
313339
340+ @pytest .mark .skipif (not RUN_QUALITY_TESTS , reason = "Requires API keys and base URL to run quality tests" )
314341@pytest .mark .quality
315342class TestLLMGraderQuality :
316343 """Quality tests for LLMGrader - testing evaluation quality using golden dataset"""
@@ -361,7 +388,7 @@ def model(self):
361388 async def test_discriminative_power_with_runner (self , dataset , model ):
362389 """Test the grader's ability to distinguish between accurate and inaccurate responses (using Runner)"""
363390 # Create grader with real model following the specification in docs
364- template = """You're a LLM query answer accuracy grader, you'll received Query/Response and Context:
391+ template = """You're a LLM query answer accuracy grader, you'll receive Query/Response and Context:
365392 Query: {query}
366393 Response: {response}
367394 Context: {context}
@@ -420,7 +447,7 @@ async def test_discriminative_power_with_runner(self, dataset, model):
420447 async def test_consistency_with_runner (self , dataset , model ):
421448 """Test grader evaluation consistency (using Runner)"""
422449 # Create grader with real model following the specification in docs
423- template = """You're a LLM query answer accuracy grader, you'll received Query/Response and Context:
450+ template = """You're a LLM query answer accuracy grader, you'll receive Query/Response and Context:
424451 Query: {query}
425452 Response: {response}
426453 Context: {context}
0 commit comments