     LLMGraderGeneratorConfig,
 )
 from openjudge.generator.simple_rubric.rubric_generator import (
-    RubricGenerationConfig,
+    DEFAULT_RUBRICS,
     TaskBasedRubricGenerator,
 )
 from openjudge.graders.llm_grader import LLMGrader
 from openjudge.graders.schema import GraderMode
-from openjudge.models.base_chat_model import BaseChatModel
 from openjudge.models.openai_chat_model import OpenAIChatModel
 from openjudge.models.schema.prompt_template import LanguageEnum
 
@@ -55,15 +54,12 @@ class SimpleRubricsGeneratorConfig(LLMGraderGeneratorConfig):
 
     Attributes:
         task_description: Description of the task for evaluation.
-            Should describe what kind of queries and responses are expected.
         scenario: Optional usage scenario for context.
-            Helps the generator understand the evaluation context.
         language: Language for prompts (ZH or EN). Defaults to EN.
         default_rubrics: Fallback rubrics if generation fails.
-            These are used when LLM generation fails.
-        max_retries: Maximum number of retry attempts for LLM calls. Defaults to 3.
-        min_score: Minimum score for pointwise evaluation. Defaults to 0.
-        max_score: Maximum score for pointwise evaluation. Defaults to 1.
+        max_retries: Maximum number of retry attempts for LLM calls.
+        min_score: Minimum score for pointwise evaluation.
+        max_score: Maximum score for pointwise evaluation.
 
     Inherited from LLMGraderGeneratorConfig:
         grader_name: Human-readable name for the generated grader.
@@ -72,24 +68,11 @@ class SimpleRubricsGeneratorConfig(LLMGraderGeneratorConfig):
         custom_evaluation_prompt: Custom template for evaluation.
     """
 
-    # Task description parameters
     task_description: str = ""
     scenario: Optional[str] = None
     language: LanguageEnum = LanguageEnum.EN
-
-    # Fallback configuration
-    default_rubrics: List[str] = field(
-        default_factory=lambda: [
-            "Accuracy: Whether the response is factually correct",
-            "Relevance: Whether the response addresses the query",
-            "Completeness: Whether the response is comprehensive",
-        ]
-    )
-
-    # Generation parameters
+    default_rubrics: List[str] = field(default_factory=lambda: DEFAULT_RUBRICS.copy())
     max_retries: int = 3
-
-    # Pointwise-specific parameters
     min_score: int = 0
     max_score: int = 1
 
@@ -107,14 +90,6 @@ class SimpleRubricsGenerator(LLMGraderGenerator):
     2. Uses an LLM to generate relevant evaluation criteria
     3. Creates an LLMGrader configured with these rubrics
 
-    This is suitable for scenarios where:
-    - You have a clear task description
-    - You don't have labeled preference data for rubric learning
-    - You want a quick way to set up evaluation
-
-    For more sophisticated rubric generation from preference data,
-    see the iterative_rubric module.
-
     Example:
         >>> config = SimpleRubricsGeneratorConfig(
         ...     grader_name="Medical QA Grader",
@@ -127,38 +102,25 @@ class SimpleRubricsGenerator(LLMGraderGenerator):
         ...     dataset=[],
         ...     sample_queries=["What are the symptoms of flu?"]
         ... )
-        >>> # Now use the grader to evaluate responses
-        >>> result = await grader.aevaluate(query="...", response="...")
     """
 
     def __init__(self, config: SimpleRubricsGeneratorConfig) -> None:
         """Initialize the simple rubrics generator.
 
         Args:
-            config: Configuration for rubric generation. Includes:
-                - grader_name: Name for the generated grader
-                - model: Language model for generation and evaluation
-                - task_description: Description of the evaluation task
-                - scenario: Optional usage scenario
-                - language: Language for prompts (ZH or EN)
-                - grader_mode: POINTWISE or LISTWISE
-                - default_rubrics: Fallback rubrics if generation fails
+            config: Configuration for rubric generation.
         """
         super().__init__(config)
         self.config: SimpleRubricsGeneratorConfig = config
 
-        # Initialize the rubric generator
-        rubric_config = RubricGenerationConfig(
+        self._rubric_generator = TaskBasedRubricGenerator(
+            model=config.model,
             task_description=config.task_description,
             scenario=config.scenario,
             language=config.language,
             default_rubrics=config.default_rubrics,
             max_retries=config.max_retries,
         )
-        self._rubric_generator = TaskBasedRubricGenerator(
-            config=rubric_config,
-            model=config.model,
-        )
 
     async def generate(
         self,
@@ -168,28 +130,20 @@ async def generate(
     ) -> LLMGrader:
         """Generate an LLMGrader with rubrics from task description.
 
-        This method generates evaluation rubrics based on the task description
-        and creates an LLMGrader instance configured with these rubrics.
-
         Args:
-            dataset: List of data dictionaries. For this generator, the dataset
-                is optional and only used to extract sample queries if
-                sample_queries is not provided.
+            dataset: List of data dictionaries (used to extract sample queries
+                if sample_queries is not provided).
             sample_queries: Optional list of sample queries for context.
-                If not provided, queries may be extracted from dataset.
             **kwargs: Additional arguments (currently unused).
 
         Returns:
             LLMGrader: Configured grader instance with generated rubrics.
         """
-        # Extract sample queries from dataset if not provided
         if sample_queries is None and dataset:
            sample_queries = [d.get("query", "") for d in dataset[:5] if d.get("query")]
 
-        # Generate rubrics
-        rubrics = await self._generate_rubrics(dataset, sample_queries=sample_queries, **kwargs)
+        rubrics = await self._generate_rubrics(sample_queries)
 
-        # Prepare grader kwargs
         grader_kwargs = {
             "name": self.config.grader_name,
             "model": self.config.model,
@@ -198,16 +152,13 @@ async def generate(
             "language": self.config.language,
         }
 
-        # Add min_score and max_score only for pointwise mode
         if self.config.grader_mode == GraderMode.POINTWISE:
             grader_kwargs["min_score"] = self.config.min_score
             grader_kwargs["max_score"] = self.config.max_score
 
-        # Add template: use custom if provided, otherwise use default based on mode
         if self.config.custom_evaluation_prompt is not None:
             grader_kwargs["template"] = self.config.custom_evaluation_prompt
         else:
-            # Use default evaluation template based on grader mode
             if self.config.grader_mode == GraderMode.POINTWISE:
                 grader_kwargs["template"] = POINTWISE_EVALUATION_TEMPLATE
             else:
@@ -217,35 +168,22 @@ async def generate(
         return LLMGrader(**grader_kwargs)
     async def _generate_rubrics(
         self,
-        dataset: List[dict],
         sample_queries: Optional[List[str]] = None,
-        **kwargs,
     ) -> str:
         """Generate rubrics from task description.
 
-        This method uses the TaskBasedRubricGenerator to create rubrics
-        based on the task description and sample queries.
-
         Args:
-            dataset: List of data dictionaries (used for extracting sample queries
-                if sample_queries is not provided).
             sample_queries: Optional list of sample queries for context.
-            **kwargs: Additional arguments (currently unused).
 
         Returns:
             str: Formatted string containing evaluation rubrics.
         """
-        # Generate rubrics as list
-        rubrics_list = await self._rubric_generator.generate(
-            sample_queries=sample_queries,
-        )
+        rubrics_list = await self._rubric_generator.generate(sample_queries=sample_queries)
 
-        # Format rubrics into a string
         formatted_rubrics = "\n\n".join(
             [f"{i + 1}. {rubric}" for i, rubric in enumerate(rubrics_list)]
         )
 
         logger.info(f"Generated {len(rubrics_list)} rubrics from task description")
 
         return formatted_rubrics
-
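For reviewers, a minimal end-to-end sketch of the call path after this refactor, stitched together from the docstring example in the diff above. The generator module's import path and the `OpenAIChatModel` constructor arguments are illustrative assumptions, not confirmed by this diff; the config fields and the `generate()` signature come from the changed code.

```python
# Untested sketch; the generator import path and OpenAIChatModel() arguments are assumed.
import asyncio

from openjudge.generator.simple_rubric.simple_rubrics_generator import (  # assumed module path
    SimpleRubricsGenerator,
    SimpleRubricsGeneratorConfig,
)
from openjudge.graders.schema import GraderMode
from openjudge.models.openai_chat_model import OpenAIChatModel


async def main() -> None:
    config = SimpleRubricsGeneratorConfig(
        grader_name="Medical QA Grader",
        model=OpenAIChatModel(),  # constructor args omitted; assumed default-constructible
        task_description="Evaluate answers to medical questions",  # illustrative wording
        grader_mode=GraderMode.POINTWISE,  # min_score/max_score default to 0 and 1
    )
    generator = SimpleRubricsGenerator(config)

    # An empty dataset is fine: sample_queries alone give the rubric LLM its
    # context, and config.default_rubrics (now sourced from DEFAULT_RUBRICS in
    # the rubric_generator module) is the fallback if generation fails.
    grader = await generator.generate(
        dataset=[],
        sample_queries=["What are the symptoms of flu?"],
    )
    print(grader)


asyncio.run(main())
```

Net effect of the refactor: the duplicated rubric literals and the intermediate `RubricGenerationConfig` are gone, and `TaskBasedRubricGenerator` is built directly from the config fields, so the fallback rubrics now live in a single place (`DEFAULT_RUBRICS`).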