12 changes: 8 additions & 4 deletions cookbooks/grader_validation/rewardbench2.py
@@ -307,8 +307,10 @@ async def _evaluate_four_way(
GraderScore: Result with score=1.0 if predicted best answer matches ground truth
"""
# Handle None case for mutable arguments
answers = answers if answers is not None else []
chosen_indices = chosen_indices if chosen_indices is not None else []
if not answers:
answers = []
if not chosen_indices:
chosen_indices = []

# Ensure we have exactly 4 answers
if len(answers) < 4:
@@ -402,8 +404,10 @@ async def _evaluate_ties(
GraderScore: Result with score=1.0 if any top-rated answer is in chosen_indices
"""
# Handle None case for mutable arguments
answers = answers if answers is not None else []
chosen_indices = chosen_indices if chosen_indices is not None else []
if not answers:
answers = []
if not chosen_indices:
chosen_indices = []

correct_indices = set(chosen_indices)

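Note: below is a minimal, standalone sketch of the optional-list guard used in this hunk; the simplified signature and the assertions are illustrative and not part of the PR.

from typing import List, Optional

def evaluate_four_way(answers: Optional[List[str]] = None,
                      chosen_indices: Optional[List[int]] = None) -> int:
    # Mutable defaults are avoided; None or empty inputs become fresh lists,
    # so downstream length and membership checks never see None.
    if not answers:
        answers = []
    if not chosen_indices:
        chosen_indices = []
    return len(answers)

assert evaluate_four_way() == 0
assert evaluate_four_way(["a", "b", "c", "d"], [0]) == 4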
2 changes: 1 addition & 1 deletion cookbooks/training_judge_model/bradley-terry/dataset.py
@@ -120,7 +120,7 @@ def _tokenize_messages(self, messages: List[Dict[str, str]]) -> Dict[str, torch.
# Handle sequence length like SFT dataset
if sequence_length < self.max_length:
# Pad sequences
pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else 0
pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else 0
padded_input_ids = (
torch.ones(
size=(self.max_length - sequence_length,),
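Note: an illustrative, self-contained padding sketch matching the branch above; the token values and max_length are made up for the example, and the fallback pad id of 0 mirrors the line changed in this hunk.

import torch

max_length = 8
input_ids = torch.tensor([101, 2023, 2003, 102])  # 4 tokens
pad_token_id = 0  # tokenizer.pad_token_id when available, else 0

if input_ids.size(0) < max_length:
    # Right-pad the sequence up to max_length with the pad token.
    padding = torch.full((max_length - input_ids.size(0),), pad_token_id, dtype=input_ids.dtype)
    input_ids = torch.cat([input_ids, padding])

assert input_ids.tolist() == [101, 2023, 2003, 102, 0, 0, 0, 0]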
13 changes: 6 additions & 7 deletions cookbooks/training_judge_model/bradley-terry/trainer.py
@@ -163,7 +163,7 @@ def _build_model_optimizer(self):
verbose=True,
)

if self.config.model.external_lib is not None:
if self.config.model.external_lib:
import importlib

importlib.import_module(self.config.model.external_lib)
@@ -538,11 +538,10 @@ def fit(self):
last_valid_metric = None
latest_train_metric = {}

total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
if self.config.trainer.total_training_steps is not None:
total_training_steps = self.config.trainer.total_training_steps

self.total_training_steps = total_training_steps
if self.config.trainer.total_training_steps:
self.total_training_steps = self.config.trainer.total_training_steps
else:
self.total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
print(f"Total training steps: {self.total_training_steps}")

# Create a single progress bar for all training steps
@@ -690,7 +689,7 @@ def run_bt_training(config):
)

# Ensure pad token exists
if tokenizer.pad_token is None:
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

# Create datasets
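Note: a small standalone sketch of the step-count resolution in fit() above; resolve_total_steps is a hypothetical helper written only to illustrate the precedence, not a function in the PR.

def resolve_total_steps(configured_steps, num_batches, total_epochs):
    # An explicit (truthy) config value wins; otherwise derive the count
    # from the dataloader length and the number of epochs.
    if configured_steps:
        return configured_steps
    return num_batches * total_epochs

assert resolve_total_steps(None, 100, 3) == 300
assert resolve_total_steps(500, 100, 3) == 500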
33 changes: 18 additions & 15 deletions openjudge/analyzer/statistical/consistency_analyzer.py
@@ -101,21 +101,24 @@ def analyze(
>>> print(f"Consistency: {result.consistency:.2f}")
Consistency: 0.99
"""
# Handle the case where the method is called with the old signature
# i.e., analyze(first_run_results, second_run_results)
first_run_results = grader_results
second_run_results = another_grader_results

# If the parameters were passed positionally as before, dataset will be first_run_results
# and grader_results will be second_run_results
if first_run_results is None and second_run_results is None:
if dataset is not None and grader_results is not None:
first_run_results = dataset
second_run_results = grader_results
else:
# If still not set, use empty lists
first_run_results = []
second_run_results = []
# Need to support the old 2-argument call signature: analyze(first_run_results, second_run_results).
# Need to determine which argument holds the 1st run results and which holds the 2nd run results.
if grader_results and another_grader_results:
# current call signature
first_run_results = grader_results
second_run_results = another_grader_results
elif dataset and grader_results:
# The first two arguments contain values but the 3rd does not.
# Treat this as a call following the old 2-argument signature.
first_run_results = dataset
second_run_results = grader_results
else:
# 1. Insufficient arguments for the current call signature:
#    dataset and another grader result exist,
#    but the 2nd argument (grader_results) has no value.
# Or 2. none of dataset, grader_results, another_grader_results exists.
first_run_results = []
second_run_results = []

if not first_run_results or not second_run_results:
logger.warning(
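Note: a standalone sketch of the argument-disambiguation logic above; resolve_runs is a hypothetical helper and the run payloads are plain lists rather than grader results.

def resolve_runs(dataset=None, grader_results=None, another_grader_results=None):
    if grader_results and another_grader_results:
        # Current signature: both runs arrive via the grader-result parameters.
        return grader_results, another_grader_results
    if dataset and grader_results:
        # Legacy two-argument call: the runs arrived positionally in the first two slots.
        return dataset, grader_results
    # Not enough information either way; fall back to empty runs.
    return [], []

assert resolve_runs(grader_results=[1], another_grader_results=[2]) == ([1], [2])
assert resolve_runs([1], [2]) == ([1], [2])  # old call style
assert resolve_runs() == ([], [])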
2 changes: 1 addition & 1 deletion openjudge/generator/iterative_rubric/categorizer.py
@@ -238,7 +238,7 @@ async def categorize_rubrics(
>>> categorized_rubrics, info = await categorizer.categorize_rubrics(rubrics)
"""

if len(rubrics) == 0:
if not rubrics:
logger.error("Input rubrics list is empty")
return [], {
"categorization_successful": False,
2 changes: 1 addition & 1 deletion openjudge/generator/iterative_rubric/generator.py
@@ -265,7 +265,7 @@ async def generate(
grader_kwargs["max_score"] = self.config.max_score

# Add custom template if provided
if hasattr(self.config, "custom_evaluation_prompt") and self.config.custom_evaluation_prompt is not None:
if hasattr(self.config, "custom_evaluation_prompt") and self.config.custom_evaluation_prompt:
grader_kwargs["template"] = self.config.custom_evaluation_prompt

return LLMGrader(**grader_kwargs)
3 changes: 1 addition & 2 deletions openjudge/graders/agent/action/action_alignment.py
@@ -185,10 +185,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate action alignment with plan",
model=model,
template=template,
template=template or DEFAULT_ACTION_ALIGNMENT_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_ACTION_ALIGNMENT_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
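Note: the same "template or DEFAULT_..." fallback recurs in the grader constructors below; here is a minimal sketch of the idiom with placeholder names.

DEFAULT_TEMPLATE = "Rate the response on a 1-5 scale."

class ExampleGrader:
    def __init__(self, template=None):
        # Any falsy template (None or "") falls back to the default.
        self.template = template or DEFAULT_TEMPLATE

assert ExampleGrader().template == DEFAULT_TEMPLATE
assert ExampleGrader("Custom prompt").template == "Custom prompt"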
3 changes: 1 addition & 2 deletions openjudge/graders/agent/memory/memory_detail_preservation.py
@@ -176,10 +176,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate memory detail preservation",
model=model,
template=template,
template=template or DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
@@ -179,10 +179,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate memory retrieval effectiveness",
model=model,
template=template,
template=template or DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
3 changes: 1 addition & 2 deletions openjudge/graders/agent/plan/plan_feasibility.py
@@ -179,10 +179,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate plan feasibility",
model=model,
template=template,
template=template or DEFAULT_PLAN_FEASIBILITY_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_PLAN_FEASIBILITY_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
3 changes: 1 addition & 2 deletions openjudge/graders/agent/reflection/reflection_accuracy.py
@@ -176,10 +176,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate reflection accuracy",
model=model,
template=template,
template=template or DEFAULT_REFLECTION_ACCURACY_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_REFLECTION_ACCURACY_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
@@ -300,10 +300,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate reflection outcome understanding",
model=model,
template=template,
template=template or DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
@@ -217,10 +217,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate reflection progress awareness",
model=model,
template=template,
template=template or DEFAULT_REFLECTION_PROGRESS_AWARENESS_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_REFLECTION_PROGRESS_AWARENESS_TEMPLATE

def _format_history(self, history: Optional[list] = None) -> str:
"""Format history steps for evaluation.
3 changes: 1 addition & 2 deletions openjudge/graders/agent/tool/tool_call_accuracy.py
@@ -209,10 +209,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluates the accuracy of tool calls made by an agent",
model=model,
template=template,
template=template or DEFAULT_TOOL_CALL_ACCURACY_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_TOOL_CALL_ACCURACY_TEMPLATE

def _parse_tools_from_response(
self,
3 changes: 1 addition & 2 deletions openjudge/graders/agent/tool/tool_parameter_check.py
@@ -189,10 +189,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate tool parameter extraction correctness",
model=model,
template=template,
template=template or DEFAULT_TOOL_PARAMETER_CHECK_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_TOOL_PARAMETER_CHECK_TEMPLATE

async def aevaluate(
self,
3 changes: 1 addition & 2 deletions openjudge/graders/agent/tool/tool_selection.py
@@ -202,10 +202,9 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate tool selection ",
model=model,
template=template,
template=template or DEFAULT_TOOL_SELECTION_TEMPLATE,
language=language,
)
self.template = template if template is not None else DEFAULT_TOOL_SELECTION_TEMPLATE

async def aevaluate(
self,
Empty file.
7 changes: 4 additions & 3 deletions openjudge/graders/code/_utils/testing_util.py
@@ -154,6 +154,10 @@ def run_test(in_outs, test=None, timeout=15):
"""
# Disable functionalities that can make destructive changes to the test.
reliability_guard()

if not test:
raise AssertionError("should not happen: missing test code input")

method_name = None
tmp = None
which_type = None
@@ -170,9 +174,6 @@ def run_test(in_outs, test=None, timeout=15):

logger.debug(f"loaded input_output = {datetime.now().time()}")

if test is None:
raise AssertionError("should not happen: test code is none")

results = []
sol = """from string import *
from re import *
3 changes: 1 addition & 2 deletions openjudge/graders/common/hallucination.py
@@ -265,11 +265,10 @@ def __init__(
mode=GraderMode.POINTWISE,
description="Evaluate whether response contains hallucinations",
model=model,
template=template,
template=template or DEFAULT_HALLUCINATION_TEMPLATE,
language=language,
)
self.threshold = threshold
self.template = template if template is not None else DEFAULT_HALLUCINATION_TEMPLATE

async def aevaluate(
self,
6 changes: 3 additions & 3 deletions openjudge/graders/multimodal/_internal/criteria_utils.py
@@ -81,7 +81,7 @@ def validate_and_sort_rubrics(
... ]
>>> sorted_rubrics = validate_and_sort_rubrics(rubrics)
"""
if rubrics is None:
if not rubrics:
return None

# Sort rubrics by start of range
@@ -120,7 +120,7 @@ def format_rubrics(rubrics: Optional[List[Rubric]]) -> Optional[str]:
0-3: Poor quality
7-10: High quality
"""
if rubrics is None:
if not rubrics:
return None

return "\n".join(
@@ -177,7 +177,7 @@ def get_score_range(rubric: Optional[List[Rubric]]) -> Tuple[int, int]:
>>> get_score_range(rubrics)
(0, 10)
"""
if rubric is None:
if not rubric:
return (0, 10)

return rubric[0].score_range[0], rubric[-1].score_range[1]
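Note: an illustrative, self-contained version of the empty-rubric fallback above; the Rubric stand-in below carries only a score_range field.

from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class Rubric:
    score_range: Tuple[int, int]

def get_score_range(rubric: Optional[List[Rubric]]) -> Tuple[int, int]:
    if not rubric:  # None or an empty list -> default range
        return (0, 10)
    return rubric[0].score_range[0], rubric[-1].score_range[1]

assert get_score_range(None) == (0, 10)
assert get_score_range([]) == (0, 10)
assert get_score_range([Rubric((0, 3)), Rubric((7, 10))]) == (0, 10)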
8 changes: 4 additions & 4 deletions openjudge/models/formatter/dashscope_formatter.py
@@ -75,8 +75,8 @@ def _convert_content_to_openai(
Returns:
Content in OpenAI format.
"""
# If content is None, return empty string
if content is None:
# Return empty string if no content input
if not content:
return ""

# If content is a string, return as is
@@ -135,8 +135,8 @@ def _convert_content_to_dashscope(
Returns:
Content in DashScope format.
"""
# If content is None, return empty string
if content is None:
# Return empty string if no content input
if not content:
return ""

# If content is a string, return as is
2 changes: 1 addition & 1 deletion openjudge/models/schema/prompt_template.py
@@ -184,7 +184,7 @@ def to_messages(
if isinstance(self.messages, list):
messages = self.messages
elif isinstance(self.messages, dict):
if language is None:
if not language:
language = LanguageEnum.EN
assert language in self.messages
messages = self.messages.get(language, [])
4 changes: 2 additions & 2 deletions openjudge/runner/aggregator/base_aggregator.py
@@ -37,12 +37,12 @@ def __name__(self):
return self.name

@abstractmethod
def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult:
def __call__(self, grader_results: Dict[str, GraderResult], **kwargs) -> GraderResult:
"""
Aggregate results from multiple graders for a single sample.
Args:
results: Dictionary mapping grader names to GraderResult objects for a single sample
grader_results: Dictionary mapping grader names to GraderResult objects for a single sample
**kwargs: Additional arguments for aggregation
Returns:
18 changes: 9 additions & 9 deletions openjudge/runner/aggregator/weighted_sum_aggregator.py
@@ -28,27 +28,27 @@ def __init__(self, name: str, weights: Dict[str, float] = None):
super().__init__(name)
self.weights = weights or {}

def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult:
def __call__(self, grader_results: Dict[str, GraderResult], **kwargs) -> GraderResult:
"""
Aggregate results using weighted sum for a single sample.
Aggregate multiple grader results using weighted sum for a single sample.

Args:
results: Dictionary mapping grader names to GraderResult objects for a single sample
grader_results: Dictionary mapping grader names to GraderResult objects for a single sample
**kwargs: Additional arguments (unused)

Returns:
Aggregated result as a GraderResult object
"""
if not results:
if not grader_results:
return GraderError(
name=self.name,
reason="No results to aggregate",
error="No results provided for aggregation",
reason="No grader result to aggregate",
error="No grader result provided for aggregation",
)

# Initialize weights if not provided (equal weights)
if not self.weights:
grader_names = list(results.keys())
grader_names = list(grader_results.keys())
equal_weight = 1.0 / len(grader_names) if grader_names else 0.0
weights = {name: equal_weight for name in grader_names}
else:
@@ -59,8 +59,8 @@ def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult:
component_scores = {}

# Collect scores from all graders for this sample
for grader_name, result in results.items():
# Only process GraderScore results (skip errors, ranks, etc.)
for grader_name, result in grader_results.items():
# Only process results of GraderScore type (skip errors, ranks, etc.)
if isinstance(result, GraderScore):
weight = weights.get(grader_name, 0.0)
weighted_sum += result.score * weight
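Note: a standalone sketch of the weighted-sum aggregation above, with GraderScore reduced to a plain name-to-score mapping and equal weights as the fallback when none are configured.

def weighted_sum(grader_results, weights=None):
    if not grader_results:
        raise ValueError("No grader result provided for aggregation")
    if not weights:
        # Equal weights when none are configured.
        equal = 1.0 / len(grader_results)
        weights = {name: equal for name in grader_results}
    return sum(score * weights.get(name, 0.0) for name, score in grader_results.items())

assert weighted_sum({"a": 1.0, "b": 0.0}) == 0.5
assert weighted_sum({"a": 1.0, "b": 0.0}, {"a": 0.8, "b": 0.2}) == 0.8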