diff --git a/cookbooks/grader_validation/rewardbench2.py b/cookbooks/grader_validation/rewardbench2.py
index 7189cdce..45431d63 100644
--- a/cookbooks/grader_validation/rewardbench2.py
+++ b/cookbooks/grader_validation/rewardbench2.py
@@ -307,8 +307,10 @@ async def _evaluate_four_way(
         GraderScore: Result with score=1.0 if predicted best answer matches ground truth
     """
     # Handle None case for mutable arguments
-    answers = answers if answers is not None else []
-    chosen_indices = chosen_indices if chosen_indices is not None else []
+    if not answers:
+        answers = []
+    if not chosen_indices:
+        chosen_indices = []
 
     # Ensure we have exactly 4 answers
     if len(answers) < 4:
@@ -402,8 +404,10 @@ async def _evaluate_ties(
         GraderScore: Result with score=1.0 if any top-rated answer is in chosen_indices
     """
     # Handle None case for mutable arguments
-    answers = answers if answers is not None else []
-    chosen_indices = chosen_indices if chosen_indices is not None else []
+    if not answers:
+        answers = []
+    if not chosen_indices:
+        chosen_indices = []
 
     correct_indices = set(chosen_indices)
 
diff --git a/cookbooks/training_judge_model/bradley-terry/dataset.py b/cookbooks/training_judge_model/bradley-terry/dataset.py
index 3baf3c6e..8ee3d8cc 100644
--- a/cookbooks/training_judge_model/bradley-terry/dataset.py
+++ b/cookbooks/training_judge_model/bradley-terry/dataset.py
@@ -120,7 +120,7 @@ def _tokenize_messages(self, messages: List[Dict[str, str]]) -> Dict[str, torch.
         # Handle sequence length like SFT dataset
         if sequence_length < self.max_length:
             # Pad sequences
-            pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else 0
+            pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else 0
             padded_input_ids = (
                 torch.ones(
                     size=(self.max_length - sequence_length,),
diff --git a/cookbooks/training_judge_model/bradley-terry/trainer.py b/cookbooks/training_judge_model/bradley-terry/trainer.py
index d21aefb8..3228de28 100644
--- a/cookbooks/training_judge_model/bradley-terry/trainer.py
+++ b/cookbooks/training_judge_model/bradley-terry/trainer.py
@@ -163,7 +163,7 @@ def _build_model_optimizer(self):
             verbose=True,
         )
 
-        if self.config.model.external_lib is not None:
+        if self.config.model.external_lib:
             import importlib
 
             importlib.import_module(self.config.model.external_lib)
@@ -538,11 +538,10 @@ def fit(self):
         last_valid_metric = None
         latest_train_metric = {}
 
-        total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
-        if self.config.trainer.total_training_steps is not None:
-            total_training_steps = self.config.trainer.total_training_steps
-
-        self.total_training_steps = total_training_steps
+        if self.config.trainer.total_training_steps:
+            self.total_training_steps = self.config.trainer.total_training_steps
+        else:
+            self.total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
         print(f"Total training steps: {self.total_training_steps}")
 
         # Create a single progress bar for all training steps
@@ -690,7 +689,7 @@ def run_bt_training(config):
     )
 
     # Ensure pad token exists
-    if tokenizer.pad_token is None:
+    if not tokenizer.pad_token:
         tokenizer.pad_token = tokenizer.eos_token
 
     # Create datasets
diff --git a/openjudge/analyzer/statistical/consistency_analyzer.py b/openjudge/analyzer/statistical/consistency_analyzer.py
index 379a8184..3eebb568 100644
--- a/openjudge/analyzer/statistical/consistency_analyzer.py
+++ b/openjudge/analyzer/statistical/consistency_analyzer.py
@@ -101,21 +101,24 @@ def analyze(
             >>> print(f"Consistency: {result.consistency:.2f}")
             Consistency: 0.99
         """
-        # Handle the case where the method is called with the old signature
-        # i.e., analyze(first_run_results, second_run_results)
-        first_run_results = grader_results
-        second_run_results = another_grader_results
-
-        # If the parameters were passed positionally as before, dataset will be first_run_results
-        # and grader_results will be second_run_results
-        if first_run_results is None and second_run_results is None:
-            if dataset is not None and grader_results is not None:
-                first_run_results = dataset
-                second_run_results = grader_results
-            else:
-                # If still not set, use empty lists
-                first_run_results = []
-                second_run_results = []
+        # Support the old 2-argument call signature: analyze(first_run_results, second_run_results).
+        # Determine which argument holds the first-run results and which holds the second-run results.
+        if grader_results and another_grader_results:
+            # Current call signature.
+            first_run_results = grader_results
+            second_run_results = another_grader_results
+        elif dataset and grader_results:
+            # The first two arguments have values but the third does not.
+            # Treat this as a call following the old 2-argument signature.
+            first_run_results = dataset
+            second_run_results = grader_results
+        else:
+            # Either 1. there are not enough arguments for the current call signature
+            #    (dataset and another_grader_results have values, but grader_results does not),
+            #    or 2. none of dataset, grader_results, another_grader_results is provided.
+            # In both cases fall back to empty lists.
+            first_run_results = []
+            second_run_results = []
 
         if not first_run_results or not second_run_results:
             logger.warning(
diff --git a/openjudge/generator/iterative_rubric/categorizer.py b/openjudge/generator/iterative_rubric/categorizer.py
index fa08fb5d..d599d401 100644
--- a/openjudge/generator/iterative_rubric/categorizer.py
+++ b/openjudge/generator/iterative_rubric/categorizer.py
@@ -238,7 +238,7 @@ async def categorize_rubrics(
 
             >>> categorized_rubrics, info = await categorizer.categorize_rubrics(rubrics)
         """
-        if len(rubrics) == 0:
+        if not rubrics:
             logger.error("Input rubrics list is empty")
             return [], {
                 "categorization_successful": False,
diff --git a/openjudge/generator/iterative_rubric/generator.py b/openjudge/generator/iterative_rubric/generator.py
index 94b6f0f0..b4ccbb29 100644
--- a/openjudge/generator/iterative_rubric/generator.py
+++ b/openjudge/generator/iterative_rubric/generator.py
@@ -265,7 +265,7 @@ async def generate(
             grader_kwargs["max_score"] = self.config.max_score
 
         # Add custom template if provided
-        if hasattr(self.config, "custom_evaluation_prompt") and self.config.custom_evaluation_prompt is not None:
+        if hasattr(self.config, "custom_evaluation_prompt") and self.config.custom_evaluation_prompt:
             grader_kwargs["template"] = self.config.custom_evaluation_prompt
 
         return LLMGrader(**grader_kwargs)
diff --git a/openjudge/graders/agent/action/action_alignment.py b/openjudge/graders/agent/action/action_alignment.py
index 13f4016c..c083a11a 100644
--- a/openjudge/graders/agent/action/action_alignment.py
+++ b/openjudge/graders/agent/action/action_alignment.py
@@ -185,10 +185,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate action alignment with plan",
             model=model,
-            template=template,
+            template=template or DEFAULT_ACTION_ALIGNMENT_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_ACTION_ALIGNMENT_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/memory/memory_detail_preservation.py b/openjudge/graders/agent/memory/memory_detail_preservation.py
index 8d64a9e9..28b3eb6c 100644
--- a/openjudge/graders/agent/memory/memory_detail_preservation.py
+++ b/openjudge/graders/agent/memory/memory_detail_preservation.py
@@ -176,10 +176,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate memory detail preservation",
             model=model,
-            template=template,
+            template=template or DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_MEMORY_DETAIL_PRESERVATION_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py b/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
index 50256a16..2cb9e5b1 100644
--- a/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
+++ b/openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
@@ -179,10 +179,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate memory retrieval effectiveness",
             model=model,
-            template=template,
+            template=template or DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_MEMORY_RETRIEVAL_EFFECTIVENESS_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/plan/plan_feasibility.py b/openjudge/graders/agent/plan/plan_feasibility.py
index 5104988a..ebf4dd68 100644
--- a/openjudge/graders/agent/plan/plan_feasibility.py
+++ b/openjudge/graders/agent/plan/plan_feasibility.py
@@ -179,10 +179,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate plan feasibility",
             model=model,
-            template=template,
+            template=template or DEFAULT_PLAN_FEASIBILITY_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_PLAN_FEASIBILITY_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/reflection/reflection_accuracy.py b/openjudge/graders/agent/reflection/reflection_accuracy.py
index 0697d569..296cfab7 100644
--- a/openjudge/graders/agent/reflection/reflection_accuracy.py
+++ b/openjudge/graders/agent/reflection/reflection_accuracy.py
@@ -176,10 +176,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate reflection accuracy",
             model=model,
-            template=template,
+            template=template or DEFAULT_REFLECTION_ACCURACY_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_REFLECTION_ACCURACY_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/reflection/reflection_outcome_understanding.py b/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
index c4cec5b4..6a15b2ef 100644
--- a/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
+++ b/openjudge/graders/agent/reflection/reflection_outcome_understanding.py
@@ -300,10 +300,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate reflection outcome understanding",
             model=model,
-            template=template,
+            template=template or DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_REFLECTION_OUTCOME_UNDERSTANDING_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/reflection/reflection_progress_awareness.py b/openjudge/graders/agent/reflection/reflection_progress_awareness.py
index e434443b..896b2cb3 100644
--- a/openjudge/graders/agent/reflection/reflection_progress_awareness.py
+++ b/openjudge/graders/agent/reflection/reflection_progress_awareness.py
@@ -217,10 +217,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate reflection progress awareness",
             model=model,
-            template=template,
+            template=template or DEFAULT_REFLECTION_PROGRESS_AWARENESS_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_REFLECTION_PROGRESS_AWARENESS_TEMPLATE
 
     def _format_history(self, history: Optional[list] = None) -> str:
         """Format history steps for evaluation.
diff --git a/openjudge/graders/agent/tool/tool_call_accuracy.py b/openjudge/graders/agent/tool/tool_call_accuracy.py
index 1466f627..7551dfeb 100644
--- a/openjudge/graders/agent/tool/tool_call_accuracy.py
+++ b/openjudge/graders/agent/tool/tool_call_accuracy.py
@@ -209,10 +209,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluates the accuracy of tool calls made by an agent",
             model=model,
-            template=template,
+            template=template or DEFAULT_TOOL_CALL_ACCURACY_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_TOOL_CALL_ACCURACY_TEMPLATE
 
     def _parse_tools_from_response(
         self,
diff --git a/openjudge/graders/agent/tool/tool_parameter_check.py b/openjudge/graders/agent/tool/tool_parameter_check.py
index 948ffefa..442719b3 100644
--- a/openjudge/graders/agent/tool/tool_parameter_check.py
+++ b/openjudge/graders/agent/tool/tool_parameter_check.py
@@ -189,10 +189,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate tool parameter extraction correctness",
             model=model,
-            template=template,
+            template=template or DEFAULT_TOOL_PARAMETER_CHECK_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_TOOL_PARAMETER_CHECK_TEMPLATE
 
     async def aevaluate(
         self,
diff --git a/openjudge/graders/agent/tool/tool_selection.py b/openjudge/graders/agent/tool/tool_selection.py
index 39da2955..dec310b1 100644
--- a/openjudge/graders/agent/tool/tool_selection.py
+++ b/openjudge/graders/agent/tool/tool_selection.py
@@ -202,10 +202,9 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate tool selection ",
             model=model,
-            template=template,
+            template=template or DEFAULT_TOOL_SELECTION_TEMPLATE,
             language=language,
         )
-        self.template = template if template is not None else DEFAULT_TOOL_SELECTION_TEMPLATE
 
     async def aevaluate(
         self,
diff --git a/openjudge/graders/agent/trajectory/__init__.py b/openjudge/graders/agent/trajectory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/openjudge/graders/code/_utils/testing_util.py b/openjudge/graders/code/_utils/testing_util.py
index 797b3e59..b8156ac3 100644
--- a/openjudge/graders/code/_utils/testing_util.py
+++ b/openjudge/graders/code/_utils/testing_util.py
@@ -154,6 +154,10 @@ def run_test(in_outs, test=None, timeout=15):
     """
     # Disable functionalities that can make destructive changes to the test.
    reliability_guard()
+
+    if not test:
+        raise AssertionError("should not happen: missing test code input")
+
     method_name = None
     tmp = None
     which_type = None
@@ -170,9 +174,6 @@
 
     logger.debug(f"loaded input_output = {datetime.now().time()}")
 
-    if test is None:
-        raise AssertionError("should not happen: test code is none")
-
     results = []
     sol = """from string import *
 from re import *
diff --git a/openjudge/graders/common/hallucination.py b/openjudge/graders/common/hallucination.py
index f793e0b5..5a35c319 100644
--- a/openjudge/graders/common/hallucination.py
+++ b/openjudge/graders/common/hallucination.py
@@ -265,11 +265,10 @@ def __init__(
             mode=GraderMode.POINTWISE,
             description="Evaluate whether response contains hallucinations",
             model=model,
-            template=template,
+            template=template or DEFAULT_HALLUCINATION_TEMPLATE,
             language=language,
         )
         self.threshold = threshold
-        self.template = template if template is not None else DEFAULT_HALLUCINATION_TEMPLATE
 
     async def aevaluate(
         self,
diff --git a/openjudge/graders/multimodal/_internal/criteria_utils.py b/openjudge/graders/multimodal/_internal/criteria_utils.py
index 452cde55..ef71e74e 100644
--- a/openjudge/graders/multimodal/_internal/criteria_utils.py
+++ b/openjudge/graders/multimodal/_internal/criteria_utils.py
@@ -81,7 +81,7 @@ def validate_and_sort_rubrics(
         ... ]
         >>> sorted_rubrics = validate_and_sort_rubrics(rubrics)
     """
-    if rubrics is None:
+    if not rubrics:
         return None
 
     # Sort rubrics by start of range
@@ -120,7 +120,7 @@ def format_rubrics(rubrics: Optional[List[Rubric]]) -> Optional[str]:
         0-3: Poor quality
         7-10: High quality
     """
-    if rubrics is None:
+    if not rubrics:
         return None
 
     return "\n".join(
@@ -177,7 +177,7 @@ def get_score_range(rubric: Optional[List[Rubric]]) -> Tuple[int, int]:
         >>> get_score_range(rubrics)
         (0, 10)
     """
-    if rubric is None:
+    if not rubric:
         return (0, 10)
 
     return rubric[0].score_range[0], rubric[-1].score_range[1]
diff --git a/openjudge/models/formatter/dashscope_formatter.py b/openjudge/models/formatter/dashscope_formatter.py
index b0b3debd..b445ae8f 100644
--- a/openjudge/models/formatter/dashscope_formatter.py
+++ b/openjudge/models/formatter/dashscope_formatter.py
@@ -75,8 +75,8 @@ def _convert_content_to_openai(
         Returns:
             Content in OpenAI format.
         """
-        # If content is None, return empty string
-        if content is None:
+        # Return empty string if no content input
+        if not content:
             return ""
 
         # If content is a string, return as is
@@ -135,8 +135,8 @@ def _convert_content_to_dashscope(
         Returns:
             Content in DashScope format.
""" - # If content is None, return empty string - if content is None: + # Return empty string if no content input + if not content: return "" # If content is a string, return as is diff --git a/openjudge/models/schema/prompt_template.py b/openjudge/models/schema/prompt_template.py index cf3018bd..ea39ef81 100644 --- a/openjudge/models/schema/prompt_template.py +++ b/openjudge/models/schema/prompt_template.py @@ -184,7 +184,7 @@ def to_messages( if isinstance(self.messages, list): messages = self.messages elif isinstance(self.messages, dict): - if language is None: + if not language: language = LanguageEnum.EN assert language in self.messages messages = self.messages.get(language, []) diff --git a/openjudge/runner/aggregator/base_aggregator.py b/openjudge/runner/aggregator/base_aggregator.py index 97cae548..979f493a 100644 --- a/openjudge/runner/aggregator/base_aggregator.py +++ b/openjudge/runner/aggregator/base_aggregator.py @@ -37,12 +37,12 @@ def __name__(self): return self.name @abstractmethod - def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult: + def __call__(self, grader_results: Dict[str, GraderResult], **kwargs) -> GraderResult: """ Aggregate results from multiple graders for a single sample. Args: - results: Dictionary mapping grader names to GraderResult objects for a single sample + grader_results: Dictionary mapping grader names to GraderResult objects for a single sample **kwargs: Additional arguments for aggregation Returns: diff --git a/openjudge/runner/aggregator/weighted_sum_aggregator.py b/openjudge/runner/aggregator/weighted_sum_aggregator.py index 96646804..e7c0dadb 100644 --- a/openjudge/runner/aggregator/weighted_sum_aggregator.py +++ b/openjudge/runner/aggregator/weighted_sum_aggregator.py @@ -28,27 +28,27 @@ def __init__(self, name: str, weights: Dict[str, float] = None): super().__init__(name) self.weights = weights or {} - def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult: + def __call__(self, grader_results: Dict[str, GraderResult], **kwargs) -> GraderResult: """ - Aggregate results using weighted sum for a single sample. + Aggregate multiple grader results using weighted sum for a single sample. Args: - results: Dictionary mapping grader names to GraderResult objects for a single sample + grader_results: Dictionary mapping grader names to GraderResult objects for a single sample **kwargs: Additional arguments (unused) Returns: Aggregated result as a GraderResult object """ - if not results: + if not grader_results: return GraderError( name=self.name, - reason="No results to aggregate", - error="No results provided for aggregation", + reason="No grader result to aggregate", + error="No grader result provided for aggregation", ) # Initialize weights if not provided (equal weights) if not self.weights: - grader_names = list(results.keys()) + grader_names = list(grader_results.keys()) equal_weight = 1.0 / len(grader_names) if grader_names else 0.0 weights = {name: equal_weight for name in grader_names} else: @@ -59,8 +59,8 @@ def __call__(self, results: Dict[str, GraderResult], **kwargs) -> GraderResult: component_scores = {} # Collect scores from all graders for this sample - for grader_name, result in results.items(): - # Only process GraderScore results (skip errors, ranks, etc.) + for grader_name, result in grader_results.items(): + # Only process results of GraderScore type (skip errors, ranks, etc.) 
             if isinstance(result, GraderScore):
                 weight = weights.get(grader_name, 0.0)
                 weighted_sum += result.score * weight
diff --git a/openjudge/runner/grading_runner.py b/openjudge/runner/grading_runner.py
index b293ac53..ef678a6c 100644
--- a/openjudge/runner/grading_runner.py
+++ b/openjudge/runner/grading_runner.py
@@ -177,7 +177,7 @@ def __init__(
             concurrency_manager.set_max_concurrency(max_concurrency)
 
         # Handle aggregators
-        if aggregators is None:
+        if not aggregators:
             self.aggregators = []
         elif isinstance(aggregators, BaseAggregator):
             self.aggregators = [aggregators]
@@ -345,7 +345,7 @@ async def arun(
         if self.show_progress:
             all_results = await tqdm_asyncio.gather(
                 *all_coroutines,
-                desc="Grading",
+                desc="Evaluating a dataset",
                 total=len(all_coroutines),
             )
         else:
@@ -472,7 +472,7 @@ async def arun_multiple_datasets(
         if original_show_progress:
             all_results = await tqdm_asyncio.gather(
                 *tasks,
-                desc="Grading Datasets",
+                desc=f"Evaluating {len(tasks)} datasets",
                 total=len(tasks),
             )
         else:
diff --git a/openjudge/utils/concurrency.py b/openjudge/utils/concurrency.py
index 8c41a199..82b3701e 100644
--- a/openjudge/utils/concurrency.py
+++ b/openjudge/utils/concurrency.py
@@ -19,7 +19,7 @@ class ConcurrencyManager:
     _instance = None
 
     def __new__(cls):
-        if cls._instance is None:
+        if not cls._instance:
             cls._instance = super(ConcurrencyManager, cls).__new__(cls)
         return cls._instance
 
diff --git a/openjudge/utils/instance.py b/openjudge/utils/instance.py
index 50acbcaa..6e893896 100644
--- a/openjudge/utils/instance.py
+++ b/openjudge/utils/instance.py
@@ -80,7 +80,7 @@ class should be subclass of. If provided, will check
     # If config is already an instance, just check its type
     if not isinstance(config, dict):
         instance = config
-        if accept_type is not None and not isinstance(instance, accept_type):
+        if accept_type and not isinstance(instance, accept_type):
             raise TypeError(
                 f"Provided instance {instance.__class__.__name__} " f"is not an instance of {accept_type.__name__}",
             )
@@ -98,7 +98,7 @@ class should be subclass of. If provided, will check
     cls = getattr(module, class_name)
 
     # Check type if accept_type is provided
-    if accept_type is not None and not issubclass(cls, accept_type):
+    if accept_type and not issubclass(cls, accept_type):
         raise TypeError(
             f"Instantiated class {cls.__name__} is not a subclass of {accept_type.__name__}",
         )
diff --git a/tests/runner/aggregator/test_weighted_sum_aggregator.py b/tests/runner/aggregator/test_weighted_sum_aggregator.py
index 61eda44b..f6ace6b6 100644
--- a/tests/runner/aggregator/test_weighted_sum_aggregator.py
+++ b/tests/runner/aggregator/test_weighted_sum_aggregator.py
@@ -31,12 +31,12 @@ def test_initialization(self):
     def test_empty_results(self):
         """Test aggregation with empty results"""
         aggregator = WeightedSumAggregator(name="test_agg")
-        result = aggregator(results={})
+        result = aggregator(grader_results={})
 
         assert isinstance(result, GraderError)
         assert result.name == "test_agg"
-        assert result.reason == "No results to aggregate"
-        assert result.error == "No results provided for aggregation"
+        assert result.reason == "No grader result to aggregate"
+        assert result.error == "No grader result provided for aggregation"
 
     def test_equal_weight_aggregation(self):
         """Test aggregation with equal weights (default behavior)"""
@@ -48,7 +48,7 @@ def test_equal_weight_aggregation(self):
             "grader3": GraderScore(name="grader3", score=4.0, reason="Poor response"),
         }
 
-        aggregated_result = aggregator(results=results)
+        aggregated_result = aggregator(grader_results=results)
 
         assert isinstance(aggregated_result, GraderScore)
         assert aggregated_result.name == "test_agg"
@@ -67,7 +67,7 @@ def test_weighted_aggregation(self):
             "grader3": GraderScore(name="grader3", score=0.0, reason="Poor"),
         }
 
-        aggregated_result = aggregator(results=results)
+        aggregated_result = aggregator(grader_results=results)
 
         assert isinstance(aggregated_result, GraderScore)
         assert aggregated_result.name == "test_agg"
@@ -85,7 +85,7 @@ def test_mixed_result_types(self):
             "rank_grader": GraderRank(name="rank_grader", rank=[1, 2, 3], reason="Ranked"),
         }
 
-        aggregated_result = aggregator(results=results)
+        aggregated_result = aggregator(grader_results=results)
 
         assert isinstance(aggregated_result, GraderScore)
         assert aggregated_result.name == "test_agg"
@@ -105,7 +105,7 @@ def test_zero_weight_aggregation(self):
             "grader2": GraderScore(name="grader2", score=5.0, reason="Average"),
         }
 
-        aggregated_result = aggregator(results=results)
+        aggregated_result = aggregator(grader_results=results)
 
         assert isinstance(aggregated_result, GraderScore)
         assert aggregated_result.name == "test_agg"
@@ -123,7 +123,7 @@ def test_missing_weights(self):
         }
 
         # grader2 should get default weight of 0.0
-        aggregated_result = aggregator(results=results)
+        aggregated_result = aggregator(grader_results=results)
 
         assert isinstance(aggregated_result, GraderScore)
         assert aggregated_result.name == "test_agg"