diff --git a/eval/chat_benchmarks/LiveCodeBench/eval_instruct.py b/eval/chat_benchmarks/LiveCodeBench/eval_instruct.py
index 19dcec42..a400617a 100644
--- a/eval/chat_benchmarks/LiveCodeBench/eval_instruct.py
+++ b/eval/chat_benchmarks/LiveCodeBench/eval_instruct.py
@@ -81,7 +81,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
 
         if self.debug:
             examples = examples[:10]
@@ -92,6 +94,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             seed = [s + i for s in self.seed]
 
             for idx, example in enumerate(examples):
+                # Type check for debugging purposes
+                if not isinstance(example, dict):
+                    self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                    continue
+
                 if example["is_stdin"]:
                     prompt_text = (
                         "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."
diff --git a/eval/chat_benchmarks/LiveCodeBenchv5/eval_instruct.py b/eval/chat_benchmarks/LiveCodeBenchv5/eval_instruct.py
index e1cc5c75..56fa047b 100644
--- a/eval/chat_benchmarks/LiveCodeBenchv5/eval_instruct.py
+++ b/eval/chat_benchmarks/LiveCodeBenchv5/eval_instruct.py
@@ -77,7 +77,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
 
         if self.debug:
             examples = examples[:10]
@@ -88,6 +90,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             seed = [s + i for s in self.seed]
 
             for idx, example in enumerate(examples):
+                # Type check for debugging purposes
+                if not isinstance(example, dict):
+                    self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                    continue
+
                 if example["is_stdin"]:
                     prompt_text = (
                         "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."
diff --git a/eval/chat_benchmarks/LiveCodeBenchv5_official/eval_instruct.py b/eval/chat_benchmarks/LiveCodeBenchv5_official/eval_instruct.py
index 7319c68e..285714a1 100644
--- a/eval/chat_benchmarks/LiveCodeBenchv5_official/eval_instruct.py
+++ b/eval/chat_benchmarks/LiveCodeBenchv5_official/eval_instruct.py
@@ -84,7 +84,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
 
         if self.debug:
             examples = examples[:10]
@@ -95,6 +97,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             seed = [s + i for s in self.seed]
 
             for idx, example in enumerate(examples):
+                # Type check for debugging purposes
+                if not isinstance(example, dict):
+                    self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                    continue
+
                 if example["is_stdin"]:
                     prompt_text = (
                         "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."