9 changes: 8 additions & 1 deletion eval/chat_benchmarks/LiveCodeBench/eval_instruct.py
@@ -81,7 +81,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
         if self.debug:
             examples = examples[:10]
 
@@ -92,6 +94,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
         seed = [s + i for s in self.seed]
 
         for idx, example in enumerate(examples):
+            # Type check for debugging purposes
+            if not isinstance(example, dict):
+                self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                continue
+
             if example["is_stdin"]:
                 prompt_text = (
                     "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."
9 changes: 8 additions & 1 deletion eval/chat_benchmarks/LiveCodeBenchv5/eval_instruct.py
@@ -77,7 +77,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
         if self.debug:
             examples = examples[:10]
 
@@ -88,6 +90,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
         seed = [s + i for s in self.seed]
 
         for idx, example in enumerate(examples):
+            # Type check for debugging purposes
+            if not isinstance(example, dict):
+                self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                continue
+
             if example["is_stdin"]:
                 prompt_text = (
                     "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."
@@ -84,7 +84,9 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
             Dictionary containing generated responses and temporary directory,
             or None for non-primary ranks
         """
-        examples = self.load_questions()
+        examples_dataset = self.load_questions()
+        # Convert the dataset object to a list
+        examples = list(examples_dataset)
         if self.debug:
             examples = examples[:10]
 
@@ -95,6 +97,11 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
         seed = [s + i for s in self.seed]
 
         for idx, example in enumerate(examples):
+            # Type check for debugging purposes
+            if not isinstance(example, dict):
+                self.logger.error(f"Example at index {idx} is not a dict. Type: {type(example)}, Value: {example}")
+                continue
+
             if example["is_stdin"]:
                 prompt_text = (
                     "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition."
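
All three files apply the same two-part change: materialize the object returned by load_questions() into a plain Python list before it is sliced, and skip (with an error log) any element that is not a dict before its keys are read. Below is a minimal, self-contained sketch of that pattern, not the benchmark's actual code: FakeQuestionDataset and the sample rows are hypothetical stand-ins, and the assumption that load_questions() returns a dataset-like iterable of dict rows (e.g. a Hugging Face datasets.Dataset) is not confirmed by the diff itself.

# Sketch of the pattern applied in all three hunks.
# Assumption (not shown in this diff): load_questions() returns a dataset-like
# iterable whose rows are dicts. FakeQuestionDataset and the sample rows are
# hypothetical stand-ins used only to make the example runnable.

class FakeQuestionDataset:
    """Stand-in for the object returned by load_questions()."""

    def __init__(self, rows):
        self._rows = rows

    def __iter__(self):
        return iter(self._rows)


examples_dataset = FakeQuestionDataset([
    {"question_id": "q1", "is_stdin": True},
    {"question_id": "q2", "is_stdin": False},
    "corrupted row",  # deliberately not a dict, to exercise the new guard
])

# Materialize the dataset as a plain list so slicing (examples[:10] in debug
# mode) and iteration both yield individual dict rows.
examples = list(examples_dataset)

for idx, example in enumerate(examples):
    # Skip and report anything that is not a dict before the key accesses below.
    if not isinstance(example, dict):
        print(f"Example at index {idx} is not a dict: {type(example)}")
        continue
    prompt_kind = "stdin" if example["is_stdin"] else "call-based"
    print(f"{example['question_id']}: building a {prompt_kind} prompt")

Converting to a list keeps the later examples[:10] debug slice returning whole rows rather than whatever slicing semantics the dataset object defines, and the isinstance guard turns a malformed row into a logged skip instead of a KeyError deeper in the loop.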