Skip to content

Commit 89fccc3

Browse files
author
yanxinl4
committed
add litellm + bg setup
1 parent 1813032 commit 89fccc3

File tree

291 files changed

+113
-371
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

291 files changed

+113
-371
lines changed

eval/data/problems_all.jsonl

Lines changed: 65 additions & 65 deletions
Large diffs are not rendered by default.

eval/scripts/gencode_json.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,27 @@
1414

1515
class Gencode:
1616
def __init__(self, model: str, output_dir: Path,
17-
prompt_dir: Path, temperature: float):
17+
prompt_dir: Path, with_background: bool, temperature: float):
1818
self.model = model
1919
self.output_dir = output_dir
2020
self.prompt_dir = prompt_dir
21+
self.with_background = with_background
2122
self.temperature = temperature
2223
self.previous_llm_code = []
2324

24-
def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int, tot_steps: int) -> None:
25-
output_dir = Path(self.prompt_dir, self.model)
25+
def _get_background_dir(self):
26+
return "with_background" if self.with_background else "without_background"
27+
28+
def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int) -> None:
29+
output_dir = Path(self.prompt_dir, Path(self.model).parts[-1], self._get_background_dir())
2630
output_dir.mkdir(parents=True, exist_ok=True)
2731
output_file_path = output_dir / f"{prob_data['problem_id']}.{num_steps}.txt"
2832
output_file_path.write_text(prompt, encoding="utf-8")
2933

30-
def save_response_with_steps(self, prob_data: dict, response: str, previous_code: str,
31-
num_steps: int, model="gpt-4o",) -> None:
34+
def save_response_with_steps(self, prob_data: dict, response: str,
35+
previous_code: str, num_steps: int) -> None:
3236
output_dir = (
33-
self.output_dir / model
37+
self.output_dir / Path(self.model).parts[-1] / self._get_background_dir()
3438
)
3539
output_dir.mkdir(parents=True, exist_ok=True)
3640
prob_id = prob_data["problem_id"]
@@ -78,7 +82,7 @@ def generate_response_with_steps(
7882
raise Exception(f'Generating {prob_id} step {num_steps} ahead of step {prev_step + 1}.')
7983
prompt, previous_code = self.generate_prompt_with_steps(prob_data, num_steps, prompt_template)
8084
if save:
81-
self.save_prompt_with_steps(prob_data, prompt, num_steps, tot_steps)
85+
self.save_prompt_with_steps(prob_data, prompt, num_steps)
8286

8387
model_kwargs = {}
8488
if "claude" in model:
@@ -94,7 +98,7 @@ def generate_response_with_steps(
9498
model_fct = get_model_function(model, **model_kwargs)
9599
response_from_llm = model_fct(prompt)
96100
self.previous_llm_code[num_steps - 1] = extract_python_script(response_from_llm)
97-
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps, model)
101+
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps)
98102

99103
@staticmethod
100104
def process_problem_code(prob_data: dict, num_steps: int) -> str:
@@ -109,11 +113,16 @@ def process_problem_steps(self, problem_data: dict, num_steps: int):
109113
next_step = []
110114
previous_code = []
111115
for i in range(num_steps - 1):
116+
output_lines.append(problem_data["sub_steps"][i]["step_description_prompt"] + '\n' +
117+
problem_data["sub_steps"][i]["step_background"] if self.with_background
118+
else problem_data["sub_steps"][i]["step_description_prompt"])
112119
output_lines.append(self.previous_llm_code[i])
113120
previous_code.append(self.previous_llm_code[i])
114121
output_lines.append("------")
115122

116-
next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
123+
next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"] + '\n' +
124+
problem_data["sub_steps"][num_steps - 1]["step_background"] if self.with_background
125+
else problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
117126
next_step.append(self.process_problem_code(problem_data, num_steps))
118127
output_str = "\n\n".join(output_lines[:-1]) # Remove the last "------"
119128
next_step_str = "\n\n".join(next_step)
@@ -160,6 +169,11 @@ def get_cli() -> argparse.ArgumentParser:
160169
default=Path("eval_results", "prompt"),
161170
help="Prompt directory",
162171
)
172+
parser.add_argument(
173+
"--with-background",
174+
action="store_true",
175+
help="Include problem background if enabled",
176+
)
163177
parser.add_argument(
164178
"--temperature",
165179
type=float,
@@ -173,11 +187,12 @@ def main(model: str,
173187
output_dir: Path,
174188
input_path: Path,
175189
prompt_dir: Path,
190+
with_background: bool,
176191
temperature: float
177192
) -> None:
178193
gcode = Gencode(
179194
model=model, output_dir=output_dir,
180-
prompt_dir=prompt_dir, temperature=temperature
195+
prompt_dir=prompt_dir, with_background=with_background, temperature=temperature
181196
)
182197
data = read_from_jsonl(input_path)
183198
for problem in data:

eval/scripts/test_generated_code.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
import argparse
88

9+
from scicode.parse.parse import H5PY_FILE
910
from scicode.parse.parse import read_from_jsonl
1011

1112

@@ -15,7 +16,12 @@
1516
DEV_STEP_NUM = 50
1617

1718

18-
def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=False):
19+
def _get_background_dir(with_background):
20+
return "with_background" if with_background else "without_background"
21+
22+
23+
def test_code(model_name, code_dir, log_dir, output_dir,
24+
jsonl_path, dev_set=False, with_background=False):
1925

2026
jsonl_data = read_from_jsonl(jsonl_path)
2127
json_dct = {}
@@ -26,7 +32,7 @@ def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=Fal
2632
json_idx[prob_data['problem_id']] = jsonl_data.index(prob_data)
2733
start_time = time.time()
2834

29-
code_dir_ = Path(code_dir, model_name)
35+
code_dir_ = Path(code_dir, model_name, _get_background_dir(with_background))
3036
tmp_dir = Path(f'tmp_{start_time}')
3137

3238
tmp_dir.mkdir(parents=True, exist_ok=True)
@@ -82,7 +88,7 @@ def run_script(script_path):
8288
prob_id = func_id.split('.')[0]
8389
print(f'Testing function {func_id} ...')
8490
tot_prob[int(prob_id) - 1] += 1
85-
logs_dir_ = Path(log_dir, model_name)
91+
logs_dir_ = Path(log_dir, model_name, _get_background_dir(with_background))
8692
logs_dir_.mkdir(parents=True, exist_ok=True)
8793
logs_file = Path(logs_dir_, f'{file_path.stem}.txt')
8894
if logs_file.exists():
@@ -116,16 +122,16 @@ def run_script(script_path):
116122
print(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}')
117123
print(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}')
118124

119-
Path(f'{output_dir}/{Path(model_name)}').mkdir(parents=True, exist_ok=True)
125+
Path(output_dir).mkdir(parents=True, exist_ok=True)
120126

121-
with open(f'{output_dir}/{model_name}.txt', 'w') as f:
127+
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.txt', 'w') as f:
122128
f.write(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}\n')
123129
f.write(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}\n\n')
124130
f.write(f'duration: {test_time} seconds\n')
125131
f.write('\ncorrect problems: ')
126132
f.write(f'\n\n{[i + 1 for i in range(PROB_NUM) if correct_prob[i] == tot_prob[i] and tot_prob[i] != 0]}\n')
127133

128-
with open(f'{output_dir}/{model_name}.json', 'w', encoding='utf-8') as f:
134+
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.json', 'w', encoding='utf-8') as f:
129135
json.dump(correct_dict, f, indent=4)
130136

131137
shutil.rmtree(tmp_dir)
@@ -166,6 +172,11 @@ def get_cli() -> argparse.ArgumentParser:
166172
"--dev-set",
167173
action='store_true',
168174
help="Test dev set if enabled",
175+
),
176+
parser.add_argument(
177+
"--with-background",
178+
action="store_true",
179+
help="Include problem background if enabled",
169180
)
170181
return parser
171182

@@ -175,9 +186,13 @@ def main(model: str,
175186
log_dir: Path,
176187
output_dir: Path,
177188
jsonl_path: Path,
178-
dev_set: bool
189+
dev_set: bool,
190+
with_background: bool
179191
) -> None:
180-
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set)
192+
if not Path(H5PY_FILE).exists():
193+
raise FileNotFoundError("Please download the numeric test results before testing generated code.")
194+
model = Path(model).parts[-1]
195+
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set, with_background)
181196

182197

183198
if __name__ == "__main__":

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.1.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.10.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.11.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.12.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.2.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.3.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

logs/litellm/together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo/11.4.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments (0)