Skip to content

Commit 7efbac9

Browse files
committed
Pushing code
1 parent 45b7015 commit 7efbac9

31 files changed

+7821
-6
lines changed
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
__author__ = "qiao"
2+
3+
import json
4+
import contextlib
5+
import os
6+
import io
7+
import re
8+
import traceback
9+
import sys
10+
import pandas as pd
11+
import argparse
12+
from concurrent.futures import ThreadPoolExecutor, as_completed
13+
import os
14+
import openai
15+
from run import extract_answer
16+
from evaluate import check_correctness
17+
import numpy as np
18+
from table_stats import compute_overall_accuracy
19+
20+
21+
# Configure the OpenAI client from the environment; os.getenv returns None
# when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
22+
23+
24+
25+
def capture_exec_output_and_errors(code):
    """
    Executes the given code and captures its printed output and any error messages.

    The code runs in a fresh namespace whose ``__name__`` is ``"__main__"`` so
    that scripts guarded by ``if __name__ == "__main__":`` actually execute.
    Anything written to stdout or stderr while the code runs, including the
    traceback of an uncaught exception, is collected into a single string.

    Parameters:
    code (str): The Python code to execute.

    Returns:
    str: The captured output and error messages of the executed code.
    """
    # SECURITY: exec() runs arbitrary code with the privileges of this process.
    # The caller passes model-generated code here; run this only in a sandboxed
    # or otherwise disposable environment.
    exec_globals = {"__name__": "__main__"}

    with io.StringIO() as buffer, contextlib.redirect_stdout(buffer), contextlib.redirect_stderr(buffer):
        try:
            exec(code, exec_globals)
        except (Exception, SystemExit):
            # SystemExit is caught as well so that generated code calling
            # sys.exit()/exit() cannot terminate the caller. The traceback is
            # written to the redirected stderr, i.e. into the buffer.
            traceback.print_exc()

        # Must be read before the with-block closes the StringIO.
        return buffer.getvalue()
45+
46+
47+
def extract_python_code(text):
    """
    Collects the contents of every ```python fenced block found in ``text``.

    Parameters:
    text (str): The text to scan for fenced Python code.

    Returns:
    str: The bodies of all fenced blocks joined by newlines, or an empty
    string when no fenced block is present.
    """
    fenced_block = re.compile(r"```python\n(.*?)```", re.DOTALL)
    snippets = [found.group(1) for found in fenced_block.finditer(text)]
    return "\n".join(snippets)
51+
52+
53+
def apply_calc(question, patient_note, model_name, max_rounds=20):
    """
    Runs a multi-round chat in which the model writes code, the code is executed
    locally, and the console output is fed back until a final answer is given.

    Each round sends the conversation to ``openai.ChatCompletion.create`` and then:
    - returns the answer if the reply contains ``<answer> ... <\\answer>``;
    - asks for code if the reply has no ```python block;
    - asks for a print() statement if the extracted code has none;
    - gives up if the code calls ``input(``;
    - otherwise executes the code and sends its console output back.

    Parameters:
    question (str): The question describing the value to compute.
    patient_note (str): The patient note the value is computed from.
    model_name (str): Model identifier passed to the chat completion API.
    max_rounds (int): Maximum number of model replies before giving up.

    Returns:
    tuple: ``(answer, messages)`` where ``messages`` is the chat history and
    ``answer`` is the text between the answer tags, ``"N/A"`` when the
    generated code asks for user input, or ``None`` when ``max_rounds`` replies
    were used without a final answer.
    """
    system = f"You are a helpful assistant. Your task is to read a patient note and compute a medical value based on the following the question: {question}.\n"
    system += "If there are multiple values for a given measurement or attribute, then please use the value recorded based on when the patient note was written. You should not be using values that the patient had post-treatment or values from a patient's history in the past. "
    system += "Additionally, if the problem doesn't directly imply or provide information regarding a particular patient attribute, assume the patient does not have it."
    system += "Do not perform any computations yourself. Do not state a numerical answer. First write code for any equations you are using and then plug in values based on the patient note. Make sure the code prints all of its outputs."
    system += "If there are any errors with compiling your script, you may need to re-write your code to obtain the output. Note that all of the necessary information is provided in the patient note and you should not need to prompt the user for any information."
    system += "When you are finished with all the computations, please output your final answer value in the following format: <answer> YOUR_ANSWER_HERE <\\answer>, where YOUR_ANSWER_HERE is your final answer value without any units (examples: <answer> 17.29 <\\answer> (an example answer where the output can be a decimal), <answer> 5 <\\answer> (an example answer for score-based problems), <answer> 5/4/2021 <\\answer> (an example answer for estimated date problems), <answer> (4 weeks, 3 days) <\\answer> (an example answer for estimated gestational age))."
    system += "Asides for the step where you give your final answer in the <answer> <\\answer> tags, all your other responses must ALWAYS have code with the ```python tag as part of your response. This code should all be written in a single block used for computing the final answer value. The last statement in your code should be a print() statement so that the user can execute your code and provide you with the final answer. "

    prompt = "Patient Note:\n\n"
    prompt += patient_note + "\n\n"
    prompt += "Question:\n\n"
    prompt += question + "\n\n"

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    # DOTALL lets an answer that spans several lines match; without it such a
    # reply would get no feedback and the same conversation would be resent.
    answer_pattern = re.compile(r'<answer>(.*?)<\\answer>', re.DOTALL)

    n = 0

    # The round cap is the loop condition so that every path is bounded.
    while n < max_rounds:
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=messages
        )

        output = response.choices[0].message.content

        n += 1
        print(f"Round {n}\n")
        print("LLM OUTPUT:\n")
        print(output)
        print("\n")

        messages.append({"role": "assistant", "content": output})

        match_ = answer_pattern.search(output)

        if match_:
            answer = match_.group(1).strip()
            messages.append({"role": "user", "content": str(answer)})
            return str(answer), messages

        # No well-formed answer tags: treat the reply as a code-writing turn so
        # the model always receives a user message before the next round.
        message_code = extract_python_code(output)

        if "```python" not in output:
            new_output = f"It seems that you have not written any code as part of your response. This was your last thought:\n\n\n{output}\n\n\n. Based on this, please write a single block of code which the user will execute for you so that you can obtain the final answer. To get the final answer value from the console, please add a print() statement at the end, i.e. print(creatinine_clearance), print(bmi), print(curb-65 score)"
            print("MESSAGE CODE:\n")
            print(message_code)
            print("\n")
            messages.append({"role": "user", "content": new_output})

        elif "print" not in message_code:
            new_output = f"This was your previous response:\n\n\n{output}\n\n\n. There is no print() statement in your code. Please add a print statement to your code so that the user can execute your code for you to print out the value of the final answer value, i.e. print(creatinine_clearance), print(bmi), print(curb-65 score) "
            messages.append({"role": "user", "content": new_output})

        elif "input(" in message_code:
            # Code that waits for interactive input cannot be run unattended.
            return "N/A", messages

        else:
            console_output = capture_exec_output_and_errors(message_code)

            new_output = f"""I have executed your code, and the output is:

{console_output}

If there was an error, or the computed answer is obviously incorrect, please revise your code. Otherwise please output your final answer in the following format:

<answer> YOUR_ANSWER_HERE <\\answer> where YOUR_ANSWER_HERE is your final answer.

Decimal Example:
<answer> 17.29 <\\answer>

Score-Based Example:
<answer> 5 <\\answer>

Estimated Date Example:
<answer> 5/21/2021 <\\answer>

Estimated Age Example:
<answer> (4 weeks, 3 days) <\\answer>

All of the information needed is in the patient note and you should not need to prompt the user for any more information.
"""

            print("CONSOLE OUTPUT:\n")
            print(console_output)
            print("\n")

            messages.append({"role": "user", "content": new_output})

    # All rounds were used without the model producing a final answer.
    return None, messages
148+
149+
150+
def process_row(row, model_name):
    """
    Runs apply_calc for a single dataset row.

    Parameters:
    row: A mapping that provides the "Question" and "Patient Note" entries.
    model_name (str): Model identifier forwarded to apply_calc.

    Returns:
    Whatever apply_calc returns for this row's question and patient note.
    """
    question = row["Question"]
    note = row["Patient Note"]
    return apply_calc(question, note, model_name)
152+
153+
if __name__ == "__main__":
    # Script entry point: run the code-execution pipeline over every test row
    # that has no recorded result yet, append one JSON line per row to
    # outputs/code_exec_<model>.jsonl, then compute the overall accuracy.
    parser = argparse.ArgumentParser(description='Parse arguments')
    parser.add_argument('--gpt', type=float, help='Specify GPT version')

    args = parser.parse_args()

    # Maps the --gpt value to (API model identifier, name used in file names).
    supported_models = {
        "4": ("gpt-4", "gpt_4"),
        "35": ("gpt-3.5-turbo-16k", "gpt_35_16k"),
    }

    if args.gpt is None:
        parser.error("--gpt is required (supported values: 4, 35)")

    # The option is parsed as a float, so "3.5" would truncate to "3"; accept
    # it as an alias for "35" instead of failing later with a NameError.
    version_key = "35" if args.gpt == 3.5 else str(int(args.gpt))

    if version_key not in supported_models:
        parser.error(f"unsupported --gpt value {args.gpt!r} (supported values: 4, 35)")

    gpt_model, model_name = supported_models[version_key]

    df = pd.read_csv("../dataset/test_data.csv")

    os.makedirs("outputs", exist_ok=True)

    output_path = f"code_exec_{model_name}.jsonl"
    output_file = f"outputs/{output_path}"

    # (Calculator ID, Note ID) pairs already present in the output file. The
    # file is read from the same path it is written to, so an interrupted run
    # can be resumed.
    completed = set()

    if os.path.exists(output_file):
        with open(output_file, encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue
                record = json.loads(line)
                completed.add((str(record["Calculator ID"]), str(record["Note ID"])))

    row_list = [
        row
        for _, row in df.iterrows()
        if (str(row["Calculator ID"]), str(row["Note ID"])) not in completed
    ]

    for row in row_list:

        answer, messages = process_row(row, gpt_model)
        calc_id = str(row["Calculator ID"])
        note_id = str(row["Note ID"])

        if not answer:
            # None (round limit reached) or an empty answer counts as wrong.
            extracted_answer = "None"
            result = "Incorrect"
        else:

            try:
                extracted_answer = extract_answer(f"{{'answer': {answer}}}")
            except Exception:
                # Fall back to the raw answer text when it cannot be parsed.
                extracted_answer = answer

            try:
                status = check_correctness(extracted_answer, row["Ground Truth Answer"], calc_id, row["Upper Limit"], row["Lower Limit"])
                result = "Correct" if status else "Incorrect"
            except Exception:
                # An answer that cannot be compared is scored as incorrect.
                result = "Incorrect"

        outputs = {
            "Row Number": int(row["Row Number"]),
            "Calculator Name": row["Calculator Name"],
            "Calculator ID": calc_id,
            "Category": row["Category"],
            "Note ID": note_id,
            "Patient Note": row["Patient Note"],
            "Question": row["Question"],
            "LLM Answer": extracted_answer,
            "LLM Chat History": messages,
            "Ground Truth Answer": row["Ground Truth Answer"],
            "Ground Truth Explanation": row["Ground Truth Explanation"],
            "Result": result
        }

        # Append right away so finished rows survive a crash.
        with open(output_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(outputs) + "\n")

    compute_overall_accuracy(output_path, model_name, "code_augmented")
250+

0 commit comments

Comments
 (0)