Skip to content

Commit 456c7f8

Browse files
authored
Merge pull request #136 from michaelhhogue/update-evaluate
Update `evaluate.py`
2 parents 593c664 + 791d963 commit 456c7f8

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

evaluate.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
Guideline: {guideline}
2525
"""
2626

27-
SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png')
27+
SCREENSHOT_PATH = os.path.join('screenshots', 'screenshot.png')
2828

2929
# Check if on a windows terminal that supports ANSI escape codes
3030
def supports_ansi():
@@ -80,9 +80,9 @@ def parse_eval_content(content):
8080
exit(1)
8181

8282

83-
def evaluate_summary_screenshot(guideline):
84-
'''Load the summary screenshot and return True or False if it meets the given guideline.'''
85-
with open(SUMMARY_SCREENSHOT_PATH, "rb") as img_file:
83+
def evaluate_final_screenshot(guideline):
84+
'''Load the final screenshot and return True or False if it meets the given guideline.'''
85+
with open(SCREENSHOT_PATH, "rb") as img_file:
8686
img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
8787

8888
eval_message = [{
@@ -116,9 +116,9 @@ def run_test_case(objective, guideline):
116116
subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL)
117117

118118
try:
119-
result = evaluate_summary_screenshot(guideline)
119+
result = evaluate_final_screenshot(guideline)
120120
except(OSError):
121-
print("Couldn't open the summary screenshot")
121+
print("[Error] Couldn't open the screenshot for evaluation")
122122
return False
123123

124124
return result
@@ -143,7 +143,7 @@ def main():
143143
failed += 1
144144

145145
print(
146-
f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} tests passed, {failed} tests failed"
146+
f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} test{'' if passed == 1 else 's'} passed, {failed} test{'' if failed == 1 else 's'} failed"
147147
)
148148

149149
if __name__ == "__main__":

0 commit comments

Comments
 (0)