Merge pull request #136 from michaelhhogue/update-evaluate

joshbickett · web-flow · commit 456c7f83cf63 · 2024-01-18T16:08:58.000-08:00
Update `evaluate.py`
diff --git a/evaluate.py b/evaluate.py
@@ -24,7 +24,7 @@
 Guideline: {guideline}
 """
 
-SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png')
+SCREENSHOT_PATH = os.path.join('screenshots', 'screenshot.png')
 
 # Check if on a windows terminal that supports ANSI escape codes
 def supports_ansi():
@@ -80,9 +80,9 @@ def parse_eval_content(content):
         exit(1)
 
 
-def evaluate_summary_screenshot(guideline):
-    '''Load the summary screenshot and return True or False if it meets the given guideline.'''
-    with open(SUMMARY_SCREENSHOT_PATH, "rb") as img_file:
+def evaluate_final_screenshot(guideline):
+    '''Load the final screenshot and return True or False if it meets the given guideline.'''
+    with open(SCREENSHOT_PATH, "rb") as img_file:
         img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
 
         eval_message = [{
@@ -116,9 +116,9 @@ def run_test_case(objective, guideline):
     subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL)
     
     try:
-        result = evaluate_summary_screenshot(guideline)
+        result = evaluate_final_screenshot(guideline)
     except(OSError):
-        print("Couldn't open the summary screenshot")
+        print("[Error] Couldn't open the screenshot for evaluation")
         return False
     
     return result
@@ -143,7 +143,7 @@ def main():
             failed += 1
 
     print(
-        f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} tests passed, {failed} tests failed"
+        f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} test{'' if passed == 1 else 's'} passed, {failed} test{'' if failed == 1 else 's'} failed"
     )
 
 if __name__ == "__main__":