Skip to content

Commit ddbbba0

Browse files
committed
Change test cases
1 parent c9379e1 commit ddbbba0

File tree

1 file changed

7 additions and 9 deletions

1 file changed

7 additions and 9 deletions

evaluate.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77

88
from dotenv import load_dotenv
99

10-
SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png')
10+
TEST_CASES = {
11+
"Go to Google.com": "The Google home page is visible with the search bar.",
12+
"Play a video on YouTube": "A YouTube video is playing.",
13+
}
1114

1215
EVALUATION_PROMPT = """
1316
Your job is to look at the given screenshot and determine if the following guideline is met in the image.
@@ -17,6 +20,8 @@
1720
Guideline: {guideline}
1821
"""
1922

23+
SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png')
24+
2025
# Check if on a windows terminal that supports ANSI escape codes
2126
def supports_ansi():
2227
"""
@@ -116,17 +121,10 @@ def run_test_case(objective, guideline):
116121
def main():
117122
load_dotenv()
118123
openai.api_key = os.getenv("OPENAI_API_KEY")
119-
120-
# Define the test cases and the guidelines
121-
test_cases = {
122-
"Go to Google.com": "The Google home page is visible with the search bar.",
123-
"Open YouTube and play holiday music": "The YouTube video player is loaded and actively playing holiday music.",
124-
"Open Google Docs and write a poem": "A Google Doc file is opened in the browser with a poem typed into it.",
125-
}
126124

127125
print(f"{ANSI_BRIGHT_MAGENTA}[STARTING EVALUATION]{ANSI_RESET} NOTE: `operate` output is silenced.")
128126

129-
for objective, guideline in test_cases.items():
127+
for objective, guideline in TEST_CASES.items():
130128
print(f"{ANSI_BLUE}[EVALUATING]{ANSI_RESET} '{objective}'")
131129

132130
result = run_test_case(objective, guideline)

0 commit comments

Comments (0)