Skip to content

Commit a16fce9

Browse files
committed
Add evaluator.py
1 parent 0528644 commit a16fce9

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

evaluator.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import sys
2+
import os
3+
import subprocess
4+
import platform
5+
import openai
6+
7+
from dotenv import load_dotenv
8+
9+
# Detect whether the current terminal supports ANSI escape codes (on Windows this requires ANSICON)
10+
def supports_ansi():
    """
    Report whether ANSI escape codes can be written to stdout.

    The result is falsy unless stdout is attached to a terminal and the
    platform is either not Windows or has ANSICON set in the environment.
    """
    if platform.system() == "Windows":
        # On Windows, only the ANSICON environment variable is taken as
        # evidence of escape-code support.
        platform_ok = "ANSICON" in os.environ
    else:
        platform_ok = True

    stream = sys.stdout
    interactive = hasattr(stream, "isatty") and stream.isatty()
    return platform_ok and interactive
18+
19+
# Color codes default to empty strings so output stays free of escape
# sequences when the terminal cannot render them.
ANSI_GREEN = ANSI_BRIGHT_GREEN = ANSI_RESET = ANSI_BLUE = ""
ANSI_YELLOW = ANSI_RED = ANSI_BRIGHT_MAGENTA = ""

if supports_ansi():
    ANSI_RESET = "\033[0m"  # reset to the default text color
    ANSI_GREEN = "\033[32m"  # standard green
    ANSI_BRIGHT_GREEN = "\033[92m"  # bright green
    ANSI_BLUE = "\033[94m"  # bright blue
    ANSI_YELLOW = "\033[33m"  # standard yellow
    ANSI_RED = "\033[31m"  # standard red
    ANSI_BRIGHT_MAGENTA = "\033[95m"  # bright magenta
44+
45+
46+
def run_test_case(prompt, guideline):
    """Run the ``operate`` command on one prompt and report the outcome.

    Args:
        prompt: Objective passed to ``operate`` through ``--prompt``.
        guideline: Description of the expected end state. It is not
            evaluated yet, so it is currently unused.

    Returns:
        True. The run is not yet checked against ``guideline``, so every
        run that completes is reported as passing.
    """
    # The argument list is handed to the program without a shell, so the
    # prompt must not be wrapped in quotes: they would reach ``operate`` as
    # literal characters inside the objective text.
    subprocess.run(["operate", "--prompt", str(prompt)])

    return True
52+
53+
54+
def main():
    """Run every test case and print a colored status line for each.

    Calls load_dotenv(), sets the OpenAI API key from the OPENAI_API_KEY
    environment variable, then passes each prompt/guideline pair to
    run_test_case() and prints whether it passed or failed.
    """
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Each prompt maps to the guideline describing the end state that
    # counts as success for that prompt.
    scenarios = {
        "Open YouTube and play holiday music": "The YouTube video player is loaded and actively playing holiday music.",
        "Open Google Docs and write a poem": "A Google Doc file is opened in the browser with a poem typed into it.",
    }

    for prompt, guideline in scenarios.items():
        print(f"{ANSI_BLUE}[EVALUATING]{ANSI_RESET} Test case '{prompt}'")

        if run_test_case(prompt, guideline):
            verdict = f"{ANSI_GREEN}[PASSED]{ANSI_RESET} Test case '{prompt}'"
        else:
            verdict = f"{ANSI_RED}[FAILED]{ANSI_RESET} Test case '{prompt}'"
        print(verdict)


# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)