Skip to content

Commit e253790

Browse files
committed
Add -m argument to evaluate.py
1 parent d5c3024 commit e253790

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

evaluate.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
import base64
66
import json
77
import openai
8+
import argparse
89

910
from dotenv import load_dotenv
1011

1112
# "Objective for `operate`" : "Guideline for passing this test case given to GPT-4v"
1213
TEST_CASES = {
13-
"Go to Github.com": "The Github home page is visible.",
14+
"Go to Github.com": "A Github page is visible.",
1415
"Go to Youtube.com and play a video": "The YouTube video player is visible.",
1516
}
1617

@@ -124,10 +125,29 @@ def run_test_case(objective, guideline):
124125
return result
125126

126127

128+
def get_test_model(argv=None):
    """Return the model name selected with ``-m`` / ``--model``.

    Args:
        argv: Optional list of command-line argument strings to parse.
            When ``None`` (the default), argparse reads ``sys.argv[1:]``,
            so calling ``get_test_model()`` with no arguments behaves
            exactly as before.

    Returns:
        The string passed to ``-m`` / ``--model``, or ``"gpt-4-with-ocr"``
        when the option is not supplied.
    """
    parser = argparse.ArgumentParser(
        description="Run the self-operating-computer with a specified model."
    )

    parser.add_argument(
        "-m",
        "--model",
        help="Specify the model to evaluate.",
        required=False,
        default="gpt-4-with-ocr",
    )

    # Passing argv through lets callers (and tests) supply their own argument
    # list instead of depending on the process-wide sys.argv.
    return parser.parse_args(argv).model
142+
143+
127144
def main():
128145
load_dotenv()
129146
openai.api_key = os.getenv("OPENAI_API_KEY")
130147

148+
model = get_test_model()
149+
150+
print(f"{ANSI_BLUE}[EVALUATING MODEL `{model}`]{ANSI_RESET}")
131151
print(f"{ANSI_BRIGHT_MAGENTA}[STARTING EVALUATION]{ANSI_RESET}")
132152

133153
passed = 0; failed = 0

0 commit comments

Comments
 (0)