@@ -46,7 +46,7 @@ def supports_ansi():
4646def run_test_case (prompt , guideline ):
4747 '''Returns True if the result of the test with the given prompt meets the given guideline.'''
4848 # Run main.py with the test case prompt
49- subprocess .run (['operate' , '--prompt' , f'"{ prompt } "' ])
49+ subprocess .run (['operate' , '--prompt' , f'"{ prompt } "' ], stdout = subprocess . DEVNULL )
5050
5151 return True
5252
@@ -60,15 +60,17 @@ def main():
6060 "Open YouTube and play holiday music" : "The YouTube video player is loaded and actively playing holiday music." ,
6161 "Open Google Docs and write a poem" : "A Google Doc file is opened in the browser with a poem typed into it." ,
6262 }
63+
64+ print (f"{ ANSI_BRIGHT_MAGENTA } [STARTING EVALUATION]{ ANSI_RESET } NOTE: `operate` output is silenced." )
6365
6466 for prompt , guideline in test_cases .items ():
65- print (f"{ ANSI_BLUE } [EVALUATING]{ ANSI_RESET } Test case '{ prompt } '" )
67+ print (f"{ ANSI_BLUE } [EVALUATING]{ ANSI_RESET } '{ prompt } '" )
6668
6769 result = run_test_case (prompt , guideline )
6870 if result :
69- print (f"{ ANSI_GREEN } [PASSED]{ ANSI_RESET } Test case '{ prompt } '" )
71+ print (f"{ ANSI_GREEN } [PASSED]{ ANSI_RESET } '{ prompt } '" )
7072 else :
71- print (f"{ ANSI_RED } [FAILED]{ ANSI_RESET } Test case '{ prompt } '" )
73+ print (f"{ ANSI_RED } [FAILED]{ ANSI_RESET } '{ prompt } '" )
7274
7375
7476if __name__ == "__main__" :
0 commit comments