1- from typing import Union
1+ import fcntl
2+ import json
3+ import os
4+ from typing import List , Union
25
6+ from trinity .common .experience import Experience
37from trinity .common .workflows .customized_math_workflows import MathBoxedWorkflow , Task
48from trinity .common .workflows .workflow import WORKFLOWS
59
@@ -21,6 +25,48 @@ def format_messages(self):
2125 return self .task_desc
2226
2327
@WORKFLOWS.register_module("bots_ref_eval_collect_math_boxed_workflow")
class BOTSRefEvalCollectMathBoxedWorkflow(MathBoxedWorkflow):
    """A reference evaluation collection workflow for math tasks that give answers in boxed format for BOTS.

    Behaves like the parent workflow, but after every ``run`` it appends one
    JSON line (model version, per-response rewards, question and ground truth)
    to the file named by the ``BOTS_REF_EVAL_LOG_FILE`` environment variable
    (default: ``./bots_ref_eval_log.jsonl``).
    """

    def reset(self, task: Task):
        """Reset the workflow for ``task`` and install the BOTS boxed-math reward function."""
        super().reset(task)
        # Kept as a function-scope import, as in the original code
        # (presumably to avoid an import cycle with the plugins package -- TODO confirm).
        from trinity.plugins.bots_math_boxed_reward import BOTSMathBoxedRewardFn

        self.reward_fn = BOTSMathBoxedRewardFn(**self.reward_fn_args)
        self.task_desc = nested_query(self.format_args.prompt_key, self.raw_task)
        self.truth = nested_query(self.format_args.response_key, self.raw_task)

    def format_messages(self):
        # the prompts are already in message format
        return self.task_desc

    def run(self) -> List[Experience]:
        """Run the parent workflow and record the resulting rewards.

        Returns:
            The experiences produced by the parent ``run``, unchanged.

        Raises:
            TypeError: If the log entry contains a value ``json`` cannot serialize.
            OSError: If the log directory or file cannot be created or written.
        """
        responses = super().run()

        log_entry = {
            "model_version": self.model.model_version,
            "rewards": [response.reward for response in responses],
            "question": self.task_desc,
            "truth": self.truth,
        }
        log_file_path = os.environ.get("BOTS_REF_EVAL_LOG_FILE", "./bots_ref_eval_log.jsonl")
        self._append_jsonl(log_file_path, log_entry)

        return responses

    @staticmethod
    def _append_jsonl(log_file_path: str, log_entry: dict) -> None:
        """Append ``log_entry`` as a single JSON line to ``log_file_path`` under an exclusive lock."""
        # A bare filename has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        log_dir = os.path.dirname(log_file_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        # Serialize up front so a failure cannot leave a partial line in the file.
        line = json.dumps(log_entry) + "\n"

        with open(log_file_path, "a", encoding="utf-8") as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                f.write(line)
                # Push the buffered data to the OS while the lock is still held;
                # otherwise it would only be written on close, after unlocking,
                # and lines from concurrent writers could interleave.
                f.flush()
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)
68+
69+
2470def nested_query (query_key : str , query_obj : Union [dict , None ]):
2571 # support nested query for a dict given query_keys split by '.'
2672 if query_obj is None :
0 commit comments