@@ -457,7 +457,7 @@ def run(self):
457457 logger .debug ("Chat info sent." )
458458
459459 if hasattr (env .unwrapped , "hint_labeling" ) and isinstance (env .unwrapped .hint_labeling , HintLabeling ):
460- _update_hint_labeling (env .unwrapped .hint_labeling , action , agent , step_info )
460+ action = _update_hint_labeling (env .unwrapped .hint_labeling , action , agent , step_info )
461461
462462 if action is None :
463463 logger .debug ("Agent returned None action. Ending episode." )
@@ -972,11 +972,6 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
972972 "id" : "1" ,
973973 "action" : action ,
974974 "think" : step_info .agent_info .think ,
975- },
976- {
977- "id" : "2" ,
978- "action" : "test" ,
979- "think" : "test" ,
980975 }
981976 ]
982977 )
@@ -994,16 +989,19 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
994989 # reprompt model 5 times
995990 hint = response ["payload" ]["hint" ]
996991 agent .flags .extra_instructions = hint
992+ seen_actions = set ()
997993 suggestions = []
998994 for i in tqdm (range (5 )):
999995 # TODO: make this more optimal
1000996 action = step_info .from_action (agent )
1001997 think = step_info .agent_info .think
1002- suggestions .append ({"id" : str (i + 1 ), "action" : action , "think" : think })
1003-
998+ if action not in seen_actions :
999+ seen_actions .add (action )
1000+ suggestions .append ({"id" : str (len (seen_actions )), "action" : action , "think" : think })
1001+
10041002 # update context
10051003 context = HintLabelingInputs (
1006- goal = "blablabli" ,
1004+ goal = context . goal ,
10071005 error_feedback = context .error_feedback ,
10081006 screenshot = context .screenshot ,
10091007 axtree = context .axtree ,
0 commit comments