From 9aa4005dfec706d6e058c85cb039a6ac6499bb97 Mon Sep 17 00:00:00 2001 From: buiilding Date: Thu, 18 Dec 2025 22:40:48 -0500 Subject: [PATCH] fix: Handle normalized coordinates from grounding models --- gui_agents/s3/agents/grounding.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/gui_agents/s3/agents/grounding.py b/gui_agents/s3/agents/grounding.py index 57fa9d62..b3a21c18 100644 --- a/gui_agents/s3/agents/grounding.py +++ b/gui_agents/s3/agents/grounding.py @@ -240,9 +240,23 @@ def generate_coords(self, ref_expr: str, obs: Dict) -> List[int]: # Generate and parse coordinates response = call_llm_safe(self.grounding_model) print("RAW GROUNDING MODEL RESPONSE:", response) - numericals = re.findall(r"\d+", response) + + # Regex to find floating point numbers (0.xxxx) or integers + numericals = re.findall(r"\d+\.?\d*", response) assert len(numericals) >= 2 - return [int(numericals[0]), int(numericals[1])] + + x = float(numericals[0]) + y = float(numericals[1]) + + # If coordinates are normalized (0-1), scale them up + if x <= 1.0 and y <= 1.0: + x = int(x * self.engine_params_for_grounding["grounding_width"]) + y = int(y * self.engine_params_for_grounding["grounding_height"]) + else: + x = int(x) + y = int(y) + + return [x, y] # Calls pytesseract to generate word level bounding boxes for text grounding def get_ocr_elements(self, b64_image_data: str) -> Tuple[str, List]: