Commit 7112511
FEAT: Change position data from Point back to a large JSON object and add functions to support it. See commit description
We are making changes to allow `state_manager_node` to manage all sensory data and make decisions. For this, we simply pass the face JSON object through to the face / plaipin display node and to the head motion / head tracking node (which calls the dynamixel node). In `state_manager_node`, we have also commented out the subscription to `/vision/face_position` and instead leverage a new topic, `/vision/face_position_v2`, with a new callback, `face_position_callback_v2`.
1 parent fd8a242 commit 7112511
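For reference, each message on /vision/face_position_v2 is a std_msgs/String whose data field holds JSON in the shape produced by publish_face_position_v2 in camera_node.py below. A sketch of one payload (all values are illustrative, not taken from a real frame):

{
    "timestamp": 1718000000.0,
    "frame_width": 640,
    "frame_height": 480,
    "faces": [
        {
            "x1": 120, "y1": 80, "x2": 220, "y2": 200,
            "center_x": 170, "center_y": 140,
            "confidence": 0.93, "id": "Unknown"
        }
    ]
}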

File tree

4 files changed: +172 −11 lines

coffee_ws/src/coffee_expressions/coffee_expressions/plaipin_expressive_eyes.py

Lines changed: 123 additions & 7 deletions

@@ -5,6 +5,7 @@
 import time
 from geometry_msgs.msg import Point
 from coffee_expressions_msgs.msg import AffectiveState
+import json
 import os
 import sys
 
@@ -32,10 +33,21 @@ def __init__(self):
         # self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
         pygame.display.set_caption("Coffee Buddy - Plaipin Eyes")
 
+        # Add parameters for mapping
+        self.declare_parameter('invert_x', False)  # Default False for correct eye movement
+        self.declare_parameter('invert_y', False)  # Default False for correct eye movement
+
+        # The constraint for the values of the eye movement in the UI
+        self.declare_parameter('eye_range', 1.0)  # Max range for eye movement (controller supports -3.0 to 3.0)
+
+        self.invert_x = self.get_parameter('invert_x').value
+        self.invert_y = self.get_parameter('invert_y').value
+        self.eye_range = self.get_parameter('eye_range').value
+
         # Create custom eye configuration
         config = EyeConfig(
-            width=120,   # Scaled down for 800x400 display
-            height=480,  # Scaled down for 800x400 display
+            width=200,   # Scaled down for 800x400 display
+            height=720,  # Scaled down for 800x400 display
             spacing=140,  # Scaled down for 800x400 display
             blink_interval=120,
             blink_speed=0.1,

@@ -95,13 +107,117 @@ def affective_state_callback(self, msg: AffectiveState):
         if not msg.is_idle:
             # Convert ROS Point to normalized coordinates for plaipin
             # Assuming gaze_target is in the range [-1, 1] for x and y
-            self.eye_controller.set_eye_positions(
-                (msg.gaze_target.x, msg.gaze_target.y)
-            )
+            # TODO: commented this out in favor of the JSON-based path
+            # self.eye_controller.set_eye_positions(
+            #     (msg.gaze_target.x, msg.gaze_target.y)
+            # )
+            self.handle_faces(msg.gaze_target_v2)
+            # self.eye_controller.set_eye_positions((gaze_target_x, gaze_target_y))
+
         else:
             # Return to center when idle
-            # self.eye_controller.set_eye_positions((0.0, 0.0))
-            self.eye_controller.set_eye_positions((msg.gaze_target.x, msg.gaze_target.y))
+            self.eye_controller.set_eye_positions((0.0, 0.0))
+            # self.eye_controller.set_eye_positions((msg.gaze_target.x, msg.gaze_target.y))
+
+    # See `face_data_callback` in `coffee_eyes.py` for details
+    def handle_faces(self, msg):
+        """Process incoming face detection data (JSON string)."""
+        try:
+            # Parse the JSON data
+            data = json.loads(msg)
+
+            # Update frame dimensions if provided
+            if 'frame_width' in data and 'frame_height' in data:
+                self.frame_width = data['frame_width']
+                self.frame_height = data['frame_height']
+
+            # Update face positions
+            self.face_positions = data.get('faces', [])
+            self.last_face_update = time.time()
+
+            # If no faces detected, just return
+            if not self.face_positions:
+                self.target_face_position = None
+                return
+
+            # Select target face (largest/closest)
+            largest_area = 0
+            largest_face = None
+
+            for face in self.face_positions:
+                width = face['x2'] - face['x1']
+                height = face['y2'] - face['y1']
+                area = width * height
+
+                if area > largest_area:
+                    largest_area = area
+                    largest_face = face
+
+            if largest_face:
+                self.target_face_position = (
+                    largest_face['center_x'],
+                    largest_face['center_y']
+                )
+
+                # Log face position before transformation
+                face_x = self.target_face_position[0]
+                face_y = self.target_face_position[1]
+                center_x = self.frame_width / 2
+                center_y = self.frame_height / 2
+                dx = face_x - center_x
+                dy = face_y - center_y
+
+                self.get_logger().debug(f"Face detected at ({face_x:.1f}, {face_y:.1f}), offset from center: ({dx:.1f}, {dy:.1f})")
+
+                # Transform camera coordinates to eye controller coordinates
+                eye_position = self.transform_camera_to_eye_coords(
+                    self.target_face_position[0],
+                    self.target_face_position[1]
+                )
+
+                # Move the eyes only if we have a valid position
+                if eye_position:
+                    # self.controller.go_to_pos(eye_position)
+                    self.eye_controller.set_eye_positions((eye_position[0], eye_position[1]))
+                    self.get_logger().info(f'Moving eyes to position: ({eye_position[0]:.2f}, {eye_position[1]:.2f})')
+
+        except Exception as e:
+            self.get_logger().error(f"Error processing face data: {e}")
+
+    def transform_camera_to_eye_coords(self, camera_x, camera_y):
+        """Transform camera coordinates to eye controller coordinates, clamped to ±eye_range."""
+        # Normalize to -1.0 to 1.0 around the frame center
+        # Note: norm_x is positive when the face is on the right side,
+        # so the eyes look toward the face by default
+        norm_x = (camera_x - self.frame_width/2) / (self.frame_width/2)
+        norm_y = (camera_y - self.frame_height/2) / (self.frame_height/2)
+
+        # Add sensitivity multiplier (like in eye_tracking.py)
+        sensitivity = 1.5  # Higher = more sensitive eye movement
+        norm_x *= sensitivity
+        norm_y *= sensitivity
+
+        # Apply inversions if configured
+        # Note: by default we want norm_x to be positive when the face
+        # is on the right side, so the default is invert_x=False
+        if self.invert_x:
+            norm_x = -norm_x
+        if self.invert_y:
+            norm_y = -norm_y
+
+        # Scale to eye controller range
+        eye_x = norm_x * self.eye_range
+        eye_y = norm_y * self.eye_range
+
+        # Clamp values to valid range
+        eye_x = max(-self.eye_range, min(self.eye_range, eye_x))
+        eye_y = max(-self.eye_range, min(self.eye_range, eye_y))
+
+        # Debug output for tuning
+        self.get_logger().debug(f'Camera coords: ({camera_x}, {camera_y}) -> Eye coords: ({eye_x}, {eye_y})')
+
+        return (eye_x, eye_y)
 
     def run(self):
         """Main animation loop"""

coffee_ws/src/coffee_expressions_msgs/msg/AffectiveState.msg

Lines changed: 4 additions & 1 deletion

@@ -7,5 +7,8 @@ string trigger_source # "audio", "vision", "payment", "coffee"
 # Where the robot should direct its gaze (e.g. face position)
 geometry_msgs/Point gaze_target
 
+# Where the robot should direct its gaze, as a JSON string of face detections
+string gaze_target_v2
+
 # Whether the robot is in idle state (e.g. no one is interacting)
-bool is_idle
+bool is_idle
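As a usage sketch, here is how a publisher might fill the new field alongside the legacy Point (a minimal example, assuming the coffee_expressions_msgs package is built; the expression value and JSON content are placeholders):

import json
from geometry_msgs.msg import Point
from coffee_expressions_msgs.msg import AffectiveState

msg = AffectiveState()
msg.expression = "happy"
msg.trigger_source = "vision"
msg.gaze_target = Point(x=0.0, y=0.0, z=1.0)  # legacy consumers keep working
msg.gaze_target_v2 = json.dumps(              # new: raw face JSON passed through
    {"frame_width": 640, "frame_height": 480, "faces": []})
msg.is_idle = False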

coffee_ws/src/coffee_expressions_state_manager/coffee_expressions_state_manager/state_manager_node.py

Lines changed: 11 additions & 1 deletion

@@ -47,6 +47,7 @@ def __init__(self):
         self._base_expression = self.get_parameter('default_expression').value
         self._last_voice_intent = "None"
         self._last_face_position = Point(x=0.0, y=0.0, z=1.0)
+        self._last_face_position_v2 = String()
         self._override_expression: Optional[str] = None
         self._override_reason: Optional[str] = None
         self._override_expire_time: Optional[float] = None

@@ -61,8 +62,11 @@ def __init__(self):
             String, '/vision/emotion', self.vision_callback, qos)
         self.create_subscription(
             String, '/voice/intent', self.voice_callback, qos)
+        # self.create_subscription(
+        #     Point, '/vision/face_position', self.face_position_callback, qos)
         self.create_subscription(
-            Point, '/vision/face_position', self.face_position_callback, qos)
+            String, '/vision/face_position_v2', self.face_position_callback_v2, qos)
+
         self.create_subscription(
             String, '/system/event', self.event_callback, qos)
 
@@ -115,6 +119,11 @@ def face_position_callback(self, msg: Point):
         """Handle incoming face position updates."""
         self._last_face_position = msg
         self._last_active_time = time.time()
+
+    def face_position_callback_v2(self, msg: String):
+        """Handle incoming face position updates (JSON string)."""
+        self._last_face_position_v2.data = msg.data
+        self._last_active_time = time.time()
 
     def event_callback(self, msg: String):
         """Handle incoming system events."""

@@ -159,6 +168,7 @@ def publish_state(self):
         msg.expression = expression
         msg.trigger_source = trigger_source
         msg.gaze_target = self._last_face_position
+        msg.gaze_target_v2 = self._last_face_position_v2.data
         msg.is_idle = is_idle
 
         self.state_pub.publish(msg)
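To exercise the new subscription without running the camera node, a JSON string can be published by hand. A minimal sketch (the node name is arbitrary; assumes the default publisher QoS is compatible with the subscriber's qos profile):

import json
import rclpy
from rclpy.node import Node
from std_msgs.msg import String

rclpy.init()
node = Node('face_position_v2_test_pub')
pub = node.create_publisher(String, '/vision/face_position_v2', 10)

msg = String()
msg.data = json.dumps({
    "timestamp": 0.0,
    "frame_width": 640,
    "frame_height": 480,
    "faces": [{"x1": 120, "y1": 80, "x2": 220, "y2": 200,
               "center_x": 170, "center_y": 140,
               "confidence": 0.9, "id": "Unknown"}],
})
pub.publish(msg)

rclpy.spin_once(node, timeout_sec=0.5)  # let the message go out
node.destroy_node()
rclpy.shutdown()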

coffee_ws/src/coffee_head/coffee_head/camera_node.py

Lines changed: 34 additions & 2 deletions

@@ -107,6 +107,7 @@ def __init__(self, node=None):
         if self.node:
             self.face_pub = node.create_publisher(String, 'face_detection_data', 10)
             self.face_position_pub = node.create_publisher(Point, '/vision/face_position', 10)
+            self.face_position_pub_v2 = node.create_publisher(String, '/vision/face_position_v2', 10)
             self.frame_pub = node.create_publisher(Image, 'camera_frame', 10)
             self.face_image_pub = node.create_publisher(Image, 'face_images', 10)
             self.bridge = CvBridge()

@@ -203,6 +204,37 @@ def publish_face_data(self, faces):
         msg.data = json.dumps(face_data)
         self.face_pub.publish(msg)
 
+    def publish_face_position_v2(self, faces):
+        """Publish face detection data for other nodes"""
+        if not self.node or not faces:
+            return
+
+        # Create JSON with face data - convert NumPy types to Python native types
+        face_data = {
+            "timestamp": float(time.time()),
+            "frame_width": int(self.frame_width),
+            "frame_height": int(self.frame_height),
+            "faces": [
+                {
+                    "x1": int(face["x1"]),
+                    "y1": int(face["y1"]),
+                    "x2": int(face["x2"]),
+                    "y2": int(face["y2"]),
+                    "center_x": int(face["center_x"]),
+                    "center_y": int(face["center_y"]),
+                    "confidence": float(face["confidence"]),
+                    "id": str(face.get("id", "Unknown"))
+                }
+                for face in faces
+            ]
+        }
+
+        # Publish
+        msg = String()
+        msg.data = json.dumps(face_data)
+        self.face_position_pub_v2.publish(msg)
+
+
     def publish_face_position(self, faces):
         """Process incoming face detection data"""
 
@@ -870,17 +902,17 @@ def _publish_loop(self):
                 self.publish_frame(frame)
                 # Always publish face position, even when no faces are detected
                 # This is used so that we can re-center the eyes -- zero them in.
-                self.publish_face_position(faces)
+                # self.publish_face_position(faces)
                 # Only publish other face data if faces are detected
                 if faces:
+                    self.publish_face_position_v2(faces)
                     self.publish_face_data(faces)
                     self.publish_face_images(frame, faces)
 
                 self.last_publish_time = current_time
         except Exception as e:
             self.error.emit(f"Error in publish thread: {str(e)}")
 
-
 class CameraViewer(QMainWindow):
     def __init__(self, node):
         super().__init__()
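The int()/float() casts in publish_face_position_v2 are not optional: json.dumps raises TypeError on NumPy scalar types, which is typically what a face detector returns for box coordinates and confidences. A small sketch of the failure mode and the fix (the numpy values stand in for detector output):

import json
import numpy as np

box = {"x1": np.int64(120), "confidence": np.float32(0.93)}
try:
    json.dumps(box)
except TypeError as e:
    print(f"json.dumps failed: {e}")  # NumPy scalars are not JSON serializable

safe = {"x1": int(box["x1"]), "confidence": float(box["confidence"])}
print(json.dumps(safe))  # e.g. {"x1": 120, "confidence": 0.93...}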
