merging cv stuff into main (#70)

Roozki · Cameron-Basara · web-flow · commit bcffd52f0218 · 2025-11-24T21:59:47.000-08:00
* merging cv stuff into main

* wtf was that random pkg

---------

Co-authored-by: Cameron <cameronbasara@gmail.com>
diff --git a/bestHammer.pt b/bestHammer.pt
diff --git a/src/aruco_detector/package.xml b/src/aruco_detector/package.xml
@@ -17,6 +17,9 @@
   <!-- Rosdep fails with the following: -->
   <!-- <depend>cv2_aruco</depend>   -->
   <!-- <depend>OpenCV</depend> -->
+  <!-- ROWTAG MERGE WARNING -->
+  <depend>OpenCV</depend>
+  <depend>cv2_aruco</depend>  
 
   <test_depend>ament_lint_auto</test_depend>
   <test_depend>ament_lint_common</test_depend>
diff --git a/src/aruco_detector/src/DetectMarker.cpp b/src/aruco_detector/src/DetectMarker.cpp
@@ -86,5 +86,4 @@ int main(int argc, char** argv) {
     rclcpp::spin(node);
     rclcpp::shutdown();
     return 0;
-    
 }
diff --git a/src/object_detection/object_detection/yolo_detector_node.py b/src/object_detection/object_detection/yolo_detector_node.py
@@ -1,27 +1,55 @@
+#!/usr/bin/env python3
+# This line should be added at the beginning of any ROS node. It specifies the location of the Python interpreter
+# The 'env' command helps ensure that the correct interpreter is used based on your system’s environment
+# This concept is useful beyond ROS, as it deals with specifying the interpreter path in scripts
+# For more details, you can check this link: https://stackoverflow.com/questions/7670303/purpose-of-usr-bin-python3-shebang
+# If you're already familiar with this and just forgot to include it, feel free to ignore this comment.
+
+"""
+    Run yolo on a detected camera feed
+
+    How to use:
+        first run: 'ros2 run cameras cameras_node'  
+        then run: 'ros2 run object_detection yolo_detector_node' 
+        now to display the results: 'ros2 run rqt_image_view rqt_image_view'
+
+        Do this separately in 3 terminals. TODO: set up a launch file so this is easier lol
+
+        Make sure to build and source before running
+"""
+
 import rclpy
 from rclpy.node import Node
 from sensor_msgs.msg import Image
 from cv_bridge import CvBridge
 import cv2
 from ultralytics import YOLO
 
-
 class YOLODetectorNode(Node):
     def __init__(self):
         super().__init__('yolo_detector_node')
-        self.publisher_ = self.create_publisher(Image, 'detected_frames', 10)
+        self.publisher_ = self.create_publisher(Image, 'camera/yolo/object_detection', 30) # Changed the topic name for some more clarity
+        self.subscription = self.create_subscription(
+            Image,
+            '/camera/color/image_raw',  # This can be changed to any camera topic we want to use (realsense raw rn), for comp this will be the ptz topic
+            self.image_callback,
+            10)
         self.bridge = CvBridge()
 
         # TODO Change this to whatever YOLO model we decide to use
-        self.yolo = YOLO('yolov8n.pt') 
+        # For this I want to do some unit testing to use it for the KRIA on fpga, not sure if yolov8 will be able to be used for this
+        self.yolo = YOLO('bestHammer.pt') # right now this is fine 
 
-        # TODO Not sure if this is the right camera for video capture for the rover
-        self.videoCap = cv2.VideoCapture(0)
+        ## Frames now arrive via ROS: the camera topics already exist, so we use a subscriber instead of opening the device ourselves
+        # # TODO Not sure if this is the right camera for video capture for the rover
+        # self.videoCap = cv2.VideoCapture(0)
 
         # Timer to process frames
-        self.timer = self.create_timer(0.1, self.process_frame)
+        # self.timer = self.create_timer(0.1, self.process_frame)
+        
 
     def get_colours(self, cls_num):
+        # You'll have to explain to me what's going on here lol
         base_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
         color_index = cls_num % len(base_colors)
         increments = [(1, -2, 1), (-2, 1, -1), (1, -1, 2)]
@@ -31,14 +59,23 @@ def get_colours(self, cls_num):
         ]
         return tuple(color)
 
-    def process_frame(self):
-        ret, frame = self.videoCap.read()
-        if not ret:
-            self.get_logger().warning("Failed to capture frame.")
+    # For now let's try it this way:
+    # Instead of using a timer to process frames periodically, the node now processes frames as they arrive via the callback.
+    # Frames are processed the same way as before, just reactively.
+    # The callback method is called for each message received, i.e. once per video frame.
+    def image_callback(self, msg): 
+        # Convert ROS Image message to OpenCV image
+        try:
+            frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
+        except Exception as e:
+            self.get_logger().error(f'Error converting image: {e}')
             return
 
+        # Process frame with YOLO
         results = self.yolo.track(frame, stream=True)
-
+        
+        # Draw detections on frame
+        annotated_frame = frame.copy()
         for result in results:
             classes_names = result.names
             for box in result.boxes:
@@ -48,14 +85,17 @@ def process_frame(self):
                     cls = int(box.cls[0])
                     class_name = classes_names[cls]
                     colour = self.get_colours(cls)
-                    cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 2)
-                    cv2.putText(frame, f'{class_name} {box.conf[0]:.2f}', 
-                                (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)
 
+                    if class_name == "hammer":
+                        cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), colour, 2)
+                        cv2.putText(annotated_frame, f'{class_name} {box.conf[0]:.2f}',
+                                    (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)
+        
         # Convert frame to ROS2 Image message and publish
-        msg = self.bridge.cv2_to_imgmsg(frame, encoding='bgr8')
-        self.publisher_.publish(msg)
-        self.get_logger().info("Published frame with detections.")
+        detected_msg = self.bridge.cv2_to_imgmsg(annotated_frame, encoding='bgr8')
+        detected_msg.header = msg.header  # Preserve original timestamp and frame_id, just allows us to communicate timestamped data if needed
+        self.publisher_.publish(detected_msg)
+        self.get_logger().debug("Published frame with detections")
 
     def destroy_node(self):
         # Release video capture when node is destroyed
diff --git a/src/object_detection/package.xml b/src/object_detection/package.xml
@@ -7,14 +7,15 @@
   <maintainer email="ubuntu@todo.todo">ubuntu</maintainer>
   <license>TODO: License declaration</license>
 
+  <!-- ROS uses the local OpenCV install, so OpenCV needs to be installed separately
+       for cv_bridge to work -->
   <depend>rclpy</depend>
   <depend>sensor_msgs</depend>
   <depend>cv_bridge</depend>
 
-  <test_depend>ament_copyright</test_depend>
-  <test_depend>ament_flake8</test_depend>
-  <test_depend>ament_pep257</test_depend>
-  <test_depend>python3-pytest</test_depend>
+  <!-- Added ultralytics exec dependency; need to pip install ultralytics locally -->
+  <!-- <exec_depend>python3-ultralytics</exec_depend> 
+  <exec_depend>python3-opencv</exec_depend>  -->
 
   <export>
     <build_type>ament_python</build_type>
diff --git a/src/rover_vision/rover_vision/full_cameras_node.py b/src/rover_vision/rover_vision/full_cameras_node.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+import rclpy
+from rclpy.node import Node
+from sensor_msgs.msg import Image, Imu
+import cv2
+from cv_bridge import CvBridge
+import pyrealsense2 as rs
+import numpy as np
+from typing import List, Dict, Tuple, Optional
+
+class CameraPublisherFull(Node):
+    def __init__(self):
+        super().__init__('camera_publisher_full')
+        self.bridge = CvBridge()
+        
+        # Initialize RealSense if present
+        self.rs_pipeline = None
+        self.rs_config = None
+        self.rs_publishers = {}
+        self.imu_publisher = self.create_publisher(Imu, '/camera/imu/data', 30)
+        self.has_realsense = self.setup_realsense()
+        
+        # Initialize standard cameras (regardless of RealSense detection)
+        self.std_publishers = []
+        self.std_caps = []
+        self.std_dev_paths = []
+        
+        # Auto-detect available standard camera devices
+        device_paths = self.detect_available_cameras()
+        for i, dev_path in enumerate(device_paths):
+            pub = self.create_publisher(Image, f'cam_{i}', 30)
+            cap = cv2.VideoCapture(dev_path)
+            if not cap.isOpened():
+                self.get_logger().error(f'Could not open video device at: {dev_path}')
+                continue
+            self.std_publishers.append(pub)
+            self.std_caps.append(cap)
+            self.std_dev_paths.append(dev_path)
+        
+        # Check if we have any camera at all
+        if not self.has_realsense and len(self.std_caps) == 0:
+            self.get_logger().error('No active video devices found.')
+            exit(1)
+        
+        # Setup timer callback
+        self.timer_period = 0.033  # ~30 FPS
+        self.timer = self.create_timer(self.timer_period, self.timer_callback)
+    
+    def setup_realsense(self) -> bool:
+        """Initialize RealSense camera if available and IMU stream"""
+        try:
+            # Create pipeline and config
+            self.rs_pipeline = rs.pipeline()
+            self.rs_config = rs.config()
+            
+            # Try to find RealSense devices
+            ctx = rs.context()
+            devices = ctx.query_devices()
+            if len(devices) == 0:
+                self.get_logger().info('No RealSense devices detected')
+                return False
+            
+            # Setup streams for the first RealSense device
+            self.get_logger().info(f'Found RealSense device: {devices[0].get_info(rs.camera_info.name)}')
+            
+            # Enable video streams
+            self.rs_config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
+            self.rs_config.enable_stream(rs.stream.infrared, 1, 640, 480, rs.format.y8, 30)
+            self.rs_config.enable_stream(rs.stream.infrared, 2, 640, 480, rs.format.y8, 30)
+            self.rs_config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
+            
+            # Enable IMU stream
+            self.rs_config.enable_stream(rs.stream.accel, rs.format.motion_xyz32f, 30)
+            self.rs_config.enable_stream(rs.stream.gyro, rs.format.motion_xyz32f, 30)
+            
+            # Start streaming
+            self.rs_pipeline.start(self.rs_config)
+            
+            # Create publishers for each stream
+            self.rs_publishers['color'] = self.create_publisher(Image, '/camera/color/image_raw', 30)
+            self.rs_publishers['infra1'] = self.create_publisher(Image, '/camera/infra1/image_rect_raw', 30)
+            self.rs_publishers['infra2'] = self.create_publisher(Image, '/camera/infra2/image_rect_raw', 30)
+            self.rs_publishers['depth'] = self.create_publisher(Image, '/camera/depth/image_rect_raw', 30)
+            
+            self.get_logger().info('RealSense camera initialized successfully')
+            return True
+            
+        except Exception as e:
+            self.get_logger().warn(f'Failed to initialize RealSense: {e}')
+            return False
+    
+    def detect_available_cameras(self) -> List[str]:
+        """Detect standard USB cameras, excluding RealSense devices"""
+        available_cameras = []
+        max_tested = 10
+        
+        for i in range(max_tested):
+            cap = cv2.VideoCapture(f'/dev/video{i}')
+            if cap.isOpened():
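+                # Skip RealSense cameras based on the capture backend name. NOTE(review): getBackendName() appears to report the OpenCV backend (e.g. V4L2), not the device model - confirm this actually filters RealSense nodes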
+                name = cap.getBackendName()
+                if 'realsense' not in name.lower():
+                    available_cameras.append(f'/dev/video{i}')
+                else:
+                    self.get_logger().info(f'Skipping RealSense camera at /dev/video{i}')
+            cap.release()
+        
+        self.get_logger().info(f'Detected standard camera devices: {available_cameras}')
+        return available_cameras
+    
+    def publish_realsense_frame(self, frame, frame_type):
+        if frame_type in self.rs_publishers:
+            if frame_type in ['color', 'infra1', 'infra2', 'depth']:
+                # Process image based on type
+                if frame_type == 'color':
+                    # Convert BGR to RGB
+                    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                    msg = self.bridge.cv2_to_imgmsg(img, encoding="rgb8")
+                elif frame_type in ['infra1', 'infra2']:
+                    # Infrared is already grayscale (y8)
+                    msg = self.bridge.cv2_to_imgmsg(frame, encoding="mono8")
+                else:  # depth
+                    # Depth is 16-bit
+                    msg = self.bridge.cv2_to_imgmsg(frame, encoding="16UC1")
+                
+                self.rs_publishers[frame_type].publish(msg)
+    
+    def publish_imu_data(self, accel_data, gyro_data):
+        imu_msg = Imu()
+        imu_msg.header.stamp = self.get_clock().now().to_msg()
+        
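+        # Fill in IMU data for linear acceleration. NOTE(review): get_motion_data() appears to return an rs.vector with .x/.y/.z fields - confirm that integer indexing works here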
+        imu_msg.linear_acceleration.x = accel_data[0]
+        imu_msg.linear_acceleration.y = accel_data[1]
+        imu_msg.linear_acceleration.z = accel_data[2]
+        
+        # Fill in IMU data for angular velocity
+        imu_msg.angular_velocity.x = gyro_data[0]
+        imu_msg.angular_velocity.y = gyro_data[1]
+        imu_msg.angular_velocity.z = gyro_data[2]
+        
+        self.imu_publisher.publish(imu_msg)
+    
+    def timer_callback(self):
+        # Process RealSense frames if available
+        if self.has_realsense:
+            try:
+                frames = self.rs_pipeline.wait_for_frames()
+                
+                # Process camera frames
+                color_frame = frames.get_color_frame()
+                if color_frame:
+                    color_image = np.asanyarray(color_frame.get_data())
+                    self.publish_realsense_frame(color_image, 'color')
+                
+                depth_frame = frames.get_depth_frame()
+                if depth_frame:
+                    depth_image = np.asanyarray(depth_frame.get_data())
+                    self.publish_realsense_frame(depth_image, 'depth')
+                
+                # Process infrared frames
+                for i in [1, 2]:
+                    ir_frame = frames.get_infrared_frame(i)
+                    if ir_frame:
+                        ir_image = np.asanyarray(ir_frame.get_data())
+                        self.publish_realsense_frame(ir_image, f'infra{i}')
+                
+                # Process IMU frames
+                accel_frame = frames.first(rs.stream.accel)
+                gyro_frame = frames.first(rs.stream.gyro)
+                if accel_frame and gyro_frame:
+                    accel_data = accel_frame.as_motion_frame().get_motion_data()
+                    gyro_data = gyro_frame.as_motion_frame().get_motion_data()
+                    self.publish_imu_data(accel_data, gyro_data)
+                
+            except Exception as e:
+                self.get_logger().error(f'Error processing RealSense frames: {e}')
+        
+        # Process standard camera frames
+        for i in range(len(self.std_caps)):
+            cap = self.std_caps[i]
+            dev_path = self.std_dev_paths[i]
+            ret, frame = cap.read()
+            if ret:
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                msg = self.bridge.cv2_to_imgmsg(frame, encoding="rgb8")
+                self.std_publishers[i].publish(msg)
+            else:
+                pass
+
+def main(args=None):
+    # Initialize node
+    rclpy.init(args=args)
+    node = CameraPublisherFull()
+    try:
+        # Spin
+        rclpy.spin(node)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        # Shutdown
+        if node.rs_pipeline:
+            node.rs_pipeline.stop()
+        for cap in node.std_caps:
+            cap.release()
+        node.destroy_node()
+        rclpy.shutdown()
+
+if __name__ == '__main__':
+    main()
diff --git a/src/rover_vision/rover_vision/imu_node.py b/src/rover_vision/rover_vision/imu_node.py

Original file line number	Diff line number	Diff line change
`@@ -86,5 +86,4 @@ int main(int argc, char** argv) {`
`86`	`86`	`rclcpp::spin(node);`
`87`	`87`	`rclcpp::shutdown();`
`88`	`88`	`return 0;`
`89`		`-`
`90`	`89`	`}`