Add input validation and error handling in depth processing methods

luispri2001 · luispri2001 · commit e88bbaf519fd · 2026-01-19T13:49:16.000+01:00
diff --git a/yolo_ros/yolo_ros/detect_3d_node.py b/yolo_ros/yolo_ros/detect_3d_node.py
@@ -299,9 +299,27 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
         Returns:
             Tuple of (z_center, z_min, z_max) representing the object's depth
         """
+        # Basic input validation
+        if not isinstance(depth_values, np.ndarray):
+            return 0.0, 0.0, 0.0
+        
+        if len(depth_values) == 0:
+            return 0.0, 0.0, 0.0
+        
+        # Ensure all values are numeric and finite
+        try:
+            depth_values = np.asarray(depth_values, dtype=np.float64)
+            valid_mask = np.isfinite(depth_values) & (depth_values > 0)
+            depth_values = depth_values[valid_mask]
+        except (ValueError, TypeError):
+            return 0.0, 0.0, 0.0
+        
+        if len(depth_values) == 0:
+            return 0.0, 0.0, 0.0
+        
         if len(depth_values) < 4:
-            z_center = np.median(depth_values)
-            return z_center, np.min(depth_values), np.max(depth_values)
+            z_center = float(np.median(depth_values))
+            return z_center, float(np.min(depth_values)), float(np.max(depth_values))
 
         sorted_depths = np.sort(depth_values)
         n = len(sorted_depths)
@@ -372,7 +390,7 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
             z_min = z_center - half_min
             z_max = z_center + half_min
 
-        return z_center, z_min, z_max
+        return float(z_center), float(z_min), float(z_max)
 
     @staticmethod
     def _density_based_cluster(
@@ -505,6 +523,12 @@ def convert_bb_to_3d(
         @param detection 2D detection to convert
         @return 3D bounding box or None if conversion fails
         """
+        # Basic input validations
+        if depth_image is None or not isinstance(depth_image, np.ndarray):
+            return None
+        
+        if depth_image.size == 0:
+            return None
 
         center_x = int(detection.bbox.center.position.x)
         center_y = int(detection.bbox.center.position.y)
@@ -541,11 +565,23 @@ def convert_bb_to_3d(
             pixel_coords = np.column_stack([x_grid.flatten(), y_grid.flatten()])
 
         roi = roi / self.depth_image_units_divisor  # Convert to meters
+        
+        # Validate that division did not produce NaN or inf
+        if not np.any(np.isfinite(roi)):
+            return None
+        
         if not np.any(roi):
             return None
 
         # Extract valid depth values with their spatial positions
         valid_depths = roi.flatten()
+        
+        # Ensure correct numeric type
+        try:
+            valid_depths = np.asarray(valid_depths, dtype=np.float64)
+        except (ValueError, TypeError):
+            return None
+        
         valid_mask = (valid_depths > 0) & np.isfinite(valid_depths)
         valid_depths = valid_depths[valid_mask]
         valid_coords = pixel_coords[valid_mask]
@@ -564,18 +600,26 @@ def convert_bb_to_3d(
             valid_depths, spatial_weights
         )
 
-        if z == 0:
+        if not np.isfinite(z) or z == 0:
             return None
 
         # Compute height (y-axis) statistics from actual 3D points
         y_center, y_min, y_max = Detect3DNode._compute_height_bounds(
             valid_coords, valid_depths, spatial_weights, depth_info
         )
+        
+        # Validate results
+        if not all(np.isfinite([y_center, y_min, y_max])):
+            return None
 
         # Compute width (x-axis) statistics from actual 3D points
         x_center, x_min, x_max = Detect3DNode._compute_width_bounds(
             valid_coords, valid_depths, spatial_weights, depth_info
         )
+        
+        # Validate results
+        if not all(np.isfinite([x_center, x_min, x_max])):
+            return None
 
         # All dimensions come from actual 3D point analysis
         x = x_center
@@ -646,6 +690,16 @@ def _compute_height_bounds(
         Returns:
             Tuple of (y_center, y_min, y_max) in meters
         """
+        # Input validations
+        try:
+            valid_depths = np.asarray(valid_depths, dtype=np.float64)
+            spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
+        except (ValueError, TypeError):
+            return 0.0, 0.0, 0.0
+        
+        if len(valid_coords) == 0 or len(valid_depths) == 0:
+            return 0.0, 0.0, 0.0
+        
         if len(valid_coords) < 4:
             # Fallback: just use simple projection
             k = depth_info.k
@@ -657,8 +711,17 @@ def _compute_height_bounds(
         # Convert pixel coordinates to 3D y-coordinates
         k = depth_info.k
         py, fy = k[5], k[4]
+        
+        # Validate camera parameters
+        if fy == 0:
+            return 0.0, 0.0, 0.0
+        
         y_coords_pixel = valid_coords[:, 1]
         y_3d = valid_depths * (y_coords_pixel - py) / fy
+        
+        # Validate result
+        if not np.any(np.isfinite(y_3d)):
+            return 0.0, 0.0, 0.0
 
         # Filter outliers using robust statistics
         # Compute weighted median as reference
@@ -728,7 +791,7 @@ def _compute_height_bounds(
             y_min = y_center - half_min
             y_max = y_center + half_min
 
-        return y_center, y_min, y_max
+        return float(y_center), float(y_min), float(y_max)
 
     @staticmethod
     def _compute_width_bounds(
@@ -750,6 +813,16 @@ def _compute_width_bounds(
         Returns:
             Tuple of (x_center, x_min, x_max) in meters
         """
+        # Input validations
+        try:
+            valid_depths = np.asarray(valid_depths, dtype=np.float64)
+            spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
+        except (ValueError, TypeError):
+            return 0.0, 0.0, 0.0
+        
+        if len(valid_coords) == 0 or len(valid_depths) == 0:
+            return 0.0, 0.0, 0.0
+        
         if len(valid_coords) < 4:
             # Fallback: just use simple projection
             k = depth_info.k
@@ -761,8 +834,17 @@ def _compute_width_bounds(
         # Convert pixel coordinates to 3D x-coordinates
         k = depth_info.k
         px, fx = k[2], k[0]
+        
+        # Validate camera parameters
+        if fx == 0:
+            return 0.0, 0.0, 0.0
+        
         x_coords_pixel = valid_coords[:, 0]
         x_3d = valid_depths * (x_coords_pixel - px) / fx
+        
+        # Validate result
+        if not np.any(np.isfinite(x_3d)):
+            return 0.0, 0.0, 0.0
 
         # Filter outliers using robust statistics
         # Compute weighted median as reference
@@ -839,7 +921,7 @@ def _compute_width_bounds(
             x_min = x_center - half_min
             x_max = x_center + half_min
 
-        return x_center, x_min, x_max
+        return float(x_center), float(x_min), float(x_max)
 
     @staticmethod
     def _compute_depth_bounds_weighted(
@@ -855,9 +937,27 @@ def _compute_depth_bounds_weighted(
         Returns:
             Tuple of (z_center, z_min, z_max) representing the object's depth
         """
+        # Input validations
+        try:
+            depth_values = np.asarray(depth_values, dtype=np.float64)
+            spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
+        except (ValueError, TypeError):
+            return 0.0, 0.0, 0.0
+        
+        if len(depth_values) == 0:
+            return 0.0, 0.0, 0.0
+        
+        # Validate that all values are finite
+        valid_mask = np.isfinite(depth_values) & np.isfinite(spatial_weights)
+        depth_values = depth_values[valid_mask]
+        spatial_weights = spatial_weights[valid_mask]
+        
+        if len(depth_values) == 0:
+            return 0.0, 0.0, 0.0
+        
         if len(depth_values) < 4:
-            z_center = np.median(depth_values)
-            return z_center, np.min(depth_values), np.max(depth_values)
+            z_center = float(np.median(depth_values))
+            return z_center, float(np.min(depth_values)), float(np.max(depth_values))
 
         # Step 1: Multi-scale histogram analysis for robust mode detection
         depth_range = np.ptp(depth_values)
@@ -988,7 +1088,7 @@ def _compute_depth_bounds_weighted(
             z_min = z_center - half_min
             z_max = z_center + half_min
 
-        return z_center, z_min, z_max
+        return float(z_center), float(z_min), float(z_max)
 
     def convert_keypoints_to_3d(
         self,
@@ -1006,6 +1106,9 @@ def convert_keypoints_to_3d(
         @param detection Detection containing 2D keypoints
         @return Array of 3D keypoints
         """
+        # Validate input
+        if depth_image is None or not isinstance(depth_image, np.ndarray):
+            return KeyPoint3DArray()
 
         # Build an array of 2D keypoints
         keypoints_2d = np.array(
@@ -1016,8 +1119,20 @@ def convert_keypoints_to_3d(
 
         # Sample depth image and project to 3D
         z = depth_image[u, v]
+        
+        # Validate and convert to float
+        try:
+            z = np.asarray(z, dtype=np.float64)
+        except (ValueError, TypeError):
+            return KeyPoint3DArray()
+        
         k = depth_info.k
         px, py, fx, fy = k[2], k[5], k[0], k[4]
+        
+        # Validate camera parameters
+        if fx == 0 or fy == 0:
+            return KeyPoint3DArray()
+        
         x = z * (v - px) / fx
         y = z * (u - py) / fy
         points_3d = (
@@ -1027,11 +1142,11 @@ def convert_keypoints_to_3d(
         # Generate message
         msg_array = KeyPoint3DArray()
         for p, d in zip(points_3d, detection.keypoints.data):
-            if not np.isnan(p).any():
+            if not np.isnan(p).any() and np.all(np.isfinite(p)):
                 msg = KeyPoint3D()
-                msg.point.x = p[0]
-                msg.point.y = p[1]
-                msg.point.z = p[2]
+                msg.point.x = float(p[0])
+                msg.point.y = float(p[1])
+                msg.point.z = float(p[2])
                 msg.id = d.id
                 msg.score = d.score
                 msg_array.data.append(msg)