Skip to content

Commit 8d20df2

Browse files
authored
Add robust float validation to prevent crashes in depth processing (#106)
- Add input validation for depth images and arrays (None, empty, non-ndarray) - Check for NaN, infinity, and zero values before processing - Validate camera intrinsic parameters (fx, fy) to prevent division by zero - Convert numpy float types to Python floats for ROS message compatibility - Add early returns with safe default values (0.0, 0.0, 0.0) on validation failures - Validate intermediate calculation results to prevent propagation of invalid values This prevents crashes when dealing with: - Invalid depth camera data (NaN, inf values) - Empty depth regions or arrays - Invalid camera calibration data - Division by zero scenarios - Edge cases during depth sensor failures Affected methods: - compute_depth_bounds() - convert_bb_to_3d() - _compute_height_bounds() - _compute_width_bounds() - _compute_depth_bounds_weighted() - convert_keypoints_to_3d() Tested with edge cases including corrupted depth data, NaN/inf values, zero divisions, and empty arrays. All validations return safe defaults.
1 parent 7df174d commit 8d20df2

File tree

1 file changed

+166
-15
lines changed

1 file changed

+166
-15
lines changed

yolo_ros/yolo_ros/detect_3d_node.py

Lines changed: 166 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,27 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
299299
Returns:
300300
Tuple of (z_center, z_min, z_max) representing the object's depth
301301
"""
302+
# Basic input validation
303+
if not isinstance(depth_values, np.ndarray):
304+
return 0.0, 0.0, 0.0
305+
306+
if len(depth_values) == 0:
307+
return 0.0, 0.0, 0.0
308+
309+
# Ensure all values are numeric and finite
310+
try:
311+
depth_values = np.asarray(depth_values, dtype=np.float64)
312+
valid_mask = np.isfinite(depth_values) & (depth_values > 0)
313+
depth_values = depth_values[valid_mask]
314+
except (ValueError, TypeError):
315+
return 0.0, 0.0, 0.0
316+
317+
if len(depth_values) == 0:
318+
return 0.0, 0.0, 0.0
319+
302320
if len(depth_values) < 4:
303-
z_center = np.median(depth_values)
304-
return z_center, np.min(depth_values), np.max(depth_values)
321+
z_center = float(np.median(depth_values))
322+
return z_center, float(np.min(depth_values)), float(np.max(depth_values))
305323

306324
sorted_depths = np.sort(depth_values)
307325
n = len(sorted_depths)
@@ -372,7 +390,7 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
372390
z_min = z_center - half_min
373391
z_max = z_center + half_min
374392

375-
return z_center, z_min, z_max
393+
return float(z_center), float(z_min), float(z_max)
376394

377395
@staticmethod
378396
def _density_based_cluster(
@@ -505,6 +523,12 @@ def convert_bb_to_3d(
505523
@param detection 2D detection to convert
506524
@return 3D bounding box or None if conversion fails
507525
"""
526+
# Basic input validations
527+
if depth_image is None or not isinstance(depth_image, np.ndarray):
528+
return None
529+
530+
if depth_image.size == 0:
531+
return None
508532

509533
center_x = int(detection.bbox.center.position.x)
510534
center_y = int(detection.bbox.center.position.y)
@@ -541,11 +565,23 @@ def convert_bb_to_3d(
541565
pixel_coords = np.column_stack([x_grid.flatten(), y_grid.flatten()])
542566

543567
roi = roi / self.depth_image_units_divisor # Convert to meters
568+
569+
# Validate that division did not produce NaN or inf
570+
if not np.any(np.isfinite(roi)):
571+
return None
572+
544573
if not np.any(roi):
545574
return None
546575

547576
# Extract valid depth values with their spatial positions
548577
valid_depths = roi.flatten()
578+
579+
# Ensure correct numeric type
580+
try:
581+
valid_depths = np.asarray(valid_depths, dtype=np.float64)
582+
except (ValueError, TypeError):
583+
return None
584+
549585
valid_mask = (valid_depths > 0) & np.isfinite(valid_depths)
550586
valid_depths = valid_depths[valid_mask]
551587
valid_coords = pixel_coords[valid_mask]
@@ -564,18 +600,26 @@ def convert_bb_to_3d(
564600
valid_depths, spatial_weights
565601
)
566602

567-
if z == 0:
603+
if not np.isfinite(z) or z == 0:
568604
return None
569605

570606
# Compute height (y-axis) statistics from actual 3D points
571607
y_center, y_min, y_max = Detect3DNode._compute_height_bounds(
572608
valid_coords, valid_depths, spatial_weights, depth_info
573609
)
610+
611+
# Validate results
612+
if not all(np.isfinite([y_center, y_min, y_max])):
613+
return None
574614

575615
# Compute width (x-axis) statistics from actual 3D points
576616
x_center, x_min, x_max = Detect3DNode._compute_width_bounds(
577617
valid_coords, valid_depths, spatial_weights, depth_info
578618
)
619+
620+
# Validate results
621+
if not all(np.isfinite([x_center, x_min, x_max])):
622+
return None
579623

580624
# All dimensions come from actual 3D point analysis
581625
x = x_center
@@ -646,19 +690,56 @@ def _compute_height_bounds(
646690
Returns:
647691
Tuple of (y_center, y_min, y_max) in meters
648692
"""
693+
# Input validations
694+
try:
695+
valid_depths = np.asarray(valid_depths, dtype=np.float64)
696+
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
697+
except (ValueError, TypeError):
698+
return 0.0, 0.0, 0.0
699+
700+
if len(valid_coords) == 0 or len(valid_depths) == 0:
701+
return 0.0, 0.0, 0.0
702+
649703
if len(valid_coords) < 4:
650704
# Fallback: just use simple projection
651705
k = depth_info.k
652706
py, fy = k[5], k[4]
707+
708+
# Validate camera parameters
709+
if fy == 0:
710+
return 0.0, 0.0, 0.0
711+
712+
# Validate depths are finite
713+
if not np.all(np.isfinite(valid_depths)):
714+
return 0.0, 0.0, 0.0
715+
653716
y_coords_pixel = valid_coords[:, 1]
654717
y_3d = valid_depths * (y_coords_pixel - py) / fy
655-
return np.median(y_3d), np.min(y_3d), np.max(y_3d)
718+
719+
# Validate result
720+
if not np.all(np.isfinite(y_3d)):
721+
return 0.0, 0.0, 0.0
722+
723+
return float(np.median(y_3d)), float(np.min(y_3d)), float(np.max(y_3d))
656724

657725
# Convert pixel coordinates to 3D y-coordinates
658726
k = depth_info.k
659727
py, fy = k[5], k[4]
728+
729+
# Validate camera parameters
730+
if fy == 0:
731+
return 0.0, 0.0, 0.0
732+
733+
# Validate depths are finite before calculation
734+
if not np.all(np.isfinite(valid_depths)):
735+
return 0.0, 0.0, 0.0
736+
660737
y_coords_pixel = valid_coords[:, 1]
661738
y_3d = valid_depths * (y_coords_pixel - py) / fy
739+
740+
# Validate result
741+
if not np.any(np.isfinite(y_3d)):
742+
return 0.0, 0.0, 0.0
662743

663744
# Filter outliers using robust statistics
664745
# Compute weighted median as reference
@@ -728,7 +809,7 @@ def _compute_height_bounds(
728809
y_min = y_center - half_min
729810
y_max = y_center + half_min
730811

731-
return y_center, y_min, y_max
812+
return float(y_center), float(y_min), float(y_max)
732813

733814
@staticmethod
734815
def _compute_width_bounds(
@@ -750,19 +831,56 @@ def _compute_width_bounds(
750831
Returns:
751832
Tuple of (x_center, x_min, x_max) in meters
752833
"""
834+
# Input validations
835+
try:
836+
valid_depths = np.asarray(valid_depths, dtype=np.float64)
837+
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
838+
except (ValueError, TypeError):
839+
return 0.0, 0.0, 0.0
840+
841+
if len(valid_coords) == 0 or len(valid_depths) == 0:
842+
return 0.0, 0.0, 0.0
843+
753844
if len(valid_coords) < 4:
754845
# Fallback: just use simple projection
755846
k = depth_info.k
756847
px, fx = k[2], k[0]
848+
849+
# Validate camera parameters
850+
if fx == 0:
851+
return 0.0, 0.0, 0.0
852+
853+
# Validate depths are finite
854+
if not np.all(np.isfinite(valid_depths)):
855+
return 0.0, 0.0, 0.0
856+
757857
x_coords_pixel = valid_coords[:, 0]
758858
x_3d = valid_depths * (x_coords_pixel - px) / fx
759-
return np.median(x_3d), np.min(x_3d), np.max(x_3d)
859+
860+
# Validate result
861+
if not np.all(np.isfinite(x_3d)):
862+
return 0.0, 0.0, 0.0
863+
864+
return float(np.median(x_3d)), float(np.min(x_3d)), float(np.max(x_3d))
760865

761866
# Convert pixel coordinates to 3D x-coordinates
762867
k = depth_info.k
763868
px, fx = k[2], k[0]
869+
870+
# Validate camera parameters
871+
if fx == 0:
872+
return 0.0, 0.0, 0.0
873+
874+
# Validate depths are finite before calculation
875+
if not np.all(np.isfinite(valid_depths)):
876+
return 0.0, 0.0, 0.0
877+
764878
x_coords_pixel = valid_coords[:, 0]
765879
x_3d = valid_depths * (x_coords_pixel - px) / fx
880+
881+
# Validate result
882+
if not np.any(np.isfinite(x_3d)):
883+
return 0.0, 0.0, 0.0
766884

767885
# Filter outliers using robust statistics
768886
# Compute weighted median as reference
@@ -839,7 +957,7 @@ def _compute_width_bounds(
839957
x_min = x_center - half_min
840958
x_max = x_center + half_min
841959

842-
return x_center, x_min, x_max
960+
return float(x_center), float(x_min), float(x_max)
843961

844962
@staticmethod
845963
def _compute_depth_bounds_weighted(
@@ -855,9 +973,27 @@ def _compute_depth_bounds_weighted(
855973
Returns:
856974
Tuple of (z_center, z_min, z_max) representing the object's depth
857975
"""
976+
# Input validations
977+
try:
978+
depth_values = np.asarray(depth_values, dtype=np.float64)
979+
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
980+
except (ValueError, TypeError):
981+
return 0.0, 0.0, 0.0
982+
983+
if len(depth_values) == 0:
984+
return 0.0, 0.0, 0.0
985+
986+
# Validate that all values are finite
987+
valid_mask = np.isfinite(depth_values) & np.isfinite(spatial_weights)
988+
depth_values = depth_values[valid_mask]
989+
spatial_weights = spatial_weights[valid_mask]
990+
991+
if len(depth_values) == 0:
992+
return 0.0, 0.0, 0.0
993+
858994
if len(depth_values) < 4:
859-
z_center = np.median(depth_values)
860-
return z_center, np.min(depth_values), np.max(depth_values)
995+
z_center = float(np.median(depth_values))
996+
return z_center, float(np.min(depth_values)), float(np.max(depth_values))
861997

862998
# Step 1: Multi-scale histogram analysis for robust mode detection
863999
depth_range = np.ptp(depth_values)
@@ -988,7 +1124,7 @@ def _compute_depth_bounds_weighted(
9881124
z_min = z_center - half_min
9891125
z_max = z_center + half_min
9901126

991-
return z_center, z_min, z_max
1127+
return float(z_center), float(z_min), float(z_max)
9921128

9931129
def convert_keypoints_to_3d(
9941130
self,
@@ -1006,6 +1142,9 @@ def convert_keypoints_to_3d(
10061142
@param detection Detection containing 2D keypoints
10071143
@return Array of 3D keypoints
10081144
"""
1145+
# Validate input
1146+
if depth_image is None or not isinstance(depth_image, np.ndarray):
1147+
return KeyPoint3DArray()
10091148

10101149
# Build an array of 2D keypoints
10111150
keypoints_2d = np.array(
@@ -1016,8 +1155,20 @@ def convert_keypoints_to_3d(
10161155

10171156
# Sample depth image and project to 3D
10181157
z = depth_image[u, v]
1158+
1159+
# Validate and convert to float
1160+
try:
1161+
z = np.asarray(z, dtype=np.float64)
1162+
except (ValueError, TypeError):
1163+
return KeyPoint3DArray()
1164+
10191165
k = depth_info.k
10201166
px, py, fx, fy = k[2], k[5], k[0], k[4]
1167+
1168+
# Validate camera parameters
1169+
if fx == 0 or fy == 0:
1170+
return KeyPoint3DArray()
1171+
10211172
x = z * (v - px) / fx
10221173
y = z * (u - py) / fy
10231174
points_3d = (
@@ -1027,11 +1178,11 @@ def convert_keypoints_to_3d(
10271178
# Generate message
10281179
msg_array = KeyPoint3DArray()
10291180
for p, d in zip(points_3d, detection.keypoints.data):
1030-
if not np.isnan(p).any():
1181+
if not np.isnan(p).any() and np.all(np.isfinite(p)):
10311182
msg = KeyPoint3D()
1032-
msg.point.x = p[0]
1033-
msg.point.y = p[1]
1034-
msg.point.z = p[2]
1183+
msg.point.x = float(p[0])
1184+
msg.point.y = float(p[1])
1185+
msg.point.z = float(p[2])
10351186
msg.id = d.id
10361187
msg.score = d.score
10371188
msg_array.data.append(msg)

0 commit comments

Comments
 (0)