Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 166 additions & 15 deletions yolo_ros/yolo_ros/detect_3d_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,27 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
Returns:
Tuple of (z_center, z_min, z_max) representing the object's depth
"""
# Basic input validation
if not isinstance(depth_values, np.ndarray):
return 0.0, 0.0, 0.0

if len(depth_values) == 0:
return 0.0, 0.0, 0.0

# Ensure all values are numeric and finite
try:
depth_values = np.asarray(depth_values, dtype=np.float64)
valid_mask = np.isfinite(depth_values) & (depth_values > 0)
depth_values = depth_values[valid_mask]
except (ValueError, TypeError):
return 0.0, 0.0, 0.0

if len(depth_values) == 0:
return 0.0, 0.0, 0.0

if len(depth_values) < 4:
z_center = np.median(depth_values)
return z_center, np.min(depth_values), np.max(depth_values)
z_center = float(np.median(depth_values))
return z_center, float(np.min(depth_values)), float(np.max(depth_values))

sorted_depths = np.sort(depth_values)
n = len(sorted_depths)
Expand Down Expand Up @@ -372,7 +390,7 @@ def compute_depth_bounds(depth_values: np.ndarray) -> Tuple[float, float, float]
z_min = z_center - half_min
z_max = z_center + half_min

return z_center, z_min, z_max
return float(z_center), float(z_min), float(z_max)

@staticmethod
def _density_based_cluster(
Expand Down Expand Up @@ -505,6 +523,12 @@ def convert_bb_to_3d(
@param detection 2D detection to convert
@return 3D bounding box or None if conversion fails
"""
# Basic input validations
if depth_image is None or not isinstance(depth_image, np.ndarray):
return None

if depth_image.size == 0:
return None

center_x = int(detection.bbox.center.position.x)
center_y = int(detection.bbox.center.position.y)
Expand Down Expand Up @@ -541,11 +565,23 @@ def convert_bb_to_3d(
pixel_coords = np.column_stack([x_grid.flatten(), y_grid.flatten()])

roi = roi / self.depth_image_units_divisor # Convert to meters

# Validate that division did not produce NaN or inf
if not np.any(np.isfinite(roi)):
return None

if not np.any(roi):
return None

# Extract valid depth values with their spatial positions
valid_depths = roi.flatten()

# Ensure correct numeric type
try:
valid_depths = np.asarray(valid_depths, dtype=np.float64)
except (ValueError, TypeError):
return None

valid_mask = (valid_depths > 0) & np.isfinite(valid_depths)
valid_depths = valid_depths[valid_mask]
valid_coords = pixel_coords[valid_mask]
Expand All @@ -564,18 +600,26 @@ def convert_bb_to_3d(
valid_depths, spatial_weights
)

if z == 0:
if not np.isfinite(z) or z == 0:
return None

# Compute height (y-axis) statistics from actual 3D points
y_center, y_min, y_max = Detect3DNode._compute_height_bounds(
valid_coords, valid_depths, spatial_weights, depth_info
)

# Validate results
if not all(np.isfinite([y_center, y_min, y_max])):
return None

# Compute width (x-axis) statistics from actual 3D points
x_center, x_min, x_max = Detect3DNode._compute_width_bounds(
valid_coords, valid_depths, spatial_weights, depth_info
)

# Validate results
if not all(np.isfinite([x_center, x_min, x_max])):
return None

# All dimensions come from actual 3D point analysis
x = x_center
Expand Down Expand Up @@ -646,19 +690,56 @@ def _compute_height_bounds(
Returns:
Tuple of (y_center, y_min, y_max) in meters
"""
# Input validations
try:
valid_depths = np.asarray(valid_depths, dtype=np.float64)
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
except (ValueError, TypeError):
return 0.0, 0.0, 0.0

if len(valid_coords) == 0 or len(valid_depths) == 0:
return 0.0, 0.0, 0.0

if len(valid_coords) < 4:
# Fallback: just use simple projection
k = depth_info.k
py, fy = k[5], k[4]

# Validate camera parameters
if fy == 0:
return 0.0, 0.0, 0.0

# Validate depths are finite
if not np.all(np.isfinite(valid_depths)):
return 0.0, 0.0, 0.0

y_coords_pixel = valid_coords[:, 1]
y_3d = valid_depths * (y_coords_pixel - py) / fy
return np.median(y_3d), np.min(y_3d), np.max(y_3d)

# Validate result
if not np.all(np.isfinite(y_3d)):
return 0.0, 0.0, 0.0

return float(np.median(y_3d)), float(np.min(y_3d)), float(np.max(y_3d))

# Convert pixel coordinates to 3D y-coordinates
k = depth_info.k
py, fy = k[5], k[4]

# Validate camera parameters
if fy == 0:
return 0.0, 0.0, 0.0

# Validate depths are finite before calculation
if not np.all(np.isfinite(valid_depths)):
return 0.0, 0.0, 0.0

y_coords_pixel = valid_coords[:, 1]
y_3d = valid_depths * (y_coords_pixel - py) / fy

# Validate result
if not np.any(np.isfinite(y_3d)):
return 0.0, 0.0, 0.0

# Filter outliers using robust statistics
# Compute weighted median as reference
Expand Down Expand Up @@ -728,7 +809,7 @@ def _compute_height_bounds(
y_min = y_center - half_min
y_max = y_center + half_min

return y_center, y_min, y_max
return float(y_center), float(y_min), float(y_max)

@staticmethod
def _compute_width_bounds(
Expand All @@ -750,19 +831,56 @@ def _compute_width_bounds(
Returns:
Tuple of (x_center, x_min, x_max) in meters
"""
# Input validations
try:
valid_depths = np.asarray(valid_depths, dtype=np.float64)
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
except (ValueError, TypeError):
return 0.0, 0.0, 0.0

if len(valid_coords) == 0 or len(valid_depths) == 0:
return 0.0, 0.0, 0.0

if len(valid_coords) < 4:
# Fallback: just use simple projection
k = depth_info.k
px, fx = k[2], k[0]

# Validate camera parameters
if fx == 0:
return 0.0, 0.0, 0.0

# Validate depths are finite
if not np.all(np.isfinite(valid_depths)):
return 0.0, 0.0, 0.0

x_coords_pixel = valid_coords[:, 0]
x_3d = valid_depths * (x_coords_pixel - px) / fx
return np.median(x_3d), np.min(x_3d), np.max(x_3d)

# Validate result
if not np.all(np.isfinite(x_3d)):
return 0.0, 0.0, 0.0

return float(np.median(x_3d)), float(np.min(x_3d)), float(np.max(x_3d))

# Convert pixel coordinates to 3D x-coordinates
k = depth_info.k
px, fx = k[2], k[0]

# Validate camera parameters
if fx == 0:
return 0.0, 0.0, 0.0

# Validate depths are finite before calculation
if not np.all(np.isfinite(valid_depths)):
return 0.0, 0.0, 0.0

x_coords_pixel = valid_coords[:, 0]
x_3d = valid_depths * (x_coords_pixel - px) / fx

# Validate result
if not np.any(np.isfinite(x_3d)):
return 0.0, 0.0, 0.0

# Filter outliers using robust statistics
# Compute weighted median as reference
Expand Down Expand Up @@ -839,7 +957,7 @@ def _compute_width_bounds(
x_min = x_center - half_min
x_max = x_center + half_min

return x_center, x_min, x_max
return float(x_center), float(x_min), float(x_max)

@staticmethod
def _compute_depth_bounds_weighted(
Expand All @@ -855,9 +973,27 @@ def _compute_depth_bounds_weighted(
Returns:
Tuple of (z_center, z_min, z_max) representing the object's depth
"""
# Input validations
try:
depth_values = np.asarray(depth_values, dtype=np.float64)
spatial_weights = np.asarray(spatial_weights, dtype=np.float64)
except (ValueError, TypeError):
return 0.0, 0.0, 0.0

if len(depth_values) == 0:
return 0.0, 0.0, 0.0

# Validate that all values are finite
valid_mask = np.isfinite(depth_values) & np.isfinite(spatial_weights)
depth_values = depth_values[valid_mask]
spatial_weights = spatial_weights[valid_mask]

if len(depth_values) == 0:
return 0.0, 0.0, 0.0

if len(depth_values) < 4:
z_center = np.median(depth_values)
return z_center, np.min(depth_values), np.max(depth_values)
z_center = float(np.median(depth_values))
return z_center, float(np.min(depth_values)), float(np.max(depth_values))

# Step 1: Multi-scale histogram analysis for robust mode detection
depth_range = np.ptp(depth_values)
Expand Down Expand Up @@ -988,7 +1124,7 @@ def _compute_depth_bounds_weighted(
z_min = z_center - half_min
z_max = z_center + half_min

return z_center, z_min, z_max
return float(z_center), float(z_min), float(z_max)

def convert_keypoints_to_3d(
self,
Expand All @@ -1006,6 +1142,9 @@ def convert_keypoints_to_3d(
@param detection Detection containing 2D keypoints
@return Array of 3D keypoints
"""
# Validate input
if depth_image is None or not isinstance(depth_image, np.ndarray):
return KeyPoint3DArray()

# Build an array of 2D keypoints
keypoints_2d = np.array(
Expand All @@ -1016,8 +1155,20 @@ def convert_keypoints_to_3d(

# Sample depth image and project to 3D
z = depth_image[u, v]

# Validate and convert to float
try:
z = np.asarray(z, dtype=np.float64)
except (ValueError, TypeError):
return KeyPoint3DArray()

k = depth_info.k
px, py, fx, fy = k[2], k[5], k[0], k[4]

# Validate camera parameters
if fx == 0 or fy == 0:
return KeyPoint3DArray()

x = z * (v - px) / fx
y = z * (u - py) / fy
points_3d = (
Expand All @@ -1027,11 +1178,11 @@ def convert_keypoints_to_3d(
# Generate message
msg_array = KeyPoint3DArray()
for p, d in zip(points_3d, detection.keypoints.data):
if not np.isnan(p).any():
if not np.isnan(p).any() and np.all(np.isfinite(p)):
msg = KeyPoint3D()
msg.point.x = p[0]
msg.point.y = p[1]
msg.point.z = p[2]
msg.point.x = float(p[0])
msg.point.y = float(p[1])
msg.point.z = float(p[2])
msg.id = d.id
msg.score = d.score
msg_array.data.append(msg)
Expand Down