Merge pull request #45 from lbr-stack/dev-hydra-depth

mhubii · web-flow · commit c5edfaa54358 · 2024-12-07T16:01:36.000Z
Replaces point cloud with depth image
diff --git a/README.md b/README.md
@@ -119,23 +119,26 @@ Next:
 > In these examples, the [lbr_fri_ros2_stack](https://github.com/lbr-stack/lbr_fri_ros2_stack/) is used. Make sure to follow [Quick Start](https://github.com/lbr-stack/lbr_fri_ros2_stack/#quick-start) first. However, you can also use your own robot description files.
 
 ### Segment
-This is a required step to generate robot masks (also support SAM 2: `rr-sam2`).
+This is a required step to generate robot masks (also supports SAM: `rr-sam`).
 
 ```shell
-rr-sam \
-    --path <path_to_images> \
+rr-sam2 \
+    --path test/data/lbr_med7/zed2i \
     --pattern "*_image_*.png" \
-    --checkpoint <full_path_to_checkpoint>/*.pth
+    --n-positive-samples 5 \
+    --n-negative-samples 5 \
+    --device cuda
 ```
 
 ### Hydra Robust ICP
 The Hydra robust ICP implements a point-to-plane ICP registration on a Lie algebra. It does not use rendering and can also be used on CPU.
 
 ```shell
 rr-hydra \
-    --path test/data/lbr_med7/zed2i/high_res \
-    --mask-pattern mask_*.png \
-    --xyz-pattern xyz_*.npy \
+    --camera-info-file test/data/lbr_med7/zed2i/left_camera_info.yaml \
+    --path test/data/lbr_med7/zed2i \
+    --mask-pattern mask_sam2_left_image_*.png \
+    --depth-pattern depth_*.npy \
     --joint-states-pattern joint_states_*.npy \
     --ros-package lbr_description \
     --xacro-path urdf/med7/med7.xacro \
@@ -155,7 +158,7 @@ The camera swarm optimization can serve for finding an initial guess to [Monocul
 
 ```shell
 rr-cam-swarm \
-    --n-cameras 50 \
+    --n-cameras 100 \
     --min-distance 0.5 \
     --max-distance 3.0 \
     --angle-range 3.141 \
@@ -170,11 +173,11 @@ rr-cam-swarm \
     --end-link-name lbr_link_7 \
     --target-reduction 0.95 \
     --scale 0.25 \
-    --camera-info-file test/data/lbr_med7/zed2i/stereo_data/left_camera_info.yaml \
-    --path test/data/lbr_med7/zed2i/stereo_data \
-    --image-pattern left_img_*.png \
-    --joint-states-pattern joint_state_*.npy \
-    --mask-pattern left_mask_*.png \
+    --camera-info-file test/data/lbr_med7/zed2i/left_camera_info.yaml \
+    --path test/data/lbr_med7/zed2i \
+    --image-pattern left_image_*.png \
+    --joint-states-pattern joint_states_*.npy \
+    --mask-pattern mask_sam2_left_image_*.png \
     --output-file HT_cam_swarm.npy
 ```
 
@@ -189,19 +192,19 @@ This monocular differentiable rendering refinement requires a good initial estim
 ```shell
 rr-mono-dr \
     --optimizer SGD \
-    --lr 0.01 \
+    --lr 0.001 \
     --max-iterations 100 \
     --display-progress \
     --ros-package lbr_description \
     --xacro-path urdf/med7/med7.xacro \
     --root-link-name lbr_link_0 \
     --end-link-name lbr_link_7 \
-    --camera-info-file test/data/lbr_med7/zed2i/high_res/camera_info.yaml \
-    --extrinsics-file test/data/lbr_med7/zed2i/high_res/HT_hydra_robust.npy \
-    --path test/data/lbr_med7/zed2i/high_res \
-    --image-pattern image_*.png \
+    --camera-info-file test/data/lbr_med7/zed2i/left_camera_info.yaml \
+    --extrinsics-file test/data/lbr_med7/zed2i/HT_hydra_robust.npy \
+    --path test/data/lbr_med7/zed2i \
+    --image-pattern left_image_*.png \
     --joint-states-pattern joint_states_*.npy \
-    --mask-pattern mask_*.png \
+    --mask-pattern mask_sam2_left_image_*.png \
     --output-file HT_dr.npy
 ```
 
@@ -216,23 +219,23 @@ This stereo differentiable rendering refinement requires a good initial estimate
 ```shell
 rr-stereo-dr \
     --optimizer SGD \
-    --lr 0.01 \
+    --lr 0.001 \
     --max-iterations 100 \
     --display-progress \
     --ros-package lbr_description \
     --xacro-path urdf/med7/med7.xacro \
     --root-link-name lbr_link_0 \
     --end-link-name lbr_link_7 \
-    --left-camera-info-file test/data/lbr_med7/zed2i/stereo_data/left_camera_info.yaml \
-    --right-camera-info-file test/data/lbr_med7/zed2i/stereo_data/right_camera_info.yaml \
-    --left-extrinsics-file test/data/lbr_med7/zed2i/stereo_data/HT_cam_swarm.npy \
-    --right-extrinsics-file test/data/lbr_med7/zed2i/stereo_data/HT_right_to_left.npy \
-    --path test/data/lbr_med7/zed2i/stereo_data \
-    --left-image-pattern left_img_*.png \
-    --right-image-pattern right_img_*.png \
-    --joint-states-pattern joint_state_*.npy \
-    --left-mask-pattern left_mask_*.png \
-    --right-mask-pattern right_mask_*.png \
+    --left-camera-info-file test/data/lbr_med7/zed2i/left_camera_info.yaml \
+    --right-camera-info-file test/data/lbr_med7/zed2i/right_camera_info.yaml \
+    --left-extrinsics-file test/data/lbr_med7/zed2i/HT_hydra_robust.npy \
+    --right-extrinsics-file test/data/lbr_med7/zed2i/HT_right_to_left.npy \
+    --path test/data/lbr_med7/zed2i \
+    --left-image-pattern left_image_*.png \
+    --right-image-pattern right_image_*.png \
+    --joint-states-pattern joint_states_*.npy \
+    --left-mask-pattern mask_sam2_left_image_*.png \
+    --right-mask-pattern mask_sam2_right_image_*.png \
     --left-output-file HT_left_dr.npy \
     --right-output-file HT_right_dr.npy
 ```
@@ -253,13 +256,13 @@ rr-render \
     --xacro-path urdf/med7/med7.xacro \
     --root-link-name lbr_link_0 \
     --end-link-name lbr_link_7 \
-    --camera-info-file test/data/lbr_med7/zed2i/stereo_data/left_camera_info.yaml \
-    --extrinsics-file test/data/lbr_med7/zed2i/stereo_data/HT_left_dr.npy \
-    --images-path test/data/lbr_med7/zed2i/stereo_data \
-    --joint-states-path test/data/lbr_med7/zed2i/stereo_data \
-    --image-pattern left_img_*.png \
-    --joint-states-pattern joint_state_*.npy \
-    --output-path test/data/lbr_med7/zed2i/stereo_data
+    --camera-info-file test/data/lbr_med7/zed2i/left_camera_info.yaml \
+    --extrinsics-file test/data/lbr_med7/zed2i/HT_left_dr.npy \
+    --images-path test/data/lbr_med7/zed2i \
+    --joint-states-path test/data/lbr_med7/zed2i \
+    --image-pattern left_image_*.png \
+    --joint-states-pattern joint_states_*.npy \
+    --output-path test/data/lbr_med7/zed2i
 ```
 
 ## Testing
@@ -272,7 +275,7 @@ To run Hydra robust ICP on provided `xarm` and `realsense` data, run
 rr-hydra \
     --path test/data/xarm/realsense \
     --mask-pattern mask_*.png \
-    --xyz-pattern xyz_*.npy \
+    --depth-pattern depth_*.npy \
     --joint-states-pattern joint_state_*.npy \
     --ros-package xarm_description \
     --xacro-path  urdf/xarm_device.urdf.xacro \
diff --git a/roboreg/cli/rr_cam_swarm.py b/roboreg/cli/rr_cam_swarm.py
@@ -370,7 +370,7 @@ def fitness_closure() -> torch.Tensor:
         )
         renders = scene.observe_from("camera").squeeze()
         fitness = (
-            soft_dice_loss(renders, masks)
+            soft_dice_loss(renders.unsqueeze(-1), masks.unsqueeze(-1))
             .view(args.n_cameras, n_joint_states)
             .mean(dim=1)
         )
diff --git a/roboreg/cli/rr_hydra.py b/roboreg/cli/rr_hydra.py
@@ -7,18 +7,27 @@
 
 from roboreg.differentiable import TorchKinematics, TorchMeshContainer
 from roboreg.hydra_icp import hydra_centroid_alignment, hydra_robust_icp
-from roboreg.io import URDFParser, parse_hydra_data
+from roboreg.io import URDFParser, parse_camera_info, parse_hydra_data
 from roboreg.util import (
     RegistrationVisualizer,
     clean_xyz,
     compute_vertex_normals,
+    depth_to_xyz,
     from_homogeneous,
+    generate_ht_optical,
     mask_extract_boundary,
+    to_homogeneous,
 )
 
 
 def args_factory() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--camera-info-file",
+        type=str,
+        required=True,
+        help="Path to the camera parameters, <path_to>/camera_info.yaml.",
+    )
     parser.add_argument("--path", type=str, required=True, help="Path to the data.")
     parser.add_argument(
         "--mask-pattern",
@@ -27,7 +36,10 @@ def args_factory() -> argparse.Namespace:
         help="Mask file pattern.",
     )
     parser.add_argument(
-        "--xyz-pattern", type=str, default="xyz_*.npy", help="XYZ file pattern."
+        "--depth-pattern",
+        type=str,
+        default="depth_*.npy",
+        help="Depth file pattern. Note that depth values are expected in meters.",
     )
     parser.add_argument(
         "--joint-states-pattern",
@@ -113,12 +125,13 @@ def main():
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     # load data
-    joint_states, masks, xyzs = parse_hydra_data(
+    joint_states, masks, depths = parse_hydra_data(
         path=args.path,
         joint_states_pattern=args.joint_states_pattern,
         mask_pattern=args.mask_pattern,
-        xyz_pattern=args.xyz_pattern,
+        depth_pattern=args.depth_pattern,
     )
+    height, width, intrinsics = parse_camera_info(args.camera_info_file)
 
     # instantiate kinematics
     urdf_parser = URDFParser()
@@ -158,7 +171,7 @@ def main():
         device=device,
     )
 
-    # process data
+    # perform forward kinematics
     mesh_vertices = meshes.vertices.clone()
     joint_states = torch.tensor(
         np.array(joint_states), dtype=torch.float32, device=device
@@ -180,6 +193,22 @@ def main():
             ht.transpose(-1, -2),
         )
 
+    # turn depths into xyzs
+    intrinsics = torch.tensor(intrinsics, dtype=torch.float32, device=device)
+    depths = torch.tensor(np.array(depths), dtype=torch.float32, device=device)
+    xyzs = depth_to_xyz(depth=depths, intrinsics=intrinsics, z_max=1.5)
+
+    # flatten BxHxWx3 -> Bx(H*W)x3
+    xyzs = xyzs.view(-1, height * width, 3)
+    xyzs = to_homogeneous(xyzs)
+    ht_optical = generate_ht_optical(xyzs.shape[0], dtype=torch.float32, device=device)
+    xyzs = torch.matmul(xyzs, ht_optical.transpose(-1, -2))
+    xyzs = from_homogeneous(xyzs)
+
+    # unflatten
+    xyzs = xyzs.view(-1, height, width, 3)
+    xyzs = [xyz.squeeze() for xyz in xyzs.cpu().numpy()]
+
     # mesh vertices to list
     mesh_vertices = from_homogeneous(mesh_vertices)
     mesh_vertices = [mesh_vertices[i].contiguous() for i in range(batch_size)]
diff --git a/roboreg/cli/rr_render.py b/roboreg/cli/rr_render.py
@@ -144,7 +144,8 @@ def main():
             image_suffix = pathlib.Path(image_file).suffix
             cv2.imwrite(
                 os.path.join(
-                    str(output_path.absolute()), f"overlay_{image_stem + image_suffix}"
+                    str(output_path.absolute()),
+                    f"overlay_render_{image_stem + image_suffix}",
                 ),
                 overlay_mask(image, render, "b", scale=1.0),
             )
diff --git a/roboreg/cli/rr_sam.py b/roboreg/cli/rr_sam.py
@@ -99,7 +99,7 @@ def main():
             path.absolute(), f"mask_sam_{image_stem + image_suffix}"
         )
         overlay_path = os.path.join(
-            path.absolute(), f"mask_overlay_sam_{image_stem + image_suffix}"
+            path.absolute(), f"overlay_sam_{image_stem + image_suffix}"
         )
         cv2.imwrite(probability_path, (probability * 255.0).astype(np.uint8))
         cv2.imwrite(mask_path, mask)
diff --git a/roboreg/cli/rr_sam2.py b/roboreg/cli/rr_sam2.py
@@ -91,7 +91,7 @@ def main():
             path.absolute(), f"mask_sam2_{image_stem + image_suffix}"
         )
         overlay_path = os.path.join(
-            path.absolute(), f"mask_overlay_sam2_{image_stem + image_suffix}"
+            path.absolute(), f"overlay_sam2_{image_stem + image_suffix}"
         )
         cv2.imwrite(probability_path, (probability * 255.0).astype(np.uint8))
         cv2.imwrite(mask_path, mask)
diff --git a/roboreg/io.py b/roboreg/io.py
@@ -314,15 +314,15 @@ def parse_hydra_data(
     path: str,
     joint_states_pattern: str = "joint_states_*.npy",
     mask_pattern: str = "mask_*.png",
-    xyz_pattern: str = "xyz_*.npy",
+    depth_pattern: str = "depth_*.npy",
 ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
     r"""Parse data for Hydra registration.
 
     Args:
         path (str): Path to the data.
         joint_states_pattern (str): Pattern for joint states files.
         mask_pattern (str): Pattern for mask files.
-        xyz_pattern (str): Pattern for xyz files.
+        depth_pattern (str): Pattern for depth files. Note that depth values are expected in meters.
 
     Returns:
         Tuple[List[np.ndarray],List[np.ndarray],List[np.ndarray]]:
@@ -332,19 +332,19 @@ def parse_hydra_data(
     """
     joint_state_files = find_files(path, joint_states_pattern)
     mask_files = find_files(path, mask_pattern)
-    xyz_files = find_files(path, xyz_pattern)
+    depth_files = find_files(path, depth_pattern)
 
-    if len(joint_state_files) == 0 or len(mask_files) == 0 or len(xyz_files) == 0:
+    if len(joint_state_files) == 0 or len(mask_files) == 0 or len(depth_files) == 0:
         raise ValueError("No files found.")
     if len(joint_state_files) != len(mask_files) or len(joint_state_files) != len(
-        xyz_files
+        depth_files
     ):
         raise ValueError("Number of files do not match.")
 
     rich.print("Found the following files:")
     rich.print(f"Joint states: {joint_state_files}")
     rich.print(f"Masks: {mask_files}")
-    rich.print(f"XYZ: {xyz_files}")
+    rich.print(f"Depths: {depth_files}")
 
     # load data
     joint_states = [
@@ -355,5 +355,5 @@ def parse_hydra_data(
         cv2.imread(os.path.join(path, mask_file), cv2.IMREAD_GRAYSCALE)
         for mask_file in mask_files
     ]
-    xyzs = [np.load(os.path.join(path, xyz_file)) for xyz_file in xyz_files]
-    return joint_states, masks, xyzs
+    depths = [np.load(os.path.join(path, depth_file)) for depth_file in depth_files]
+    return joint_states, masks, depths
diff --git a/test/test_hydra_icp.py b/test/test_hydra_icp.py
diff --git a/test/test_io.py b/test/test_io.py

Original file line number	Diff line number	Diff line change
`@@ -370,7 +370,7 @@ def fitness_closure() -> torch.Tensor:`
`370`	`370`	`)`
`371`	`371`	`renders = scene.observe_from("camera").squeeze()`
`372`	`372`	`fitness = (`
`373`		`- soft_dice_loss(renders, masks)`
	`373`	`+ soft_dice_loss(renders.unsqueeze(-1), masks.unsqueeze(-1))`
`374`	`374`	`.view(args.n_cameras, n_joint_states)`
`375`	`375`	`.mean(dim=1)`
`376`	`376`	`)`
Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@ def main():`
`99`	`99`	`path.absolute(), f"mask_sam_{image_stem + image_suffix}"`
`100`	`100`	`)`
`101`	`101`	`overlay_path = os.path.join(`
`102`		`- path.absolute(), f"mask_overlay_sam_{image_stem + image_suffix}"`
	`102`	`+ path.absolute(), f"overlay_sam_{image_stem + image_suffix}"`
`103`	`103`	`)`
`104`	`104`	`cv2.imwrite(probability_path, (probability * 255.0).astype(np.uint8))`
`105`	`105`	`cv2.imwrite(mask_path, mask)`
Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@ def main():`
`91`	`91`	`path.absolute(), f"mask_sam2_{image_stem + image_suffix}"`
`92`	`92`	`)`
`93`	`93`	`overlay_path = os.path.join(`
`94`		`- path.absolute(), f"mask_overlay_sam2_{image_stem + image_suffix}"`
	`94`	`+ path.absolute(), f"overlay_sam2_{image_stem + image_suffix}"`
`95`	`95`	`)`
`96`	`96`	`cv2.imwrite(probability_path, (probability * 255.0).astype(np.uint8))`
`97`	`97`	`cv2.imwrite(mask_path, mask)`