Merged
README.md: 1 change (1 addition & 0 deletions)
@@ -157,6 +157,7 @@ The camera swarm optimization can serve for finding an initial guess to [Monocul

```shell
rr-cam-swarm \
--collision-meshes \
--n-cameras 1000 \
--min-distance 0.5 \
--max-distance 3.0 \
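The renamed flag also flips the default: the robot is now built from visual meshes unless `--collision-meshes` is passed, whereas the old `--visual-meshes` flag opted into visual meshes. A minimal sketch of the new behavior, reusing the argument definition from the diff below:

```python
import argparse

# Argument definition as it appears in this PR; visual meshes are the default.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--collision-meshes",
    action="store_true",
    help="If set, collision meshes will be used instead of visual meshes.",
)

args = parser.parse_args([])
assert args.collision_meshes is False  # no flag -> visual meshes
```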
roboreg/cli/rr_cam_swarm.py: 92 changes (26 additions & 66 deletions)
@@ -1,20 +1,17 @@
import argparse
import os
from typing import Tuple

import cv2
import numpy as np
import rich
import rich.progress
import torch

from roboreg import differentiable as rrd
from roboreg.io import find_files, parse_camera_info
from roboreg.io import find_files, parse_camera_info, parse_mono_data
from roboreg.losses import soft_dice_loss
from roboreg.optim import LinearParticleSwarm, ParticleSwarmOptimizer
from roboreg.util import (
look_at_from_angle,
mask_exponential_distance_transform,
mask_exponential_decay,
overlay_mask,
random_fov_eye_space_coordinates,
)
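This import swap replaces `mask_exponential_distance_transform` with `mask_exponential_decay`, which `main()` now applies to each raw mask before stacking (see the pre-processing hunk further down). The new helper is not shown in this PR; a plausible sketch, assuming it turns a binary mask into a soft silhouette that decays exponentially with distance from the mask:

```python
import cv2
import numpy as np

def mask_exponential_decay(mask: np.ndarray, sigma: float = 2.0) -> np.ndarray:
    # Hypothetical sketch: 1.0 inside the silhouette, exponential falloff with
    # Euclidean distance outside of it. The real roboreg helper and its
    # default sigma may differ.
    outside = cv2.distanceTransform((mask == 0).astype(np.uint8), cv2.DIST_L2, 3)
    return np.exp(-outside / sigma).astype(np.float32)
```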
@@ -123,9 +120,9 @@ def args_factory() -> argparse.Namespace:
help="Scale the camera resolution by this factor. Reduces memory usage.",
)
parser.add_argument(
"--visual-meshes",
"--collision-meshes",
action="store_true",
help="If set, visual meshes will be used instead of collision meshes.",
help="If set, collision meshes will be used instead of visual meshes.",
)
parser.add_argument(
"--camera-info-file",
@@ -173,58 +170,6 @@ def args_factory() -> argparse.Namespace:
return parser.parse_args()


def parse_data(
path: str,
image_pattern: str,
mask_pattern: str,
joint_states_pattern: str,
n_samples: int = 5,
device: str = "cuda",
) -> Tuple[np.ndarray, torch.Tensor, torch.Tensor]:
image_files = find_files(path, image_pattern)
mask_files = find_files(path, mask_pattern)
joint_states_files = find_files(path, joint_states_pattern)

rich.print("Found the following files:")
rich.print(f"Images: {image_files}")
rich.print(f"Masks: {mask_files}")
rich.print(f"Joint states: {joint_states_files}")

# randomly sample n_samples
if n_samples > len(image_files):
n_samples = len(image_files)
random_indices = np.random.choice(len(image_files), n_samples, replace=False)
image_files = np.array(image_files)[random_indices].tolist()
mask_files = np.array(mask_files)[random_indices].tolist()
joint_states_files = np.array(joint_states_files)[random_indices].tolist()

rich.print(f"Randomly sampled the following {n_samples} files:")
rich.print(f"Images: {image_files}")
rich.print(f"Masks: {mask_files}")
rich.print(f"Joint states: {joint_states_files}")

if len(mask_files) != len(joint_states_files):
raise ValueError("Number of masks and joint states do not match.")

images = [
cv2.imread(os.path.join(path, file), cv2.IMREAD_COLOR) for file in image_files
]
masks = [
mask_exponential_distance_transform(
cv2.imread(os.path.join(path, file), cv2.IMREAD_GRAYSCALE)
)
for file in mask_files
]
joint_states = [np.load(os.path.join(path, file)) for file in joint_states_files]

masks = torch.tensor(np.array(masks), dtype=torch.float32, device=device)
joint_states = torch.tensor(
np.array(joint_states), dtype=torch.float32, device=device
)

return images, joint_states, masks


def instantiate_particles(
n_particles: int,
height: int,
@@ -293,15 +238,30 @@ def main() -> None:
height, width, intrinsics = parse_camera_info(
camera_info_file=args.camera_info_file
)
images, joint_states, masks = parse_data(
image_files = find_files(args.path, args.image_pattern)
mask_files = find_files(args.path, args.mask_pattern)
joint_states_files = find_files(args.path, args.joint_states_pattern)
n_samples = args.n_samples
if n_samples > len(image_files): # randomly sample n_samples
n_samples = len(image_files)
random_indices = np.random.choice(len(image_files), n_samples, replace=False)
image_files = np.array(image_files)[random_indices].tolist()
mask_files = np.array(mask_files)[random_indices].tolist()
joint_states_files = np.array(joint_states_files)[random_indices].tolist()
images, joint_states, masks = parse_mono_data(
path=args.path,
image_pattern=args.image_pattern,
mask_pattern=args.mask_pattern,
joint_states_pattern=args.joint_states_pattern,
n_samples=args.n_samples,
device=device,
image_files=image_files,
mask_files=mask_files,
joint_states_files=joint_states_files,
)

# pre-process data
joint_states = torch.tensor(
np.array(joint_states), dtype=torch.float32, device=device
)
n_joint_states = joint_states.shape[0]
masks = [mask_exponential_decay(mask) for mask in masks]
masks = torch.tensor(np.array(masks), dtype=torch.float32, device=device)

# scale image data (memory reduction)
height = int(height * args.scale)
@@ -348,7 +308,7 @@ def main() -> None:
urdf_parser=urdf_parser,
root_link_name=args.root_link_name,
end_link_name=args.end_link_name,
visual=args.visual_meshes,
collision=args.collision_meshes,
batch_size=batch_size,
device=device,
target_reduction=args.target_reduction, # reduce mesh vertex count for memory reduction
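Both this file and `rr_mono_dr.py` now delegate loading to a shared `parse_mono_data` helper that takes pre-discovered file lists and returns raw NumPy data, leaving random sampling, tensor conversion, and mask transforms to the caller. The helper itself lives outside this diff; a hypothetical reconstruction from its two call sites:

```python
import os
from typing import List, Tuple

import cv2
import numpy as np

def parse_mono_data(
    path: str,
    image_files: List[str],
    mask_files: List[str],
    joint_states_files: List[str],
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
    # Hypothetical reconstruction: load raw data only; callers convert joint
    # states to tensors and transform masks themselves.
    if not (len(image_files) == len(mask_files) == len(joint_states_files)):
        raise ValueError("Number of images, masks and joint states do not match.")
    images = [cv2.imread(os.path.join(path, f), cv2.IMREAD_COLOR) for f in image_files]
    masks = [cv2.imread(os.path.join(path, f), cv2.IMREAD_GRAYSCALE) for f in mask_files]
    joint_states = [np.load(os.path.join(path, f)) for f in joint_states_files]
    return images, joint_states, masks
```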
roboreg/cli/rr_hydra.py: 29 changes (16 additions & 13 deletions)
@@ -7,7 +7,7 @@

from roboreg.differentiable import Robot
from roboreg.hydra_icp import hydra_centroid_alignment, hydra_robust_icp
from roboreg.io import URDFParser, parse_camera_info, parse_hydra_data
from roboreg.io import URDFParser, find_files, parse_camera_info, parse_hydra_data
from roboreg.util import (
RegistrationVisualizer,
clean_xyz,
@@ -74,9 +74,9 @@ def args_factory() -> argparse.Namespace:
help="End link name. If unspecified, the last link with mesh will be used, which may cause errors.",
)
parser.add_argument(
"--visual-meshes",
"--collision-meshes",
action="store_true",
help="If set, visual meshes will be used instead of collision meshes.",
help="If set, collision meshes will be used instead of visual meshes.",
)
parser.add_argument(
"--depth-conversion-factor",
@@ -151,11 +151,14 @@ def main():
device = "cuda" if torch.cuda.is_available() else "cpu"

# load data
joint_states_files = find_files(args.path, args.joint_states_pattern)
mask_files = find_files(args.path, args.mask_pattern)
depth_files = find_files(args.path, args.depth_pattern)
joint_states, masks, depths = parse_hydra_data(
path=args.path,
joint_states_pattern=args.joint_states_pattern,
mask_pattern=args.mask_pattern,
depth_pattern=args.depth_pattern,
joint_states_files=joint_states_files,
mask_files=mask_files,
depth_files=depth_files,
)
height, width, intrinsics = parse_camera_info(args.camera_info_file)

@@ -165,16 +168,16 @@
root_link_name = args.root_link_name
end_link_name = args.end_link_name
if root_link_name == "":
root_link_name = urdf_parser.link_names_with_meshes(visual=args.visual_meshes)[
0
]
root_link_name = urdf_parser.link_names_with_meshes(
collision=args.collision_meshes
)[0]
rich.print(
f"Root link name not provided. Using the first link with mesh: '{root_link_name}'."
)
if end_link_name == "":
end_link_name = urdf_parser.link_names_with_meshes(visual=args.visual_meshes)[
-1
]
end_link_name = urdf_parser.link_names_with_meshes(
collision=args.collision_meshes
)[-1]
rich.print(
f"End link name not provided. Using the last link with mesh: '{end_link_name}'."
)
@@ -185,7 +188,7 @@
urdf_parser=urdf_parser,
root_link_name=root_link_name,
end_link_name=end_link_name,
visual=args.visual_meshes,
collision=args.collision_meshes,
batch_size=batch_size,
)

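The same pattern appears here: `find_files` is hoisted out of `parse_hydra_data`, so the caller decides which files get loaded. `find_files` is not shown in this PR either; a minimal sketch, assuming it returns sorted file names under `path` matching a glob-style pattern:

```python
import fnmatch
import os
from typing import List

def find_files(path: str, pattern: str) -> List[str]:
    # Assumed behavior: sorted, deterministic ordering so that image, mask,
    # and joint-state lists stay aligned across calls.
    return sorted(f for f in os.listdir(path) if fnmatch.fnmatch(f, pattern))
```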
roboreg/cli/rr_mono_dr.py: 91 changes (27 additions & 64 deletions)
@@ -1,7 +1,6 @@
import argparse
import importlib
import os
from typing import Tuple

import cv2
import numpy as np
@@ -10,9 +9,8 @@
import rich.progress
import torch

from roboreg.io import find_files
from roboreg.losses import soft_dice_loss
from roboreg.util import mask_exponential_distance_transform, overlay_mask
from roboreg.io import find_files, parse_mono_data
from roboreg.util import mask_distance_transform, overlay_mask
from roboreg.util.factories import create_robot_scene, create_virtual_camera


@@ -50,12 +48,6 @@ def args_factory() -> argparse.Namespace:
default=1.0,
help="Gamma for the learning rate scheduler.",
)
parser.add_argument(
"--sigma",
type=float,
default=2.0,
help="Sigma for the exponential distance transform on target masks.",
)
parser.add_argument(
"--display-progress",
action="store_true",
@@ -86,9 +78,9 @@ def args_factory() -> argparse.Namespace:
help="End link name. If unspecified, the last link with mesh will be used, which may cause errors.",
)
parser.add_argument(
"--visual-meshes",
"--collision-meshes",
action="store_true",
help="If set, visual meshes will be used instead of collision meshes.",
help="If set, collision meshes will be used instead of visual meshes.",
)
parser.add_argument(
"--camera-info-file",
@@ -136,59 +128,30 @@ def args_factory() -> argparse.Namespace:
return parser.parse_args()


def parse_data(
path: str,
image_pattern: str,
joint_states_pattern: str,
mask_pattern: str,
sigma: float = 2.0,
device: str = "cuda",
) -> Tuple[np.ndarray, torch.FloatTensor, torch.FloatTensor]:
image_files = find_files(path, image_pattern)
joint_states_files = find_files(path, joint_states_pattern)
left_mask_files = find_files(path, mask_pattern)

rich.print("Found the following files:")
rich.print(f"Images: {image_files}")
rich.print(f"Joint states: {joint_states_files}")
rich.print(f"Masks: {left_mask_files}")

if len(image_files) != len(joint_states_files) or len(image_files) != len(
left_mask_files
):
raise ValueError("Number of images, joint states, masks do not match.")

images = [cv2.imread(os.path.join(path, file)) for file in image_files]
joint_states = [np.load(os.path.join(path, file)) for file in joint_states_files]
masks = [
mask_exponential_distance_transform(
cv2.imread(os.path.join(path, file), cv2.IMREAD_GRAYSCALE), sigma=sigma
)
for file in left_mask_files
]

images = np.array(images)
joint_states = torch.tensor(
np.array(joint_states), dtype=torch.float32, device=device
)
masks = torch.tensor(np.array(masks), dtype=torch.float32, device=device).unsqueeze(
-1
)
return images, joint_states, masks


def main() -> None:
args = args_factory()
device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["MAX_JOBS"] = str(args.max_jobs) # limit number of concurrent jobs
images, joint_states, masks = parse_data(

# load data
image_files = find_files(args.path, args.image_pattern)
joint_states_files = find_files(args.path, args.joint_states_pattern)
mask_files = find_files(args.path, args.mask_pattern)
images, joint_states, masks = parse_mono_data(
path=args.path,
image_pattern=args.image_pattern,
joint_states_pattern=args.joint_states_pattern,
mask_pattern=args.mask_pattern,
sigma=args.sigma,
device=device,
image_files=image_files,
joint_states_files=joint_states_files,
mask_files=mask_files,
)

# pre-process data
joint_states = torch.tensor(
np.array(joint_states), dtype=torch.float32, device=device
)
distance_maps = [mask_distance_transform(mask) for mask in masks]
distance_maps = torch.tensor(
np.array(distance_maps), dtype=torch.float32, device=device
).unsqueeze(-1)

# instantiate camera with default identity extrinsics because we optimize for robot pose instead
camera = {
@@ -205,7 +168,7 @@ def main() -> None:
xacro_path=args.xacro_path,
root_link_name=args.root_link_name,
end_link_name=args.end_link_name,
visual=args.visual_meshes,
collision=args.collision_meshes,
cameras=camera,
device=device,
)
@@ -241,7 +204,7 @@ def main() -> None:
renders = {
"camera": scene.observe_from("camera"),
}
loss = soft_dice_loss(renders["camera"], masks).mean()
loss = torch.nn.functional.mse_loss(distance_maps, renders["camera"])
optimizer.zero_grad()
loss.backward()
optimizer.step()
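The objective also changes: instead of a soft-Dice overlap between the rendered silhouette and the (soft) mask, the loss is now a pixel-wise MSE against a distance transform of the mask, which still provides gradient signal where render and mask do not overlap at all. For reference, a common soft-Dice formulation (roboreg's actual `soft_dice_loss` may differ in detail):

```python
import torch

def soft_dice_loss(pred: torch.Tensor, target: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # pred, target: (B, H, W, 1) soft silhouettes in [0, 1].
    dims = (1, 2, 3)
    intersection = (pred * target).sum(dim=dims)
    union = pred.sum(dim=dims) + target.sum(dim=dims)
    return 1.0 - (2.0 * intersection + eps) / (union + eps)
```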
@@ -268,15 +231,15 @@ def main() -> None:
# difference left / right render / mask
difference = (
cv2.cvtColor(
np.abs(render - masks[0].squeeze().cpu().numpy()),
np.abs(render - masks[0].astype(np.float32) / 255.0),
cv2.COLOR_GRAY2BGR,
)
* 255.0
).astype(np.uint8)
# overlay segmentation mask
segmentation_overlay = overlay_mask(
image,
(masks[0].squeeze().cpu().numpy() * 255.0).astype(np.uint8),
masks[0],
mode="b",
scale=1.0,
)
@@ -305,7 +268,7 @@ def main() -> None:
for i, render in enumerate(renders):
render = render.squeeze().cpu().numpy()
overlay = overlay_mask(images[i], (render * 255.0).astype(np.uint8), scale=1.0)
difference = np.abs(render - masks[i].squeeze().cpu().numpy())
difference = np.abs(render - masks[i].astype(np.float32) / 255.0)

cv2.imwrite(os.path.join(args.path, f"dr_overlay_{i}.png"), overlay)
cv2.imwrite(