diff --git a/.hydra/config.yaml b/.hydra/config.yaml deleted file mode 100644 index 73f0cd9..0000000 --- a/.hydra/config.yaml +++ /dev/null @@ -1,27 +0,0 @@ -work_dir: ${hydra:runtime.cwd} -data_dir: ${work_dir}/data -print_config: true -type: inference -task_name: demo -num_leaf: 8 -suffix: '' -save_demo: false -save_wis3d: false -use_tracking: false -model: - onepose_model_path: ${work_dir}/data/models/checkpoints/onepose/GATsSPG.ckpt - extractor_model_path: ${work_dir}/data/models/extractors/SuperPoint/superpoint_v1.pth - match_model_path: ${work_dir}/data/models/matchers/SuperGlue/superglue_outdoor.pth -scan_data_dir: ${data_dir}/onepose_datasets/val_data -sfm_model_dir: ${data_dir}/sfm_model -network: - detection: superpoint - matching: superglue -max_num_kp3d: 2500 -input: - data_dirs: /Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/onepose_datasets/tiger - tiger-test - sfm_model_dirs: /Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/sfm_model/tiger -output: - vis_dir: ${work_dir}/runs/vis/demo - eval_dir: ${work_dir}/runs/eval/demo diff --git a/.hydra/hydra.yaml b/.hydra/hydra.yaml deleted file mode 100644 index 3f952ed..0000000 --- a/.hydra/hydra.yaml +++ /dev/null @@ -1,160 +0,0 @@ -hydra: - run: - dir: ${work_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][HYDRA] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: simple - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - loggers: - logging_example: - level: DEBUG - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: simple - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: [] - task: - - +experiment=test_demo - - use_tracking=False - - input.data_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/onepose_datasets/tiger - tiger-test - - input.sfm_model_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/sfm_model/tiger - job: - name: inference_demo - override_dirname: +experiment=test_demo,input.data_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/onepose_datasets/tiger - tiger-test,input.sfm_model_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/sfm_model/tiger,use_tracking=False - id: ??? - num: ??? - config_name: config.yaml - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.1.1 - cwd: /Users/PELLERITO/Desktop/mixed_reality_code/OnePose - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /Users/PELLERITO/Desktop/mixed_reality_code/OnePose/configs - schema: file - provider: main - - path: '' - schema: structured - provider: schema - choices: - experiment: test_demo - logger: null - callbacks: null - datamodule: null - model: null - trainer: null - hydra/env: default - hydra/callbacks: null - hydra/job_logging: default - hydra/hydra_logging: default - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/.hydra/overrides.yaml b/.hydra/overrides.yaml deleted file mode 100644 index 31687cf..0000000 --- a/.hydra/overrides.yaml +++ /dev/null @@ -1,5 +0,0 @@ -- +experiment=test_demo -- use_tracking=False -- input.data_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/onepose_datasets/tiger - tiger-test -- input.sfm_model_dirs=/Users/PELLERITO/Desktop/mixed_reality_code/OnePose/data/sfm_model/tiger diff --git a/configs/experiment/object_detector_2.yaml b/configs/experiment/object_detector_2.yaml index 1637ba2..acce094 100644 --- a/configs/experiment/object_detector_2.yaml +++ b/configs/experiment/object_detector_2.yaml @@ -18,14 +18,16 @@ sfm_model_dir: ${data_dir}/sfm_model input: data_dirs: - - ${scan_data_dir}/0606-tiger-others tiger-2 - - ${scan_data_dir}/0613-adidasshoeright-others adidasshoeright-2 - - ${scan_data_dir}/0601-loquat-box loquat-2 - - ${scan_data_dir}/0602-aficion-box aficion-2 + - ${scan_data_dir}/0620-dinosaurcup-bottle dinosaurcup-1 + # - ${scan_data_dir}/0606-tiger-others tiger-2 + # - ${scan_data_dir}/0613-adidasshoeright-others adidasshoeright-2 + # - ${scan_data_dir}/0601-loquat-box loquat-2 + # - ${scan_data_dir}/0602-aficion-box aficion-2 sfm_model_dirs: - - ${sfm_model_dir}/0606-tiger-others - - ${sfm_model_dir}/0613-adidasshoeright-others - - ${sfm_model_dir}/0601-loquat-box - - ${sfm_model_dir}/0602-aficion-box \ No newline at end of file + - ${scan_data_dir}/0620-dinosaurcup-bottle + # - ${sfm_model_dir}/0606-tiger-others + # - ${sfm_model_dir}/0613-adidasshoeright-others + # - ${sfm_model_dir}/0601-loquat-box + # - ${sfm_model_dir}/0602-aficion-box \ No newline at end of file diff --git a/configs/experiment/test_demo.yaml b/configs/experiment/test_demo.yaml index 5516888..4b37a64 100644 --- a/configs/experiment/test_demo.yaml +++ b/configs/experiment/test_demo.yaml @@ -13,7 +13,7 @@ model: extractor_model_path: ${work_dir}/data/models/extractors/SuperPoint/superpoint_v1.pth match_model_path: ${work_dir}/data/models/matchers/SuperGlue/superglue_outdoor.pth -scan_data_dir: ${data_dir}/onepose_datasets/val_data +scan_data_dir: ${work_dir}/data/costum_datasets/test sfm_model_dir: ${data_dir}/sfm_model network: @@ -24,10 +24,11 @@ max_num_kp3d: 2500 input: data_dirs: - - ${scan_data_dir}/0606-tiger-others tiger-1 - - ${scan_data_dir}/0609-doll-others doll-3 + - ${scan_data_dir}/demo_bottle_sfm bottle-1 + # - ${scan_data_dir}/0606-tiger-others tiger-1 + # - ${scan_data_dir}/0609-doll-others doll-3 sfm_model_dirs: - - ${sfm_model_dir}/0606-tiger-others + - ${sfm_model_dir}/demo_bottle_sfm output: vis_dir: ${work_dir}/runs/vis/demo eval_dir: ${work_dir}/runs/eval/demo \ No newline at end of file diff --git a/configs/preprocess/sfm_spp_spg_val.yaml b/configs/preprocess/sfm_spp_spg_val.yaml index ef0f599..eb7b93a 100644 --- a/configs/preprocess/sfm_spp_spg_val.yaml +++ b/configs/preprocess/sfm_spp_spg_val.yaml @@ -3,18 +3,20 @@ type: sfm work_dir: ${hydra:runtime.cwd} redo: True -hololens: True +hololens: False -# scan_data_dir: ${work_dir}/data/onepose_datasets/val_data -scan_data_dir: ${work_dir}/data/costum_datasets/test +scan_data_dir: ${work_dir}/data/onepose_datasets/val_data +# scan_data_dir: ${work_dir}/data/costum_datasets/test dataset: max_num_kp3d: 2500 max_num_kp2d: 1000 data_dir: - # - ${scan_data_dir}/0606-tiger-others tiger-2 - - ${scan_data_dir}/demo_capture demo-1 + - ${scan_data_dir}/0606-tiger-others tiger-1 + # - ${scan_data_dir}/0620-dinosaurcup-bottle dinosaurcup-1 + # - ${scan_data_dir}/demo_bottle_sfm bottle-1 + # - ${scan_data_dir}/demo_capture demo-1 outputs_dir: ${work_dir}/data/sfm_model/{} @@ -27,7 +29,7 @@ network: matching_model_path: ${work_dir}/data/models/matchers/SuperGlue/superglue_outdoor.pth sfm: - down_ratio: 5 + down_ratio: 3 covis_num: 10 rotation_thresh: 50 diff --git a/run.py b/run.py index 8b39ef2..92cdd0e 100644 --- a/run.py +++ b/run.py @@ -2,6 +2,9 @@ import os import glob import hydra +import torch +import numpy as np +import os import os.path as osp from loguru import logger @@ -9,8 +12,8 @@ from omegaconf import DictConfig from src.bbox_3D_estimation.utils import predict_3D_bboxes - -from src.bbox_3D_estimation.utils import predict_3D_bboxes +from src.utils.parse_scanned_data import parse_images +from src.utils import data_utils def merge_( @@ -110,6 +113,7 @@ def sfm(cfg): """Reconstruct and postprocess sparse object point cloud, and store point cloud features""" data_dirs = cfg.dataset.data_dir down_ratio = cfg.sfm.down_ratio + crop_images = True data_dirs = [data_dirs] if isinstance(data_dirs, str) else data_dirs for data_dir in data_dirs: @@ -117,46 +121,68 @@ def sfm(cfg): root_dir, sub_dirs = data_dir.split(" ")[0], data_dir.split(" ")[1:] # Parse image, intrinsics and poses directories: - img_paths, poses_paths, full_res_img_paths = [], [], [] - for sub_dir in sub_dirs: - seq_dir = osp.join(root_dir, sub_dir) - img_paths += glob.glob(str(Path(seq_dir)) + "/color/*.png", recursive=True) - full_res_img_paths += glob.glob(str(Path(seq_dir)) + "/color_full/*.png", recursive=True) - poses_paths += glob.glob(str(Path(seq_dir)) + "/poses/*.txt", recursive=True) - intrinsics_path = str(Path(seq_dir)) + "/intrinsics.txt" - poses_paths += glob.glob(str(Path(seq_dir)) + "/poses/*.txt", recursive=True) - intrinsics_path = str(Path(seq_dir)) + "/intrinsics.txt" - - # Choose less images from the list to build the sfm model + poses_paths, full_res_img_paths = [], [] + paths = {} + seq_dir = str(Path(osp.join(root_dir, sub_dirs[0]))) - down_img_lists = [] - for img_file in img_paths: - index = int(img_file.split("/")[-1].split(".")[0]) - if index % down_ratio == 0: - down_img_lists.append(img_file) + full_res_img_paths += glob.glob(seq_dir + "/color_full/*.png", recursive=True) + poses_paths += glob.glob(seq_dir + "/poses/*.txt", recursive=True) + intrinsics_path = seq_dir + "/intrinsics.txt" + + poses_paths.sort(key=lambda i: int(os.path.splitext(os.path.basename(i))[0])) + full_res_img_paths.sort(key=lambda i: int(os.path.splitext(os.path.basename(i))[0])) + + paths['final_intrin_file'] = intrinsics_path + paths['reproj_box_dir'] = seq_dir + "/reproj_box/" + paths['crop_img_root'] = seq_dir + "/color/" + paths['intrin_dir'] = seq_dir + "/intrin/" + paths['img_list'] = full_res_img_paths + paths['M_dir'] = seq_dir + "/modified_poses/" - if len(img_paths) == 0: - logger.info(f"No png image in {root_dir}") - continue obj_name = root_dir.split("/")[-1] outputs_dir_root = cfg.dataset.outputs_dir.format(obj_name) # Begin predict 3d bboxes - predict_3D_bboxes( - intrisics_path=intrinsics_path, - full_res_img_paths=full_res_img_paths, - poses_paths=poses_paths, - data_root=root_dir, - seq_dir = seq_dir, - compute_on_GPU="cuda", - step=1, - hololens=cfg.hololens - ) + if not os.path.exists(root_dir + "/box3d_corners.txt"): + predict_3D_bboxes( + intrinsics_path=intrinsics_path, + full_res_img_paths=full_res_img_paths, + poses_paths=poses_paths, + data_root=root_dir, + seq_dir=seq_dir, + device="cuda" if torch.cuda.is_available() else "cpu", + step=1, + hololens=cfg.hololens, + root_2d_bbox=paths['reproj_box_dir'], + ) + + # Crop images and save them if you have MINIMAL folder structure + if crop_images: + img_paths = parse_images(paths, downsample_rate=1, hw=512) + else: + img_paths = glob.glob(str(Path(seq_dir)) + "/color/*.png", recursive=True) + if not os.path.exists(paths['intrin_dir']): + os.makedirs(paths['intrin_dir']) + K, _ = data_utils.get_K(intrinsics_path) + for index, _ in enumerate(img_paths): + np.savetxt(paths['intrin_dir'] + f"{index}.txt", K) + if len(img_paths) == 0: + logger.info(f"No png image in {root_dir}") + continue + + img_paths.sort(key=lambda i: int(os.path.splitext(os.path.basename(i))[0])) + # Choose less images from the list, to build the sfm model + down_img_lists = [] + for img_file in img_paths: + index = int(img_file.split("/")[-1].split(".")[0]) + if index % down_ratio == 0: + down_img_lists.append(img_file) + down_img_lists = sorted(down_img_lists, key=lambda i: int(os.path.splitext(os.path.basename(i))[0])) # Begin SfM and postprocess: sfm_core(cfg, down_img_lists, outputs_dir_root) - postprocess(cfg, down_img_lists, root_dir, outputs_dir_root) + postprocess(cfg, down_img_lists, root_dir, outputs_dir_root, filter_with_3d_bbox=True) def sfm_core(cfg, img_lists, outputs_dir_root): @@ -197,7 +223,7 @@ def sfm_core(cfg, img_lists, outputs_dir_root): ) # Reconstruct 3D point cloud with known image poses: - generate_empty.generate_model(img_lists, empty_dir) + generate_empty.generate_model(img_lists, empty_dir, do_ba=True) triangulation.main( deep_sfm_dir, empty_dir, @@ -206,17 +232,14 @@ def sfm_core(cfg, img_lists, outputs_dir_root): feature_out, matches_out, image_dir=None, - skip_geometric_verification=True, + skip_geometric_verification=False, ) -def postprocess(cfg, img_lists, root_dir, outputs_dir_root): +def postprocess(cfg, img_lists, root_dir, outputs_dir_root, filter_with_3d_bbox=False): """Filter points and average feature""" from src.sfm.postprocess import filter_points, feature_process, filter_tkl - # Probably here is where they use the first box - bbox_path = osp.join(root_dir, "box3d_corners.txt") - # Construct output directory structure: outputs_dir = osp.join( outputs_dir_root, @@ -234,8 +257,12 @@ def postprocess(cfg, img_lists, root_dir, outputs_dir_root): model_path, points_count_list, track_length, outputs_dir ) # For visualization only + # Probably here is where they use the first box + bbox_path = osp.join(root_dir, "box3d_corners.txt") + # Leverage the selected feature track length threshold and 3D BBox to filter 3D points: - xyzs, points_idxs = filter_points.filter_3d(model_path, track_length, bbox_path) + mask_filtering = not filter_with_3d_bbox + xyzs, points_idxs = filter_points.filter_3d(model_path, track_length, bbox_path, mask_filtering=mask_filtering) # Merge 3d points by distance between points merge_xyzs, merge_idxs = filter_points.merge(xyzs, points_idxs, dist_threshold=1e-3) diff --git a/shift_poses.py b/shift_poses.py index c0978f2..f7e8a4c 100644 --- a/shift_poses.py +++ b/shift_poses.py @@ -3,10 +3,16 @@ import numpy as np from src.bbox_3D_estimation.utils import read_list_poses_orig import re +from scipy.spatial.transform import Rotation as R + + + +r = R.from_quat([ 0, -0.4871745, 0, -0.8733046 ]) +# print(r.as_matrix()) regex = re.compile('[^0-9]') -DIR = "data/onepose_datasets/val_data/0606-tiger-others/tiger-2/" +DIR = "data/onepose_datasets/val_data/0620-dinosaurcup-bottle/dinosaurcup-1/" poses = sorted(glob.glob(f"{DIR}backup/poses/*.txt")) names = [] @@ -15,13 +21,60 @@ names.append(name) poses = read_list_poses_orig(poses) +M = np.empty((4, 4)) +# M[:3, :3] = np.array([[ 1.0000000, 0.0000000, 0.0000000], +# [ 0.0000000, 1.0000000, 0.0000000], +# [ 0.0000000, 0.0000000, 1.0000000 ]]) +M[:3, :3] = np.eye(3) # r.as_matrix() +M[:3, 3] = np.array([0, 0, 0]) +M[3, :] = [0, 0, 0, 1] + + +def ruf_to_flu(pose): + rotquat_ruf = R.from_matrix(pose[:3,:3]).as_quat() + rotquat_flu = np.array([-rotquat_ruf[2], rotquat_ruf[0], -rotquat_ruf[1], rotquat_ruf[3]]) + rotmat_flu = R.from_quat(rotquat_flu).as_matrix() + + transl_ruf = pose[:3, 3] + transl_flu = np.array([transl_ruf[2], -transl_ruf[0], transl_ruf[1]]) + + new_pose = np.eye(4) + new_pose[:3,:3] = rotmat_flu @ np.array([0,0,1,0,1,0,-1,0,0]).reshape((3,3)) + new_pose[:3, 3] = transl_flu + return new_pose shifted_poses = [] for pose in poses: - inverted = np.linalg.inv(pose) - inverted[0:3,3] += np.array([+5, -10, +10]) - original = np.linalg.inv(inverted) + + + pose = np.linalg.inv(pose) + # pose = np.dot(M, pose) + + # Extract rotation matrix and translation vector from left-handed pose T + # R = pose[:3, :3] + # t = pose[:3, 3] + + # # Negate last element of translation vector + # t[0] = t[2] + # t[2] = t[0] + + # # Convert the pose to a right-handed coordinate system + # R_right = np.array([[R[0,2], R[0,1], R[0,0]], + # [R[1,2], R[1,1], R[1,0]], + # [R[2,2], R[2,1], R[2,0]]]) + + # # Construct right-handed pose T' + # T_prime = np.eye(4) + # T_prime[:3, :3] = R_right + # T_prime[:3, 3] = t + + + # inverted = T_prime #np.dot(T_prime, pose) + # T_prime = pose + # pose = ruf_to_flu(pose) + + original = np.linalg.inv(pose) shifted_poses.append(original) diff --git a/src/bbox_3D_estimation/main.py b/src/bbox_3D_estimation/main.py index 54ea11a..897193b 100644 --- a/src/bbox_3D_estimation/main.py +++ b/src/bbox_3D_estimation/main.py @@ -32,6 +32,6 @@ BboxPredictor=BboxPredictor, full_res_img_paths=img_lists, poses_paths=poses_list, - intrisics_path=K, + intrinsics_path=K, data_root=data_root, ) diff --git a/src/bbox_3D_estimation/plotting.py b/src/bbox_3D_estimation/plotting.py index 2b2ff21..26bedf3 100644 --- a/src/bbox_3D_estimation/plotting.py +++ b/src/bbox_3D_estimation/plotting.py @@ -152,7 +152,7 @@ def plot_camera(M, figure_axes): The base of the pyramid points in the positive Z axis direction. """ # Compute the points for the camera at the origin, aligned with the axes. - base_width = 0.20 # Width (and height) of the pyramid base in meters + base_width = 0.05 # Width (and height) of the pyramid base in meters x = np.array([0, 0, 0, 0, 0, 1, 1, -1, -1, 1]) * base_width / 2 y = np.array([0, 0, 0, 0, 0, 1, -1, -1, 1, 1]) * base_width / 2 z = np.array([0, 0, 0, 0, 0, 2, 2, 2, 2, 2]) * base_width / 2 @@ -180,14 +180,15 @@ def plot_camera(M, figure_axes): y = points[1, :].reshape(2, 5) z = points[2, :].reshape(2, 5) - # figure_axes.plot_wireframe( - # x, y, z, rstride=1, cstride=1, color=[0, 0, 0], linewidth=0.5 - # ) - figure_axes.scatter(t[0], t[1], t[2]) + figure_axes.plot_wireframe( + x, y, z, rstride=1, cstride=1, color=[0, 0, 0], linewidth=0.5 + ) + # figure_axes.scatter(t[0], t[1], t[2]) + # figure_axes.scatter(0, 0, 0) def plot_3D_scene( - estQs, gtQs, Ms_t, dataset, save_output_images, points, GT_points, visibility=None + estQs, gtQs, Ms_t, dataset, save_output_images, points, orig_points, GT_points, visibility=None ): """Plot""" fig = plt.figure(figsize=(8, 8)) # Open a new figure. @@ -259,6 +260,26 @@ def plot_3D_scene( ) figure_axes.scatter(GT_points[:, 0], GT_points[:, 1], GT_points[:, 2]) + + Z = orig_points + # list of sides' polygons of figure + verts = [ + [Z[0], Z[1], Z[3], Z[2]], + [Z[4], Z[5], Z[7], Z[6]], + [Z[2], Z[3], Z[7], Z[6]], + [Z[0], Z[1], Z[5], Z[4]], + [Z[0], Z[2], Z[6], Z[4]], + [Z[1], Z[3], Z[7], Z[5]], + ] + + # plot sides + figure_axes.add_collection3d( + Poly3DCollection( + verts, facecolors="blue", linewidths=1, edgecolors="r", alpha=0.25 + ) + ) + + figure_axes.scatter(orig_points[:, 0], orig_points[:, 1], orig_points[:, 2]) fig.show() diff --git a/src/bbox_3D_estimation/utils.py b/src/bbox_3D_estimation/utils.py index 196aa3c..be157e0 100644 --- a/src/bbox_3D_estimation/utils.py +++ b/src/bbox_3D_estimation/utils.py @@ -34,7 +34,7 @@ def add_view(self, bbox_t: np.ndarray, pose_t: np.ndarray, poses_orig: list): self.poses_list.append(poses_orig) - def detect_3D_box(self): + def detect_3D_box(self, plot_3dbbox=False): object_idx = 0 selected_frames = self.bboxes.shape[0] self.visibility = np.ones((selected_frames, 1)) @@ -49,40 +49,27 @@ def detect_3D_box(self): points = np.array(list(itertools.product(*zip(mins, maxs)))) # Points in the camera frame - # points = np.dot(points, R.T) + orig_points = np.dot(points, R.T) - # Shift correctly the parralelepiped (we want it centered in the origin) - # points[:, 0:3] = np.add(centre[None, :], points[:, :3]) + # Shift correctly the parralelepiped + orig_points[:, 0:3] = np.add(centre[None, :], orig_points[:, :3],) self.axes = axes self.points = points + self.orig_points = orig_points self.centre = centre self.R = R + self.estQs = estQs # Transformation to have coordinates centered in the bounding box (and aligned with it) M = np.empty((4, 4)) - M[:3, :3] = R - M[:3, 3] = centre + M[:3, :3] = R # np.eye(3) # R + M[:3, 3] = centre # [0, 0, 0] # centre M[3, :] = [0, 0, 0, 1] - # print(M) self.M = np.linalg.inv(M) - # gt_p = np.loadtxt(f"data/onepose_datasets/val_data/0606-tiger-others/box3d_corners_GT.txt") - - # plot_3D_scene( - # estQs=estQs, - # gtQs=gt_p, - # Ms_t=self.poses, - # dataset="tiger", - # save_output_images=False, - # points=points, - # GT_points=gt_p - # ) - # plt.show() - - def save_3D_box(self, data_root): np.savetxt(data_root + "/box3d_corners.txt", self.points, delimiter=" ") @@ -95,9 +82,11 @@ def shift_centres(self): shifted_poses.append(original) self.shifted_poses = shifted_poses - def save_poses(self, seq_dir): + def save_poses(self, seq_dir, hololens): """Saves poses in the OnePose format (which is inverted respect to the Hololens format)""" - shift_pose_dir = f"{seq_dir}/poses_shifted/" + shift_pose_dir = f"{seq_dir}/poses/" + if hololens: + shift_pose_dir = f"{seq_dir}/poses/" # Overwrite poses os.makedirs(shift_pose_dir, exist_ok=True) for idx, pose in enumerate(self.shifted_poses): np.savetxt(f"{shift_pose_dir}{idx}.txt", pose, delimiter=" ") @@ -105,24 +94,65 @@ def save_poses(self, seq_dir): def save_dimensions(self, data_root): np.savetxt(data_root + "/box3d_dimensions.txt", self.axes, delimiter=" ") + def plot_3D_bb(self, poses, GT_points=None): + plot_3D_scene( + estQs=self.estQs, + gtQs=None, + Ms_t=poses, + dataset="tiger", + save_output_images=False, + points=self.points, + orig_points=self.orig_points, + GT_points=GT_points + ) + plt.show() + + def iou(self, coords2): + + # Predicted parallelepiped + coords1 = self.points + + # Find the min and max x, y, and z coordinates for each parallelepiped + min_coords1 = np.min(coords1, axis=0) + max_coords1 = np.max(coords1, axis=0) + min_coords2 = np.min(coords2, axis=0) + max_coords2 = np.max(coords2, axis=0) + + # Find the intersection of the two parallelepipeds + intersection_min = np.max([min_coords1, min_coords2], axis=0) + intersection_max = np.min([max_coords1, max_coords2], axis=0) + + # Find the volume of the intersection + intersection_volume = np.prod(intersection_max - intersection_min) + + # Find the volume of each parallelepiped + volume1 = np.prod(max_coords1 - min_coords1) + volume2 = np.prod(max_coords2 - min_coords2) + + # Calculate the intersection over union (IoU) + iou = intersection_volume / (volume1 + volume2 - intersection_volume) + + return iou + def predict_3D_bboxes( full_res_img_paths, - intrisics_path, + intrinsics_path, poses_paths, data_root, seq_dir, step=1, downscale_factor=0.3, - compute_on_GPU="cpu", + device="cpu", + root_2d_bbox=None, hololens=False ): full_res_img_paths = sort_path_list(full_res_img_paths) poses_paths = sort_path_list(poses_paths) - _K, _ = data_utils.get_K(intrisics_path) + _K, _ = data_utils.get_K(intrinsics_path) DetectorBox3D = Detector3D(_K) - BboxPredictor = UnsupBbox(downscale_factor=downscale_factor, device=compute_on_GPU) + BboxPredictor = UnsupBbox(downscale_factor=downscale_factor, device=device) for id, img_path in enumerate(tqdm(full_res_img_paths)): if id % step == 0 or id == 0: @@ -132,13 +162,31 @@ def predict_3D_bboxes( poses_orig = read_list_poses_orig([poses_paths[id]]) bbox_orig_res = BboxPredictor.infer_2d_bbox(image=image, K=_K) - DetectorBox3D.add_view(bbox_orig_res, poses, poses_orig) + if root_2d_bbox is not None: + if not os.path.exists(root_2d_bbox): + os.makedirs(root_2d_bbox) + BboxPredictor.save_2d_bbox(file_path = root_2d_bbox + f"{id}.txt") + DetectorBox3D.add_view(bbox_t=bbox_orig_res, pose_t=poses, poses_orig=poses_orig) DetectorBox3D.detect_3D_box() DetectorBox3D.save_3D_box(data_root) DetectorBox3D.shift_centres() - DetectorBox3D.save_poses(seq_dir) + DetectorBox3D.save_poses(seq_dir, hololens) DetectorBox3D.save_dimensions(data_root) + poses_t=None + for id, img_path in enumerate(tqdm(full_res_img_paths)): + poses = read_list_poses([poses_paths[id]], hololens=False) + if poses_t is None: + poses_t = poses + else: + poses_t = np.vstack((poses_t, poses)) + + GT_points = np.loadtxt(data_root + "/box3d_corners_GT.txt") + + iou = DetectorBox3D.iou(GT_points) + print("IoU is: ", iou) + print("Predicted transformation is: \n", np.linalg.inv(DetectorBox3D.M)) + DetectorBox3D.plot_3D_bb(poses_t, GT_points=GT_points) def sort_path_list(path_list): @@ -151,7 +199,8 @@ def read_list_poses(list, hololens=False): for idx, file_path in enumerate(list): with open(file_path) as f_input: if hololens: - pose = np.transpose(np.linalg.inv(np.loadtxt(f_input))[:3, :]) # TODO poses are inverted when from hololens + # TODO poses are inverted when from hololens + pose = np.transpose(np.linalg.inv(np.loadtxt(f_input))[:3, :]) else: pose = np.transpose(np.loadtxt(f_input)[:3, :]) if idx == 0: @@ -165,7 +214,7 @@ def read_list_poses_orig(list): poses = [] for idx, file_path in enumerate(list): with open(file_path) as f_input: - poses.append(np.linalg.inv(np.loadtxt(f_input))) + poses.append(np.loadtxt(f_input)) return poses diff --git a/src/deep_spectral_method/detection_2D_utils.py b/src/deep_spectral_method/detection_2D_utils.py index 3a11de4..cc5426c 100644 --- a/src/deep_spectral_method/detection_2D_utils.py +++ b/src/deep_spectral_method/detection_2D_utils.py @@ -63,7 +63,10 @@ def infer_2d_bbox(self, image, K): # Bounding boxes bbox = extract.extract_bboxes(feature_dict=feature_dict, segmap=segmap) - bbox_orig_res = ( + self.bbox_orig_res = ( np.array(bbox["bboxes_original_resolution"][0]) / self.downscale_factor ) - return bbox_orig_res + return self.bbox_orig_res + + def save_2d_bbox(self, file_path): + np.savetxt(file_path, self.bbox_orig_res, delimiter=" ") diff --git a/scripts/parse_scanned_data.py b/src/utils/parse_scanned_data.py similarity index 83% rename from scripts/parse_scanned_data.py rename to src/utils/parse_scanned_data.py index 653e2fa..d469ca9 100644 --- a/scripts/parse_scanned_data.py +++ b/src/utils/parse_scanned_data.py @@ -5,8 +5,10 @@ import os.path as osp import argparse from pathlib import Path +from tqdm import tqdm from transforms3d import affines, quaternions from src.utils import data_utils +from src.bbox_3D_estimation.utils import sort_path_list def get_arkit_default_path(data_dir, AR_annotations=True): @@ -188,10 +190,59 @@ def parse_video(paths, downsample_rate=5, bbox_3d_homo=None, hw=512, AR_annotati index += 1 cap.release() +def parse_images(paths, downsample_rate=5, hw=512, save_rotations=False): + orig_intrin_file = paths['final_intrin_file'] + crop_img_root = paths['crop_img_root'] + intrin_dir = paths['intrin_dir'] + img_list = sort_path_list(paths['img_list']) -def data_process_anno(data_dir, downsample_rate=1, hw=512, AR_annotations=True): + for key in paths.keys(): + if not isinstance(paths[key], list) and not os.path.exists(paths[key]): + os.makedirs(paths[key]) - paths = get_arkit_default_path(data_dir, AR_annotations) + K, _ = data_utils.get_K(orig_intrin_file) + crop_img_paths = [] + for index, img_path in enumerate(tqdm(img_list)): + image = cv2.imread(str(img_path)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + if index % downsample_rate != 0: + continue + + save_intrin_path = osp.join(intrin_dir, '{}.txt'.format(index)) + reproj_box3d_file = osp.join(paths['reproj_box_dir'], '{}.txt'.format(index)) + + if not osp.isfile(reproj_box3d_file): + continue + + reproj_box3d = np.loadtxt(osp.join(paths['reproj_box_dir'], '{}.txt'.format(index))).astype(int) + x0, y0 = reproj_box3d[0], reproj_box3d[1] + x1, y1 = reproj_box3d[2], reproj_box3d[3] + + box = np.array([x0, y0, x1, y1]) + resize_shape = np.array([y1 - y0, x1 - x0]) + K_crop, K_crop_homo = data_utils.get_K_crop_resize(box, K, resize_shape) + image_crop, trans1 = data_utils.get_image_crop_resize(image, box, resize_shape) + + box_new = np.array([0, 0, x1-x0, y1-y0]) + resize_shape = np.array([hw, hw]) + K_crop, K_crop_homo = data_utils.get_K_crop_resize(box_new, K_crop, resize_shape) + image_crop, trans2 = data_utils.get_image_crop_resize(image_crop, box_new, resize_shape) + + crop_img_file = crop_img_root + f"{index}.png" + crop_img_paths.append(crop_img_file) + + if save_rotations: + trans_full_to_crop = trans2 @ trans1 + trans_crop_to_full = np.linalg.inv(trans_full_to_crop) + np.savetxt(osp.join(paths['M_dir'], '{}.txt'.format(index)), trans_crop_to_full) + + np.savetxt(save_intrin_path, K_crop) + cv2.imwrite(crop_img_file, cv2.cvtColor(image_crop, cv2.COLOR_RGB2BGR)) + + return crop_img_paths + +def rename_intrinsics_file(paths): with open(paths['orig_intrin_file'], 'r') as f: lines = [l.strip() for l in f.readlines() if len(l) > 0 and l[0] != '#'] eles = [[float(e) for e in l.split(',')] for l in lines] @@ -200,6 +251,13 @@ def data_process_anno(data_dir, downsample_rate=1, hw=512, AR_annotations=True): with open(paths['final_intrin_file'], 'w') as f: f.write('fx: {0}\nfy: {1}\ncx: {2}\ncy: {3}'.format(fx, fy, cx, cy)) + return fx, fy, cx, cy + +def data_process_anno(data_dir, downsample_rate=1, hw=512, AR_annotations=True): + + paths = get_arkit_default_path(data_dir, AR_annotations) + fx, fy, cx, cy = rename_intrinsics_file(paths) + K_homo = np.array([ [fx, 0, cx, 0], [0, fy, cy, 0],