@@ -35,11 +35,12 @@ def process(args):
 import shutil
 import numpy as np
 import pandas as pd
-from scipy.spatial import KDTree
+from collections import OrderedDict

 useMono = None

 def interpolate_missing_properties(df_source, df_query, k_nearest=3):
+    from scipy.spatial import KDTree
     xyz = list('xyz')

     tree = KDTree(df_source[xyz].values)
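For reference, a minimal sketch of how such KDTree-based interpolation can work. The rest of the function body falls outside this hunk, so the inverse-distance weighting and column handling here are assumptions, not the patch's actual code:

    import numpy as np
    import pandas as pd
    from scipy.spatial import KDTree

    def interpolate_missing_properties_sketch(df_source, df_query, k_nearest=3):
        # fill the non-xyz columns of df_query from the k nearest source points
        xyz = list('xyz')
        tree = KDTree(df_source[xyz].values)
        dist, idx = tree.query(df_query[xyz].values, k=k_nearest)
        w = 1.0 / np.maximum(dist, 1e-12)      # inverse-distance weights
        w /= w.sum(axis=1, keepdims=True)
        df_result = df_query.copy()
        for c in [c for c in df_source.columns if c not in xyz]:
            # weighted average of the neighbors' property values
            df_result[c] = (df_source[c].values[idx] * w).sum(axis=1)
        return df_result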
@@ -57,6 +58,7 @@ def interpolate_missing_properties(df_source, df_query, k_nearest=3):
     return df_result

 def exclude_points(df_source, df_exclude, radius):
+    from scipy.spatial import KDTree
     xyz = list('xyz')
     tree = KDTree(df_exclude[xyz].values)
     ii = tree.query_ball_point(df_source[xyz], r=radius, return_length=True)
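With return_length=True, query_ball_point returns, for each source point, the number of excluded points inside the radius; the function presumably keeps only the untouched points. A sketch of the remaining, unshown line (an assumption, since it falls outside the hunk):

    # ii[i] == 0 means no excluded point lies within `radius` of df_source row i
    return df_source[ii == 0]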
@@ -117,12 +119,13 @@ def transform_camera(c):
     return value

 # TODO: don't use "Taichi" as the intermediate format
-def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True):
+def convert_json_taichi_to_colmap(pose_data, points_df, sparse_observations, nerfstudio_fake_obs=True):
     from scipy.spatial.transform import Rotation as R

     images = []
     cameras = []
     camera_id = 0
+    max_pt_id = 0
     for image_id, c in enumerate(pose_data):
         k = c['camera_intrinsics']
         mat = np.linalg.inv(np.array(c['T_pointcloud_camera']))
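Inverting T_pointcloud_camera (camera-to-world) yields the world-to-camera transform that COLMAP stores. The q and p used in the next hunk are computed in lines not shown in this diff; a hedged reconstruction of what they likely look like:

    # assumed reconstruction; scipy returns quaternions as (x, y, z, w),
    # while COLMAP's images.txt expects (QW, QX, QY, QZ)
    q_xyzw = R.from_matrix(mat[:3, :3]).as_quat()
    q = [q_xyzw[3], q_xyzw[0], q_xyzw[1], q_xyzw[2]]
    p = mat[:3, 3]  # world-to-camera translation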
@@ -132,7 +135,11 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
         images.append([image_id] + list(q) + list(p) + [camera_id, os.path.split(c['image_path'])[-1]])

         points = []
-        if nerfstudio_fake_obs:
+        for pt in sparse_observations.get(image_id, {}):
+            max_pt_id = max(max_pt_id, pt.id)
+            points.extend([pt.pixelCoordinates.x, pt.pixelCoordinates.y, pt.id])
+
+        if nerfstudio_fake_obs and len(points) == 0:
             points = [100,100,0,200,200,1] # NeRFstudio loader will crash without this

         images.append(points)
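Each image therefore contributes two rows to COLMAP's images.txt: a pose row, then its 2D observations as (X, Y, POINT3D_ID) triplets. The text format, with illustrative values:

    # IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME
    # POINTS2D[] as (X, Y, POINT3D_ID)
    1 0.98 0.01 0.12 0.05 0.30 -0.10 1.20 0 frame_00001.png
    512.3 320.7 17 640.0 400.2 42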
@@ -151,9 +158,18 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
     ]]

     points = []
-    for point_id, row in points_df.iterrows():
+    for _, row in points_df.iterrows():
+        if 'id' in row:
+            point_id = row['id']
+        else:
+            point_id = 0
+
+        if point_id == 0:
+            point_id = max_pt_id + 1
+            max_pt_id += 1
+
         point = [
-            point_id,
+            int(point_id),
             row['x'],
             row['y'],
             row['z'],
@@ -162,6 +178,7 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
             round(row['b'])
         ]

+        # TODO: compute reprojection errors here if really necessary for some use case
         if nerfstudio_fake_obs:
             fake_err = 1
             img_id, point_id = 0, 0
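fake_err and the (0, 0) pair are placeholders for the ERROR and TRACK fields of COLMAP's points3D.txt. The format, with illustrative values (error 1 and track (0, 0) as in the fake observations above):

    # POINT3D_ID X Y Z R G B ERROR TRACK[] as (IMAGE_ID, POINT2D_IDX)
    17 1.234 -0.567 2.890 200 180 160 1 0 0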
@@ -174,6 +191,7 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
     # Globals
     savedKeyFrames = {}
     pointClouds = {}
+    sparsePointColors = {}
     frameWidth = -1
     frameHeight = -1
     intrinsics = None
@@ -190,19 +208,21 @@ def blurScore(path):
     def post_process_point_clouds(globalPointCloud, sparse_point_cloud_df):
         # Save point clouds
         if len(globalPointCloud) == 0:
-            # add fake (gray) colors
             merged_df = sparse_point_cloud_df
-            for c in 'rgb': merged_df[c] = 128

         else:
             point_cloud_df = pd.DataFrame(np.array(globalPointCloud), columns=list('xyzrgb'))

             # drop uncolored points
             colored_point_cloud_df = point_cloud_df.loc[point_cloud_df[list('rgb')].max(axis=1) > 0].reset_index()
+            colored_point_cloud_df['id'] = 0 # ID = 0 is not used for valid sparse map points

             filtered_point_cloud_df = exclude_points(colored_point_cloud_df, sparse_point_cloud_df, radius=args.cell_size)
             decimated_df = voxel_decimate(filtered_point_cloud_df, args.cell_size)
-            sparse_colored_point_cloud_df = interpolate_missing_properties(colored_point_cloud_df, sparse_point_cloud_df)
+
+            # the dense point clouds presumably have more stable colors at corner points,
+            # so prefer them over the approach used when no dense data is available
+            sparse_colored_point_cloud_df = interpolate_missing_properties(colored_point_cloud_df, sparse_point_cloud_df[list('xyz')])
             merged_df = pd.concat([sparse_colored_point_cloud_df, decimated_df])

         if args.distance_quantile > 0:
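voxel_decimate is defined outside this diff; a minimal sketch of voxel-grid decimation, under the assumption that it keeps one representative point per grid cell of size cell_size:

    def voxel_decimate_sketch(df, cell_size):
        # quantize coordinates to integer voxel indices
        voxel = (df[list('xyz')] // cell_size).astype(int)
        # keep the first point encountered in each occupied voxel
        key = voxel.x.astype(str) + '_' + voxel.y.astype(str) + '_' + voxel.z.astype(str)
        return df.loc[~key.duplicated()]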
@@ -222,6 +242,7 @@ def onVioOutput(vioOutput):
     def onMappingOutput(output):
         nonlocal savedKeyFrames
         nonlocal pointClouds
+        nonlocal sparsePointColors
         nonlocal frameWidth
         nonlocal frameHeight
         nonlocal intrinsics
@@ -255,10 +276,23 @@ def onMappingOutput(output):
             undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
             if intrinsics is None: intrinsics = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()
             img = undistortedFrame.image.toArray()
+
             bgrImage = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
             fileName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
             cv2.imwrite(fileName, bgrImage)

+            # Find colors for sparse features
+            SHOW_FEATURE_MARKERS = True
+            for mpObs in undistortedFrame.sparseFeatures:
+                if mpObs.id not in sparsePointColors:
+                    px = np.clip(round(mpObs.pixelCoordinates.x), 0, img.shape[1]-1)
+                    py = np.clip(round(mpObs.pixelCoordinates.y), 0, img.shape[0]-1)
+                    rgb = list(img[py, px, ...].view(np.uint8))
+                    sparsePointColors[mpObs.id] = rgb
+                    if args.preview and SHOW_FEATURE_MARKERS:
+                        MARKER_COLOR = (0, 255, 0)
+                        cv2.circle(bgrImage, (px, py), 5, MARKER_COLOR, thickness=1)
+
             # Legacy: support SDK versions which also produced images where frameSet.depthFrame.image was None
             if frameSet.depthFrame is not None and frameSet.depthFrame.image is not None and not useMono:
                 alignedDepth = frameSet.getAlignedDepthFrame(undistortedFrame)
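Note the (row, column) order in the color lookup above: NumPy images are indexed img[y, x], and np.clip keeps projected feature coordinates inside the image bounds. A self-contained illustration:

    import numpy as np
    img = np.zeros((480, 640, 3), dtype=np.uint8)   # height x width x channels
    x, y = 700.7, -3.2                              # a feature slightly outside the frame
    px = np.clip(round(x), 0, img.shape[1] - 1)     # -> 639 (clamped to last column)
    py = np.clip(round(y), 0, img.shape[0] - 1)     # -> 0 (clamped to first row)
    rgb = list(img[py, px])                         # [0, 0, 0]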
@@ -280,105 +314,124 @@ def onMappingOutput(output):

         else:
             # Final optimized poses
-            try:
-                blurryImages = {}
-                imageSharpness = []
-                for frameId in output.map.keyFrames:
-                    imageSharpness.append((frameId, blurScore(f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}")))
-
-                # Look two images forward and two backwards, if current frame is blurriest, don't use it
-                for i in range(len(imageSharpness)):
-                    if i + 2 > len(imageSharpness): break
-                    group = [imageSharpness[j + i] for j in range(-2,2)]
-                    group.sort(key=lambda x: x[1])
-                    cur = imageSharpness[i][0]
-                    if group[0][0] == cur:
-                        blurryImages[cur] = True
-
-                trainingFrames = []
-                validationFrames = []
-                globalPointCloud = []
-                index = 0
-                name = os.path.split(args.output)[-1]
-                for frameId in output.map.keyFrames:
-                    if blurryImages.get(frameId): continue # Skip blurry images
-
-                    # Image data
-                    keyFrame = output.map.keyFrames.get(frameId)
-
-                    targetFrame = keyFrame.frameSet.rgbFrame
-                    if not targetFrame: targetFrame = keyFrame.frameSet.primaryFrame
-                    cameraPose = targetFrame.cameraPose
-
-                    # Camera data
-                    frame = {
-                        "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
-                        "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 matrix, the transformation matrix from camera coordinate to point cloud coordinate
-                        "camera_intrinsics": intrinsics.tolist(), # 3x3 matrix, the camera intrinsics matrix K
-                        "camera_height": frameHeight, # image height, in pixel
-                        "camera_width": frameWidth, # image width, in pixel
-                        "camera_id": index # camera id, not used
-                    }
+            blurryImages = {}
+            sparseObservations = {}
+            # OrderedDict to avoid undefined iteration order, which would produce different output files for the same input
+            sparsePointCloud = OrderedDict()
+            imageSharpness = []
+            for frameId in output.map.keyFrames:
+                imageSharpness.append((frameId, blurScore(f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}")))
+
+            # Look two images forward and two backwards, if current frame is blurriest, don't use it
+            for i in range(len(imageSharpness)):
+                if i + 2 > len(imageSharpness): break
+                group = [imageSharpness[j + i] for j in range(-2,2)]
+                group.sort(key=lambda x: x[1])
+                cur = imageSharpness[i][0]
+                if group[0][0] == cur:
+                    blurryImages[cur] = True
+
+            trainingFrames = []
+            validationFrames = []
+            globalPointCloud = []
+            index = 0
+            name = os.path.split(args.output)[-1]
+            for frameId in output.map.keyFrames:
+                if blurryImages.get(frameId): continue # Skip blurry images
+
+                # Image and pose data
+                keyFrame = output.map.keyFrames.get(frameId)

-                    oldImgName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
-                    newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
-                    os.rename(oldImgName, newImgName)
-
-                    oldDepth = f"{args.output}/tmp/depth_{frameId:05}.png"
-                    newDepth = f"{args.output}/images/depth_{index:05}.png"
-                    if os.path.exists(oldDepth):
-                        os.rename(oldDepth, newDepth)
-                        frame['depth_image_path'] = f"data/{name}/images/depth_{index:05}.png"
-
-                    if (index + 3) % 7 == 0:
-                        validationFrames.append(frame)
-                    else:
-                        trainingFrames.append(frame)
-
-                    if frameId in pointClouds:
-                        # Pointcloud data
-                        posData, colorData = pointClouds[frameId]
-                        pc = np.vstack((posData.T, np.ones((1, posData.shape[0]))))
-                        pc = (cameraPose.getCameraToWorldMatrix() @ pc)[:3, :].T
-                        pc = np.hstack((pc, colorData))
-                        globalPointCloud.extend(pc)
-
-                    index += 1
-
-                merged_df = post_process_point_clouds(
-                    globalPointCloud,
-                    pd.read_csv(f"{args.output}/points.sparse.csv", usecols=list('xyz')))
-
-                if args.format == 'taichi':
-                    # merged_df.to_csv(f"{args.output}/points.merged-decimated.csv", index=False)
-                    merged_df.to_parquet(f"{args.output}/point_cloud.parquet")
-
-                    with open(f"{args.output}/train.json", "w") as outFile:
-                        json.dump(trainingFrames, outFile, indent=2, sort_keys=True)
-
-                    with open(f"{args.output}/val.json", "w") as outFile:
-                        json.dump(validationFrames, outFile, indent=2, sort_keys=True)
-                elif args.format == 'nerfstudio':
-                    allFrames = trainingFrames + validationFrames
-                    with open(f"{args.output}/transforms.json", "w") as outFile:
-                        json.dump(convert_json_taichi_to_nerfstudio(allFrames), outFile, indent=2, sort_keys=True)
-
-                    # colmap text point format
-                    fake_colmap = f"{args.output}/colmap/sparse/0"
-                    os.makedirs(fake_colmap, exist_ok=True)
-
-                    c_points, c_images, c_cameras = convert_json_taichi_to_colmap(allFrames, merged_df, nerfstudio_fake_obs=True)
-
-                    def write_colmap_csv(data, fn):
-                        with open(fn, 'wt') as f:
-                            for row in data:
-                                f.write(' '.join([str(c) for c in row])+'\n')
-
-                    write_colmap_csv(c_points, f"{fake_colmap}/points3D.txt")
-                    write_colmap_csv(c_images, f"{fake_colmap}/images.txt")
-                    write_colmap_csv(c_cameras, f"{fake_colmap}/cameras.txt")
-            except Exception as e:
-                print(f"Something went wrong: {e}")
+                targetFrame = keyFrame.frameSet.rgbFrame
+                if not targetFrame: targetFrame = keyFrame.frameSet.primaryFrame
+                cameraPose = targetFrame.cameraPose
+
+                sparseObsForKeyFrame = []
+                DEFAULT_POINT_COLOR = [128, 128, 128] # default: 50% gray
+                for mpObs in targetFrame.sparseFeatures:
+                    # keeping the native object: OK since it is not used after the callback
+                    sparseObsForKeyFrame.append(mpObs)
+                    sparsePointCloud[mpObs.id] = {
+                        'position': [mpObs.position.x, mpObs.position.y, mpObs.position.z],
+                        'color': sparsePointColors.get(mpObs.id, DEFAULT_POINT_COLOR)
+                    }
+                sparseObservations[frameId] = sparseObsForKeyFrame
+
+                # Camera data
+                frame = {
+                    "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
+                    "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 matrix, the transformation matrix from camera coordinate to point cloud coordinate
+                    "camera_intrinsics": intrinsics.tolist(), # 3x3 matrix, the camera intrinsics matrix K
+                    "camera_height": frameHeight, # image height, in pixel
+                    "camera_width": frameWidth, # image width, in pixel
+                    "camera_id": index # camera id, not used
+                }
+
+                oldImgName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
+                newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
+                os.rename(oldImgName, newImgName)
+
+                oldDepth = f"{args.output}/tmp/depth_{frameId:05}.png"
+                newDepth = f"{args.output}/images/depth_{index:05}.png"
+                if os.path.exists(oldDepth):
+                    os.rename(oldDepth, newDepth)
+                    frame['depth_image_path'] = f"data/{name}/images/depth_{index:05}.png"
+
+                if (index + 3) % 7 == 0:
+                    validationFrames.append(frame)
+                else:
+                    trainingFrames.append(frame)
+
+                if frameId in pointClouds:
+                    # Pointcloud data
+                    posData, colorData = pointClouds[frameId]
+                    pc = np.vstack((posData.T, np.ones((1, posData.shape[0]))))
+                    pc = (cameraPose.getCameraToWorldMatrix() @ pc)[:3, :].T
+                    pc = np.hstack((pc, colorData))
+                    globalPointCloud.extend(pc)
+
+                index += 1
+
+            data = [list([pointId]) + list(point['position']) + list(point['color']) for pointId, point in sparsePointCloud.items()]
+            sparse_point_cloud_df = pd.DataFrame(
+                data,
+                columns=['id'] + list('xyzrgb'))
+            for c in 'rgb': sparse_point_cloud_df[c] = sparse_point_cloud_df[c].astype(np.uint8)
+
+            merged_df = post_process_point_clouds(
+                globalPointCloud,
+                sparse_point_cloud_df)
+
+            # print(merged_df)
+
+            if args.format == 'taichi':
+                # merged_df.to_csv(f"{args.output}/points.merged-decimated.csv", index=False)
+                merged_df[list('xyzrgb')].to_parquet(f"{args.output}/point_cloud.parquet")
+
+                with open(f"{args.output}/train.json", "w") as outFile:
+                    json.dump(trainingFrames, outFile, indent=2, sort_keys=True)
+
+                with open(f"{args.output}/val.json", "w") as outFile:
+                    json.dump(validationFrames, outFile, indent=2, sort_keys=True)
+            elif args.format == 'nerfstudio':
+                allFrames = trainingFrames + validationFrames
+                with open(f"{args.output}/transforms.json", "w") as outFile:
+                    json.dump(convert_json_taichi_to_nerfstudio(allFrames), outFile, indent=2, sort_keys=True)
+
+                # colmap text point format
+                fake_colmap = f"{args.output}/colmap/sparse/0"
+                os.makedirs(fake_colmap, exist_ok=True)
+
+                c_points, c_images, c_cameras = convert_json_taichi_to_colmap(allFrames, merged_df, sparseObservations, nerfstudio_fake_obs=True)
+
+                def write_colmap_csv(data, fn):
+                    with open(fn, 'wt') as f:
+                        for row in data:
+                            f.write(' '.join([str(c) for c in row])+'\n')
+
+                write_colmap_csv(c_points, f"{fake_colmap}/points3D.txt")
+                write_colmap_csv(c_images, f"{fake_colmap}/images.txt")
+                write_colmap_csv(c_cameras, f"{fake_colmap}/cameras.txt")
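blurScore, used for the blur filtering above, is defined outside this diff. A minimal sketch using the common variance-of-Laplacian sharpness measure (an assumption, not necessarily the patch's actual definition):

    import cv2
    def blurScore_sketch(path):
        # lower variance of the Laplacian = blurrier image
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        return cv2.Laplacian(image, cv2.CV_64F).var()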

 def copy_input_to_tmp_safe(input_dir, tmp_input):
     # also works if tmp dir is inside the input directory
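The body of copy_input_to_tmp_safe is outside the diff; given its comment, a sketch of one way to make the copy safe when tmp_input is nested inside input_dir (an assumed implementation, not the patch's code):

    def copy_input_to_tmp_safe_sketch(input_dir, tmp_input):
        os.makedirs(tmp_input, exist_ok=True)
        for name in os.listdir(input_dir):
            src = os.path.join(input_dir, name)
            # skip the tmp dir itself so the copy cannot recurse into it
            if os.path.abspath(src) == os.path.abspath(tmp_input): continue
            dst = os.path.join(tmp_input, name)
            if os.path.isdir(src): shutil.copytree(src, dst)
            else: shutil.copy(src, dst)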
@@ -429,8 +482,7 @@ def detect_device_preset(input_dir):
         "useSlam": True,
         "passthroughColorImages": True,
         "keyframeDecisionDistanceThreshold": args.key_frame_distance,
-        "icpVoxelSize": min(args.key_frame_distance, 0.1),
-        "mapSavePath": f"{args.output}/points.sparse.csv"
+        "icpVoxelSize": min(args.key_frame_distance, 0.1)
     }

     device_preset, cameras = detect_device_preset(args.input)