@@ -35,11 +35,12 @@ def process(args):
 import shutil
 import numpy as np
 import pandas as pd
-from scipy.spatial import KDTree
+from collections import OrderedDict

 useMono = None

 def interpolate_missing_properties(df_source, df_query, k_nearest=3):
+    from scipy.spatial import KDTree
     xyz = list('xyz')

     tree = KDTree(df_source[xyz].values)
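For reference, a minimal sketch of how such KDTree-based interpolation can work. The rest of the function body falls outside this hunk, so the inverse-distance weighting and column handling here are assumptions, not the patch's actual code:

    import numpy as np
    import pandas as pd
    from scipy.spatial import KDTree

    def interpolate_missing_properties_sketch(df_source, df_query, k_nearest=3):
        # fill the non-xyz columns of df_query from the k nearest source points
        xyz = list('xyz')
        tree = KDTree(df_source[xyz].values)
        dist, idx = tree.query(df_query[xyz].values, k=k_nearest)
        w = 1.0 / np.maximum(dist, 1e-12)      # inverse-distance weights
        w /= w.sum(axis=1, keepdims=True)
        df_result = df_query.copy()
        for c in [c for c in df_source.columns if c not in xyz]:
            # weighted average of the neighbors' property values
            df_result[c] = (df_source[c].values[idx] * w).sum(axis=1)
        return df_result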
@@ -57,6 +58,7 @@ def interpolate_missing_properties(df_source, df_query, k_nearest=3):
     return df_result

 def exclude_points(df_source, df_exclude, radius):
+    from scipy.spatial import KDTree
     xyz = list('xyz')
     tree = KDTree(df_exclude[xyz].values)
     ii = tree.query_ball_point(df_source[xyz], r=radius, return_length=True)
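With return_length=True, query_ball_point returns, for each source point, the number of excluded points inside the radius; the function presumably keeps only the untouched points. A sketch of the remaining, unshown line (an assumption, since it falls outside the hunk):

    # ii[i] == 0 means no excluded point lies within `radius` of df_source row i
    return df_source[ii == 0]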
@@ -117,12 +119,13 @@ def transform_camera(c):
     return value

 # TODO: don't use "Taichi" as the intermediate format
-def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True):
+def convert_json_taichi_to_colmap(pose_data, points_df, sparse_observations, nerfstudio_fake_obs=True):
     from scipy.spatial.transform import Rotation as R

     images = []
     cameras = []
     camera_id = 0
+    max_pt_id = 0
     for image_id, c in enumerate(pose_data):
         k = c['camera_intrinsics']
         mat = np.linalg.inv(np.array(c['T_pointcloud_camera']))
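Inverting T_pointcloud_camera (camera-to-world) yields the world-to-camera transform that COLMAP stores. The q and p used in the next hunk are computed in lines not shown in this diff; a hedged reconstruction of what they likely look like:

    # assumed reconstruction; scipy returns quaternions as (x, y, z, w),
    # while COLMAP's images.txt expects (QW, QX, QY, QZ)
    q_xyzw = R.from_matrix(mat[:3, :3]).as_quat()
    q = [q_xyzw[3], q_xyzw[0], q_xyzw[1], q_xyzw[2]]
    p = mat[:3, 3]  # world-to-camera translation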
@@ -132,7 +135,11 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
         images.append([image_id] + list(q) + list(p) + [camera_id, os.path.split(c['image_path'])[-1]])

         points = []
-        if nerfstudio_fake_obs:
+        for pt in sparse_observations.get(image_id, {}):
+            max_pt_id = max(max_pt_id, pt.id)
+            points.extend([pt.pixelCoordinates.x, pt.pixelCoordinates.y, pt.id])
+
+        if nerfstudio_fake_obs and len(points) == 0:
             points = [100,100,0,200,200,1] # NeRFstudio loader will crash without this

         images.append(points)
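Each image therefore contributes two rows to COLMAP's images.txt: a pose row, then its 2D observations as (X, Y, POINT3D_ID) triplets. The text format, with illustrative values:

    # IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME
    # POINTS2D[] as (X, Y, POINT3D_ID)
    1 0.98 0.01 0.12 0.05 0.30 -0.10 1.20 0 frame_00001.png
    512.3 320.7 17 640.0 400.2 42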
@@ -151,9 +158,18 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
     ]]

     points = []
-    for point_id, row in points_df.iterrows():
+    for _, row in points_df.iterrows():
+        if 'id' in row:
+            point_id = row['id']
+        else:
+            point_id = 0
+
+        if point_id == 0:
+            point_id = max_pt_id + 1
+            max_pt_id += 1
+
         point = [
-            point_id,
+            int(point_id),
             row['x'],
             row['y'],
             row['z'],
@@ -162,6 +178,7 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
             round(row['b'])
         ]

+        # TODO: compute reprojection errors here if really necessary for some use case
         if nerfstudio_fake_obs:
             fake_err = 1
             img_id, point_id = 0, 0
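fake_err and the (0, 0) pair are placeholders for the ERROR and TRACK fields of COLMAP's points3D.txt. The format, with illustrative values (error 1 and track (0, 0) as in the fake observations above):

    # POINT3D_ID X Y Z R G B ERROR TRACK[] as (IMAGE_ID, POINT2D_IDX)
    17 1.234 -0.567 2.890 200 180 160 1 0 0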
@@ -174,6 +191,7 @@ def convert_json_taichi_to_colmap(pose_data, points_df, nerfstudio_fake_obs=True
     # Globals
     savedKeyFrames = {}
     pointClouds = {}
+    sparsePointColors = {}
     frameWidth = -1
     frameHeight = -1
     intrinsics = None
@@ -190,19 +208,21 @@ def blurScore(path):
     def post_process_point_clouds(globalPointCloud, sparse_point_cloud_df):
         # Save point clouds
         if len(globalPointCloud) == 0:
-            # add fake (gray) colors
             merged_df = sparse_point_cloud_df
-            for c in 'rgb': merged_df[c] = 128

         else:
             point_cloud_df = pd.DataFrame(np.array(globalPointCloud), columns=list('xyzrgb'))

             # drop uncolored points
             colored_point_cloud_df = point_cloud_df.loc[point_cloud_df[list('rgb')].max(axis=1) > 0].reset_index()
+            colored_point_cloud_df['id'] = 0 # ID = 0 is not used for valid sparse map points

             filtered_point_cloud_df = exclude_points(colored_point_cloud_df, sparse_point_cloud_df, radius=args.cell_size)
             decimated_df = voxel_decimate(filtered_point_cloud_df, args.cell_size)
-            sparse_colored_point_cloud_df = interpolate_missing_properties(colored_point_cloud_df, sparse_point_cloud_df)
+
+            # the dense point clouds presumably have more stable colors at corner points,
+            # so prefer them over the approach used when no dense data is available
+            sparse_colored_point_cloud_df = interpolate_missing_properties(colored_point_cloud_df, sparse_point_cloud_df[list('xyz')])
             merged_df = pd.concat([sparse_colored_point_cloud_df, decimated_df])

         if args.distance_quantile > 0:
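voxel_decimate is defined outside this diff; a minimal sketch of voxel-grid decimation, under the assumption that it keeps one representative point per grid cell of size cell_size:

    def voxel_decimate_sketch(df, cell_size):
        # quantize coordinates to integer voxel indices
        voxel = (df[list('xyz')] // cell_size).astype(int)
        # keep the first point encountered in each occupied voxel
        key = voxel.x.astype(str) + '_' + voxel.y.astype(str) + '_' + voxel.z.astype(str)
        return df.loc[~key.duplicated()]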
@@ -222,6 +242,7 @@ def onVioOutput(vioOutput):
     def onMappingOutput(output):
         nonlocal savedKeyFrames
         nonlocal pointClouds
+        nonlocal sparsePointColors
         nonlocal frameWidth
         nonlocal frameHeight
         nonlocal intrinsics
@@ -255,10 +276,23 @@ def onMappingOutput(output):
             undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
             if intrinsics is None: intrinsics = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()
             img = undistortedFrame.image.toArray()
+
             bgrImage = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
             fileName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
             cv2.imwrite(fileName, bgrImage)

+            # Find colors for sparse features
+            SHOW_FEATURE_MARKERS = True
+            for mpObs in undistortedFrame.sparseFeatures:
+                if mpObs.id not in sparsePointColors:
+                    px = np.clip(round(mpObs.pixelCoordinates.x), 0, img.shape[1]-1)
+                    py = np.clip(round(mpObs.pixelCoordinates.y), 0, img.shape[0]-1)
+                    rgb = list(img[py, px, ...].view(np.uint8))
+                    sparsePointColors[mpObs.id] = rgb
+                    if args.preview and SHOW_FEATURE_MARKERS:
+                        MARKER_COLOR = (0, 255, 0)
+                        cv2.circle(bgrImage, (px, py), 5, MARKER_COLOR, thickness=1)
+
             # Legacy: support SDK versions which also produced images where frameSet.depthFrame.image was None
             if frameSet.depthFrame is not None and frameSet.depthFrame.image is not None and not useMono:
                 alignedDepth = frameSet.getAlignedDepthFrame(undistortedFrame)
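Note the (row, column) order in the color lookup above: NumPy images are indexed img[y, x], and np.clip keeps projected feature coordinates inside the image bounds. A self-contained illustration:

    import numpy as np
    img = np.zeros((480, 640, 3), dtype=np.uint8)   # height x width x channels
    x, y = 700.7, -3.2                              # a feature slightly outside the frame
    px = np.clip(round(x), 0, img.shape[1] - 1)     # -> 639 (clamped to last column)
    py = np.clip(round(y), 0, img.shape[0] - 1)     # -> 0 (clamped to first row)
    rgb = list(img[py, px])                         # [0, 0, 0]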
@@ -280,105 +314,124 @@ def onMappingOutput(output):

         else:
             # Final optimized poses
-            try:
-                blurryImages = {}
-                imageSharpness = []
-                for frameId in output.map.keyFrames:
-                    imageSharpness.append((frameId, blurScore(f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}")))
-
-                # Look two images forward and two backwards, if current frame is blurriest, don't use it
-                for i in range(len(imageSharpness)):
-                    if i + 2 > len(imageSharpness): break
-                    group = [imageSharpness[j + i] for j in range(-2,2)]
-                    group.sort(key=lambda x: x[1])
-                    cur = imageSharpness[i][0]
-                    if group[0][0] == cur:
-                        blurryImages[cur] = True
-
-                trainingFrames = []
-                validationFrames = []
-                globalPointCloud = []
-                index = 0
-                name = os.path.split(args.output)[-1]
-                for frameId in output.map.keyFrames:
-                    if blurryImages.get(frameId): continue # Skip blurry images
-
-                    # Image data
-                    keyFrame = output.map.keyFrames.get(frameId)
-
-                    targetFrame = keyFrame.frameSet.rgbFrame
-                    if not targetFrame: targetFrame = keyFrame.frameSet.primaryFrame
-                    cameraPose = targetFrame.cameraPose
-
-                    # Camera data
-                    frame = {
-                        "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
-                        "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 matrix, the transformation matrix from camera coordinate to point cloud coordinate
-                        "camera_intrinsics": intrinsics.tolist(), # 3x3 matrix, the camera intrinsics matrix K
-                        "camera_height": frameHeight, # image height, in pixel
-                        "camera_width": frameWidth, # image width, in pixel
-                        "camera_id": index # camera id, not used
-                    }
+            blurryImages = {}
+            sparseObservations = {}
+            # OrderedDict to avoid undefined iteration order, which would produce different output files for the same input
+            sparsePointCloud = OrderedDict()
+            imageSharpness = []
+            for frameId in output.map.keyFrames:
+                imageSharpness.append((frameId, blurScore(f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}")))
+
+            # Look two images forward and two backwards, if current frame is blurriest, don't use it
+            for i in range(len(imageSharpness)):
+                if i + 2 > len(imageSharpness): break
+                group = [imageSharpness[j + i] for j in range(-2,2)]
+                group.sort(key=lambda x: x[1])
+                cur = imageSharpness[i][0]
+                if group[0][0] == cur:
+                    blurryImages[cur] = True
+
+            trainingFrames = []
+            validationFrames = []
+            globalPointCloud = []
+            index = 0
+            name = os.path.split(args.output)[-1]
+            for frameId in output.map.keyFrames:
+                if blurryImages.get(frameId): continue # Skip blurry images
+
+                # Image and pose data
+                keyFrame = output.map.keyFrames.get(frameId)

-                    oldImgName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
-                    newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
-                    os.rename(oldImgName, newImgName)
-
-                    oldDepth = f"{args.output}/tmp/depth_{frameId:05}.png"
-                    newDepth = f"{args.output}/images/depth_{index:05}.png"
-                    if os.path.exists(oldDepth):
-                        os.rename(oldDepth, newDepth)
-                        frame['depth_image_path'] = f"data/{name}/images/depth_{index:05}.png"
-
-                    if (index + 3) % 7 == 0:
-                        validationFrames.append(frame)
-                    else:
-                        trainingFrames.append(frame)
-
-                    if frameId in pointClouds:
-                        # Pointcloud data
-                        posData, colorData = pointClouds[frameId]
-                        pc = np.vstack((posData.T, np.ones((1, posData.shape[0]))))
-                        pc = (cameraPose.getCameraToWorldMatrix() @ pc)[:3, :].T
-                        pc = np.hstack((pc, colorData))
-                        globalPointCloud.extend(pc)
-
-                    index += 1
-
-                merged_df = post_process_point_clouds(
-                    globalPointCloud,
-                    pd.read_csv(f"{args.output}/points.sparse.csv", usecols=list('xyz')))
-
-                if args.format == 'taichi':
-                    # merged_df.to_csv(f"{args.output}/points.merged-decimated.csv", index=False)
-                    merged_df.to_parquet(f"{args.output}/point_cloud.parquet")
-
-                    with open(f"{args.output}/train.json", "w") as outFile:
-                        json.dump(trainingFrames, outFile, indent=2, sort_keys=True)
-
-                    with open(f"{args.output}/val.json", "w") as outFile:
-                        json.dump(validationFrames, outFile, indent=2, sort_keys=True)
-                elif args.format == 'nerfstudio':
-                    allFrames = trainingFrames + validationFrames
-                    with open(f"{args.output}/transforms.json", "w") as outFile:
-                        json.dump(convert_json_taichi_to_nerfstudio(allFrames), outFile, indent=2, sort_keys=True)
-
-                    # colmap text point format
-                    fake_colmap = f"{args.output}/colmap/sparse/0"
-                    os.makedirs(fake_colmap, exist_ok=True)
-
-                    c_points, c_images, c_cameras = convert_json_taichi_to_colmap(allFrames, merged_df, nerfstudio_fake_obs=True)
-
-                    def write_colmap_csv(data, fn):
-                        with open(fn, 'wt') as f:
-                            for row in data:
-                                f.write(' '.join([str(c) for c in row])+'\n')
-
-                    write_colmap_csv(c_points, f"{fake_colmap}/points3D.txt")
-                    write_colmap_csv(c_images, f"{fake_colmap}/images.txt")
-                    write_colmap_csv(c_cameras, f"{fake_colmap}/cameras.txt")
-            except Exception as e:
-                print(f"Something went wrong: {e}")
+                targetFrame = keyFrame.frameSet.rgbFrame
+                if not targetFrame: targetFrame = keyFrame.frameSet.primaryFrame
+                cameraPose = targetFrame.cameraPose
+
+                sparseObsForKeyFrame = []
+                DEFAULT_POINT_COLOR = [128, 128, 128] # default: 50% gray
+                for mpObs in targetFrame.sparseFeatures:
+                    # keeping the native object: OK since it is not used after the callback
+                    sparseObsForKeyFrame.append(mpObs)
+                    sparsePointCloud[mpObs.id] = {
+                        'position': [mpObs.position.x, mpObs.position.y, mpObs.position.z],
+                        'color': sparsePointColors.get(mpObs.id, DEFAULT_POINT_COLOR)
+                    }
+                sparseObservations[frameId] = sparseObsForKeyFrame
+
+                # Camera data
+                frame = {
+                    "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
+                    "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 matrix, the transformation matrix from camera coordinate to point cloud coordinate
+                    "camera_intrinsics": intrinsics.tolist(), # 3x3 matrix, the camera intrinsics matrix K
+                    "camera_height": frameHeight, # image height, in pixel
+                    "camera_width": frameWidth, # image width, in pixel
+                    "camera_id": index # camera id, not used
+                }
+
+                oldImgName = f"{args.output}/tmp/frame_{frameId:05}.{args.image_format}"
+                newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
+                os.rename(oldImgName, newImgName)
+
+                oldDepth = f"{args.output}/tmp/depth_{frameId:05}.png"
+                newDepth = f"{args.output}/images/depth_{index:05}.png"
+                if os.path.exists(oldDepth):
+                    os.rename(oldDepth, newDepth)
+                    frame['depth_image_path'] = f"data/{name}/images/depth_{index:05}.png"
+
+                if (index + 3) % 7 == 0:
+                    validationFrames.append(frame)
+                else:
+                    trainingFrames.append(frame)
+
+                if frameId in pointClouds:
+                    # Pointcloud data
+                    posData, colorData = pointClouds[frameId]
+                    pc = np.vstack((posData.T, np.ones((1, posData.shape[0]))))
+                    pc = (cameraPose.getCameraToWorldMatrix() @ pc)[:3, :].T
+                    pc = np.hstack((pc, colorData))
+                    globalPointCloud.extend(pc)
+
+                index += 1
+
+            data = [list([pointId]) + list(point['position']) + list(point['color']) for pointId, point in sparsePointCloud.items()]
+            sparse_point_cloud_df = pd.DataFrame(
+                data,
+                columns=['id'] + list('xyzrgb'))
+            for c in 'rgb': sparse_point_cloud_df[c] = sparse_point_cloud_df[c].astype(np.uint8)
+
+            merged_df = post_process_point_clouds(
+                globalPointCloud,
+                sparse_point_cloud_df)
+
+            # print(merged_df)
+
+            if args.format == 'taichi':
+                # merged_df.to_csv(f"{args.output}/points.merged-decimated.csv", index=False)
+                merged_df[list('xyzrgb')].to_parquet(f"{args.output}/point_cloud.parquet")
+
+                with open(f"{args.output}/train.json", "w") as outFile:
+                    json.dump(trainingFrames, outFile, indent=2, sort_keys=True)
+
+                with open(f"{args.output}/val.json", "w") as outFile:
+                    json.dump(validationFrames, outFile, indent=2, sort_keys=True)
+            elif args.format == 'nerfstudio':
+                allFrames = trainingFrames + validationFrames
+                with open(f"{args.output}/transforms.json", "w") as outFile:
+                    json.dump(convert_json_taichi_to_nerfstudio(allFrames), outFile, indent=2, sort_keys=True)
+
+                # colmap text point format
+                fake_colmap = f"{args.output}/colmap/sparse/0"
+                os.makedirs(fake_colmap, exist_ok=True)
+
+                c_points, c_images, c_cameras = convert_json_taichi_to_colmap(allFrames, merged_df, sparseObservations, nerfstudio_fake_obs=True)
+
+                def write_colmap_csv(data, fn):
+                    with open(fn, 'wt') as f:
+                        for row in data:
+                            f.write(' '.join([str(c) for c in row])+'\n')
+
+                write_colmap_csv(c_points, f"{fake_colmap}/points3D.txt")
+                write_colmap_csv(c_images, f"{fake_colmap}/images.txt")
+                write_colmap_csv(c_cameras, f"{fake_colmap}/cameras.txt")
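blurScore, used for the blur filtering above, is defined outside this diff. A minimal sketch using the common variance-of-Laplacian sharpness measure (an assumption, not necessarily the patch's actual definition):

    import cv2
    def blurScore_sketch(path):
        # lower variance of the Laplacian = blurrier image
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        return cv2.Laplacian(image, cv2.CV_64F).var()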

 def copy_input_to_tmp_safe(input_dir, tmp_input):
     # also works if tmp dir is inside the input directory
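The body of copy_input_to_tmp_safe is outside the diff; given its comment, a sketch of one way to make the copy safe when tmp_input is nested inside input_dir (an assumed implementation, not the patch's code):

    def copy_input_to_tmp_safe_sketch(input_dir, tmp_input):
        os.makedirs(tmp_input, exist_ok=True)
        for name in os.listdir(input_dir):
            src = os.path.join(input_dir, name)
            # skip the tmp dir itself so the copy cannot recurse into it
            if os.path.abspath(src) == os.path.abspath(tmp_input): continue
            dst = os.path.join(tmp_input, name)
            if os.path.isdir(src): shutil.copytree(src, dst)
            else: shutil.copy(src, dst)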
@@ -429,8 +482,7 @@ def detect_device_preset(input_dir):
         "useSlam": True,
         "passthroughColorImages": True,
         "keyframeDecisionDistanceThreshold": args.key_frame_distance,
-        "icpVoxelSize": min(args.key_frame_distance, 0.1),
-        "mapSavePath": f"{args.output}/points.sparse.csv"
+        "icpVoxelSize": min(args.key_frame_distance, 0.1)
     }

     device_preset, cameras = detect_device_preset(args.input)