"""
import json
import os
+import math
from collections import OrderedDict


# --- The following mechanism allows using this both as a stand-alone
@@ -21,6 +22,8 @@ def define_args(parser):
    parser.add_argument('--fast', action='store_true', help='Fast but lower quality settings')
    parser.add_argument('--mono', action='store_true', help='Monocular mode: disable ToF and stereo data')
    parser.add_argument('--internal', action='append', type=str, help='Internal override parameters in the form --internal=name:value')
+   parser.add_argument('--blur_filter_range', type=int, default=4, help='Remove key frames that are the blurriest in a neighborhood of this size (0=disabled)')
+   parser.add_argument('--no_undistort', action='store_true', help='Do not undistort output images (only supported with certain devices)')
    parser.add_argument('--image_format', type=str, default='jpg', help="Color image format (use 'png' for top quality)")
    parser.add_argument("--preview", help="Show latest primary image as a preview", action="store_true")
    parser.add_argument("--preview3d", help="Show 3D visualization", action="store_true")
@@ -67,15 +70,34 @@ def grouping_function(row):
    grouped = df.assign(voxel_index=df.apply(grouping_function, axis=1)).groupby('voxel_index')
    return grouped.first().reset_index()[[c for c in df.columns if c != 'voxel_index']]

-def blurScore(path):
-    import cv2
+
+def compute_cam_velocities(targetFrame, angularVelocity):
+    # Camera pose and velocity data
+    WToC = targetFrame.cameraPose.getWorldToCameraMatrix()
+    vW = targetFrame.cameraPose.velocity
+    vCam = WToC[:3, :3] @ [vW.x, vW.y, vW.z]
+    vAngCam = WToC[:3, :3] @ [angularVelocity.x, angularVelocity.y, angularVelocity.z]
+    return vCam, vAngCam
+
+def blurScore(WToC, vCam, vAngCam, targetFrame, exposureTime):
    import numpy as np
-    image = cv2.imread(path)
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    f_transform = np.fft.fft2(gray)
-    f_transform_shifted = np.fft.fftshift(f_transform)
-    magnitude_spectrum = np.abs(f_transform_shifted)
-    return np.percentile(magnitude_spectrum, 95)
+    sumVels = 0
+    n = 0
+    for mpObs in targetFrame.sparseFeatures:
+        pW = mpObs.position
+        pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
+        pointVelCam = vCam + np.cross(vAngCam, pCam)
+        vPix = targetFrame.cameraPose.camera.getIntrinsicMatrix()[:2, :2] @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
+        n += 1
+        sumVels += np.linalg.norm(vPix)
+
+    if exposureTime > 0:
+        sumVels *= exposureTime
+
+    # print('blur score %g (n = %d)' % (float(sumVels) / max(n, 1), n))
+
+    if n == 0: return 1e6
+    return sumVels / n


def point_cloud_data_frame_to_ply(df, out_fn):
    with open(out_fn, 'wt') as f:
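Note on the new metric: blurScore no longer measures sharpness from pixel data (the removed FFT path); it estimates motion blur geometrically, as the mean screen-space speed of the tracked sparse features, scaled by exposure time when known. A standalone sketch of the same computation with plain NumPy inputs (toy values and illustrative names, not SDK objects):

    import numpy as np

    def blur_score_sketch(K, W_to_C, v_cam, v_ang_cam, points_world, exposure_time):
        # Mean screen-space speed of the tracked points; higher = blurrier.
        speeds = []
        for p_w in points_world:
            p_cam = (W_to_C @ np.append(p_w, 1.0))[:3]       # world -> camera
            v_point = v_cam + np.cross(v_ang_cam, p_cam)     # linear + rotational part
            v_pix = K[:2, :2] @ (v_point[:2] / max(p_cam[2], 1e-6))
            speeds.append(np.linalg.norm(v_pix))
        if not speeds:
            return 1e6  # no features: treat as maximally blurry, like the code above
        score = float(np.mean(speeds))
        if exposure_time > 0:
            score *= exposure_time  # approximate blur streak length in pixels
        return score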
@@ -97,16 +119,50 @@ def point_cloud_data_frame_to_ply(df, out_fn):
            for prop in 'rgb': r.append(int(row[prop]))
            f.write(' '.join([str(v) for v in r]) + '\n')

+
+def convert_distortion(cam):
+    coeffs = cam.get('distortionCoefficients', None)
+    if coeffs is None:
+        return None
+
+    if all([c == 0.0 for c in coeffs]): return None
+
+    get_coeffs = lambda names: dict(zip(names.split(), coeffs))
+
+    model = 'OPENCV'
+    if cam['model'] == 'brown-conrady':
+        r = get_coeffs('k1 k2 p1 p2 k3 k4 k5 k6')
+    elif cam['model'] == 'pinhole':
+        r = get_coeffs('k1 k2 k3')
+        r['p1'] = 0
+        r['p2'] = 0
+    elif cam['model'] == 'kannala-brandt4':
+        model = 'OPENCV_FISHEYE'
+        r = get_coeffs('k1 k2 k3 k4')
+    else:
+        raise RuntimeError(f"unsupported camera model: {cam['model']}")
+    r['model'] = model
+    r['cx'] = cam['principalPointX']
+    r['cy'] = cam['principalPointY']
+    r['fx'] = cam['focalLengthX']
+    r['fy'] = cam['focalLengthY']
+    return r
+

def convert_json_taichi_to_nerfstudio(d):
    import numpy as np
-    def transform_camera(c):
-        convention_change = np.array([
-            [1, 0, 0, 0],
-            [0,-1, 0, 0],
-            [0, 0,-1, 0],
-            [0, 0, 0, 1]
-        ])
-        return (np.array(c) @ convention_change).tolist()
+    CAM_CONVENTION_CHANGE = np.array([
+        [1, 0, 0, 0],
+        [0,-1, 0, 0],
+        [0, 0,-1, 0],
+        [0, 0, 0, 1]
+    ])
+
+    INV_CAM_CONVENTION_CHANGE = CAM_CONVENTION_CHANGE  # works for this particular matrix
+
+    def transform_matrix_cam_to_world(c):
+        return (np.array(c) @ CAM_CONVENTION_CHANGE).tolist()
+
+    def transform_camera_dir_vec(c):
+        return (INV_CAM_CONVENTION_CHANGE[:3, :3] @ c).tolist()

    by_camera = {}
    for c in d:
@@ -125,15 +181,29 @@ def transform_camera(c):
            "aabb_scale": 16,
            "frames": [],
            "orientation_override": "none",  # stops Nerfstudio from breaking our "up" direction
+           "auto_scale_poses_override": False,
            "ply_file_path": "./sparse_pc.ply"
        }
+
+       distortion = c.get('camera_distortion', None)
+       if distortion is not None:
+           for k, v in distortion.items():
+               params[k] = v
+
+       for prop in ['rolling_shutter_time', 'exposure_time']:
+           if c[prop] is not None and c[prop] != 0:
+               params[prop] = c[prop]
+
        cam_id = json.dumps(params, sort_keys=True)
        if cam_id not in by_camera:
            by_camera[cam_id] = params

        converted = {
            'file_path': os.path.join("./images", c['image_path'].split('/')[-1]),
-           "transform_matrix": transform_camera(c['T_pointcloud_camera'])
+           "transform_matrix": transform_matrix_cam_to_world(c['T_pointcloud_camera']),
+           "camera_linear_velocity": transform_camera_dir_vec(c['camera_linear_velocity']),
+           "camera_angular_velocity": transform_camera_dir_vec(c['camera_angular_velocity']),
+           "motion_blur_score": c["motion_blur_score"]
        }
        if 'depth_image_path' in c:
            converted['depth_file_path'] = os.path.join("./images", c['depth_image_path'].split('/')[-1])
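For reference, convert_distortion maps the SDK calibration models onto the OPENCV / OPENCV_FISHEYE parameter names that Nerfstudio understands, and the loop above merges those keys into the per-camera params block. A hypothetical input/output pair (values made up for illustration):

    cam = {
        'model': 'kannala-brandt4',
        'distortionCoefficients': [0.01, -0.002, 0.0003, -0.00004],
        'principalPointX': 640.0, 'principalPointY': 360.0,
        'focalLengthX': 700.0, 'focalLengthY': 700.0,
    }
    convert_distortion(cam)
    # -> {'k1': 0.01, 'k2': -0.002, 'k3': 0.0003, 'k4': -4e-05,
    #     'model': 'OPENCV_FISHEYE', 'cx': 640.0, 'cy': 360.0,
    #     'fx': 700.0, 'fy': 700.0}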
@@ -237,12 +307,16 @@ def process(args):
    savedKeyFrames = {}
    pointClouds = {}
    sparsePointColors = {}
+   blurScores = {}
    frameWidth = -1
    frameHeight = -1
    intrinsics = None
    visualizer = None
    isTracking = False
    finalMapWritten = False
+   exposureTime = 0
+   rollingShutterTime = 0
+   cameraDistortion = None

    def post_process_point_clouds(globalPointCloud, sparse_point_cloud_df):
        # Save point clouds
@@ -277,6 +351,7 @@ def processMappingOutput(output):
        nonlocal savedKeyFrames
        nonlocal pointClouds
        nonlocal sparsePointColors
+       nonlocal blurScores
        nonlocal frameWidth
        nonlocal frameHeight
        nonlocal intrinsics
@@ -312,7 +387,11 @@ def processMappingOutput(output):
                frameWidth = targetFrame.image.getWidth()
                frameHeight = targetFrame.image.getHeight()

-           undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
+           frameSet = keyFrame.frameSet
+           if args.no_undistort:
+               undistortedFrame = targetFrame
+           else:
+               undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
            if intrinsics is None: intrinsics = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()
            img = undistortedFrame.image.toArray()
@@ -322,15 +401,40 @@ def processMappingOutput(output):

        # Find colors for sparse features
        SHOW_FEATURE_MARKERS = True
+       SHOW_MOTION_BLUR = False
+
+       WToC = targetFrame.cameraPose.getWorldToCameraMatrix()
+       vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
+
+       blurScores[frameId] = blurScore(WToC, vCam, vAngCam, undistortedFrame, exposureTime)
+
        for mpObs in undistortedFrame.sparseFeatures:
+           pPix = [mpObs.pixelCoordinates.x, mpObs.pixelCoordinates.y]
+           px = np.clip(round(pPix[0]), 0, img.shape[1] - 1)
+           py = np.clip(round(pPix[1]), 0, img.shape[0] - 1)
            if mpObs.id not in sparsePointColors:
-               px = np.clip(round(mpObs.pixelCoordinates.x), 0, img.shape[1] - 1)
-               py = np.clip(round(mpObs.pixelCoordinates.y), 0, img.shape[0] - 1)
                rgb = list(img[py, px, ...].view(np.uint8))
                sparsePointColors[mpObs.id] = rgb
-               if args.preview and SHOW_FEATURE_MARKERS:
-                   MARKER_COLOR = (0, 255, 0)
-                   cv2.circle(bgrImage, (px, py), 5, MARKER_COLOR, thickness=1)
+               markerColor = (0, 255, 0)   # newly colored feature
+           else:
+               markerColor = (0, 128, 0)   # previously seen feature
+
+           if args.preview:
+               if SHOW_FEATURE_MARKERS:
+                   cv2.circle(bgrImage, (px, py), 5, markerColor, thickness=1)
+               if SHOW_MOTION_BLUR:
+                   BLUR_COLOR = (128, 255, 0)
+                   VISU_SCALE = 5
+
+                   pW = mpObs.position
+                   pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
+                   pointVelCam = vCam + np.cross(vAngCam, pCam)
+                   vPix = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()[:2, :2] @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
+                   dt = float(VISU_SCALE) / 30  # visualization only
+                   vPix *= dt  # apply the time scale once; streak length is |vPix| pixels
+                   blurBegin = [int(c) for c in pPix - vPix / 2]
+                   blurEnd = [int(c) for c in pPix + vPix / 2]
+                   cv2.line(bgrImage, (blurBegin[0], blurBegin[1]), (blurEnd[0], blurEnd[1]), BLUR_COLOR, thickness=1)

        # Legacy: support SDK versions which also produced images where frameSet.depthFrame.image was None
        if frameSet.depthFrame is not None and frameSet.depthFrame.image is not None and not useMono:
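A note on the vPix expression used both here and in blurScore: for a pinhole projection u = fx*x/z + cx, the exact time derivative is du/dt = fx*(vx/z - x*vz/z^2); the code keeps only the first term, i.e. it ignores apparent motion caused by the depth rate vz. A quick finite-difference check of that approximation (hypothetical numbers):

    import numpy as np

    fx, fy, cx, cy = 700.0, 700.0, 640.0, 360.0   # made-up intrinsics
    p = np.array([0.3, -0.2, 2.0])                # point in camera coordinates
    v = np.array([0.5, 0.1, 0.0])                 # its velocity; vz = 0 here

    def project(q):
        return np.array([fx * q[0] / q[2] + cx, fy * q[1] / q[2] + cy])

    dt = 1e-6
    v_pix_exact = (project(p + v * dt) - project(p)) / dt
    v_pix_approx = np.diag([fx, fy]) @ (v[:2] / p[2])  # the formula used in the diff
    # v_pix_exact matches v_pix_approx when vz == 0; a nonzero vz would add
    # a -[fx*x, fy*y] * vz / z**2 term that the approximation drops.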
@@ -357,26 +461,32 @@ def processMappingOutput(output):
        sparseObservations = {}
        # OrderedDict to avoid undefined iteration order = different output files for the same input
        sparsePointCloud = OrderedDict()
-       imageSharpness = []
+       blurriness = []
        for frameId in output.map.keyFrames:
-           imageSharpness.append((frameId, blurScore(f"{tmp_dir}/frame_{frameId:05}.{args.image_format}")))
+           blurriness.append((frameId, blurScores.get(frameId, 1e6)))

        # Look two images forward and two backwards, if current frame is blurriest, don't use it
-       for i in range(len(imageSharpness)):
-           if i + 2 > len(imageSharpness): break
-           group = [imageSharpness[j + i] for j in range(-2, 2)]
-           group.sort(key=lambda x: x[1])
-           cur = imageSharpness[i][0]
-           if group[0][0] == cur:
-               blurryImages[cur] = True
+       if args.blur_filter_range != 0:
+           assert args.blur_filter_range > 1
+           blur_filter_radius_lo = int(math.ceil((args.blur_filter_range - 1) * 0.5))
+           blur_filter_radius_hi = int(math.floor((args.blur_filter_range - 1) * 0.5))
+           print('blur filter range [-%d, %d)' % (blur_filter_radius_lo, blur_filter_radius_hi + 1))
+           for i in range(blur_filter_radius_lo, max(0, len(blurriness) - blur_filter_radius_hi)):
+               group = [blurriness[j + i] for j in range(-blur_filter_radius_lo, blur_filter_radius_hi + 1)]
+               group.sort(key=lambda x: x[1])
+               cur = blurriness[i][0]
+               # higher score = blurrier, so the blurriest frame in the window is group[-1]
+               if group[-1][0] == cur:
+                   blurryImages[cur] = True

        trainingFrames = []
        validationFrames = []
        globalPointCloud = []
-       index = 0
+       index = 1  # start from 1 to match COLMAP/Nerfstudio frame numbering (fragile!)
        name = os.path.split(args.output)[-1]
        for frameId in output.map.keyFrames:
-           if blurryImages.get(frameId): continue  # Skip blurry images
+           if blurryImages.get(frameId):
+               print('skipping blurry frame %s' % str(frameId))
+               continue  # Skip blurry images

            # Image and pose data
            keyFrame = output.map.keyFrames.get(frameId)
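The filter above drops a key frame when it has the highest blur score within a sliding window of blur_filter_range frames; for the default of 4, the window spans two frames behind and one ahead of the current frame, matching the removed hard-coded version. The same logic as a standalone function, with made-up scores:

    import math

    def filter_blurry(scores, blur_filter_range=4):
        # scores: list of (frame_id, blur_score); higher score = blurrier
        lo = math.ceil((blur_filter_range - 1) * 0.5)
        hi = math.floor((blur_filter_range - 1) * 0.5)
        blurry = set()
        for i in range(lo, max(0, len(scores) - hi)):
            window = scores[i - lo : i + hi + 1]
            if max(window, key=lambda x: x[1])[0] == scores[i][0]:
                blurry.add(scores[i][0])
        return blurry

    print(filter_blurry([(1, 0.2), (2, 0.3), (3, 0.9), (4, 0.25), (5, 0.3)]))
    # -> {3}: frame 3 is the blurriest in its window and would be skipped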
@@ -397,15 +507,24 @@ def processMappingOutput(output):
            sparseObservations[frameId] = sparseObsForKeyFrame

            # Camera data
+           vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
            frame = {
                "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
                "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(),  # 4x4 camera-to-world matrix (camera to point cloud coordinates)
                "camera_intrinsics": intrinsics.tolist(),  # 3x3 intrinsic matrix K
+               "camera_linear_velocity": vCam.tolist(),
+               "camera_angular_velocity": vAngCam.tolist(),
+               "rolling_shutter_time": rollingShutterTime,
+               "motion_blur_score": blurScores.get(frameId, 1e6),
+               "exposure_time": exposureTime,
                "camera_height": frameHeight,  # image height, in pixels
                "camera_width": frameWidth,  # image width, in pixels
                "camera_id": index  # camera id, not used
            }

+           if cameraDistortion is not None:
+               frame['camera_distortion'] = cameraDistortion
+
            oldImgName = f"{tmp_dir}/frame_{frameId:05}.{args.image_format}"
            newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
            shutil.move(oldImgName, newImgName)
@@ -494,14 +613,14 @@ def onMappingOutput(output):
            print(f"ERROR: {e}", flush=True)
            raise e

-def detect_device_preset(input_dir):
+def parse_input_dir(input_dir):
    cameras = None
    calibrationJson = f"{input_dir}/calibration.json"
    if os.path.exists(calibrationJson):
        with open(calibrationJson) as f:
            calibration = json.load(f)
            if "cameras" in calibration:
-               cameras = len(calibration["cameras"])
+               cameras = calibration["cameras"]
    device = None
    metadataJson = f"{input_dir}/metadata.json"
    if os.path.exists(metadataJson):
@@ -540,9 +659,16 @@ def detect_device_preset(input_dir):

    tmp_dir = tempfile.mkdtemp()

-   device_preset, cameras = detect_device_preset(args.input)
+   device_preset, cameras = parse_input_dir(args.input)
+
+   if cameras is not None:
+       cam = cameras[0]
+       exposureTime = cam.get('exposureTimeSeconds', 0)
+       rollingShutterTime = cam.get('shutterRollTimeSeconds', 0)
+       if args.no_undistort:
+           cameraDistortion = convert_distortion(cam)

-   useMono = args.mono or (cameras != None and cameras == 1)
+   useMono = args.mono or (cameras is not None and len(cameras) == 1)

    if useMono: config['useStereo'] = False
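parse_input_dir now returns the parsed camera list instead of just its length, so the caller can read per-camera exposure and rolling-shutter metadata. A hypothetical minimal calibration.json, showing only the fields this script reads (real files contain more; values are illustrative):

    calibration = {
        "cameras": [{
            "model": "pinhole",
            "distortionCoefficients": [0.01, -0.002, 0.0001],
            "focalLengthX": 700.0, "focalLengthY": 700.0,
            "principalPointX": 640.0, "principalPointY": 360.0,
            "exposureTimeSeconds": 0.01,      # -> exposureTime
            "shutterRollTimeSeconds": 0.02    # -> rollingShutterTime
        }]
    }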