Skip to content

Commit bb05ee4

Browse files
committed
Use mapping API velocity outputs
* Add undistorted mode to sai-cli process
* Parametrize and allow turning off the blurry frames filter
* Compute blur score from features and frame velocities
* Change image name indexing 0->1 to match Nerfstudio convention
* Read rolling shutter and exposure times from calibration.json (if available)
1 parent 823c886 commit bb05ee4

File tree

1 file changed

+164
-38
lines changed

1 file changed

+164
-38
lines changed

python/cli/process/process.py

Lines changed: 164 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55
import json
66
import os
7+
import math
78
from collections import OrderedDict
89

910
# --- The following mechanism allows using this both as a stand-alone
@@ -21,6 +22,8 @@ def define_args(parser):
2122
parser.add_argument('--fast', action='store_true', help='Fast but lower quality settings')
2223
parser.add_argument('--mono', action='store_true', help='Monocular mode: disable ToF and stereo data')
2324
parser.add_argument('--internal', action='append', type=str, help='Internal override parameters in the form --internal=name:value')
25+
parser.add_argument('--blur_filter_range', type=int, default=4, help='Remove key frames that are the blurriest in a neighborhood of this size (0=disabled)')
26+
parser.add_argument('--no_undistort', action='store_true', help='Do not undistort output images (only supported with certain devices)')
2427
parser.add_argument('--image_format', type=str, default='jpg', help="Color image format (use 'png' for top quality)")
2528
parser.add_argument("--preview", help="Show latest primary image as a preview", action="store_true")
2629
parser.add_argument("--preview3d", help="Show 3D visualization", action="store_true")
@@ -67,15 +70,34 @@ def grouping_function(row):
6770
grouped = df.assign(voxel_index=df.apply(grouping_function, axis=1)).groupby('voxel_index')
6871
return grouped.first().reset_index()[[c for c in df.columns if c != 'voxel_index']]
6972

70-
def blurScore(path):
71-
import cv2
73+
74+
def compute_cam_velocities(targetFrame, angularVelocity):
    """Rotate the camera's linear and angular velocity into camera coordinates.

    Args:
        targetFrame: frame whose ``cameraPose`` provides
            ``getWorldToCameraMatrix()`` and a ``velocity`` with
            ``x``/``y``/``z`` attributes (presumably expressed in world
            coordinates -- confirm against the mapping API).
        angularVelocity: object with ``x``/``y``/``z`` attributes; it is
            rotated with the same matrix as the linear velocity.

    Returns:
        Tuple ``(vCam, vAngCam)``: the two vectors multiplied by the
        upper-left 3x3 block of the world-to-camera matrix.
    """
    pose = targetFrame.cameraPose
    # Velocities are direction vectors, so only the 3x3 block is applied;
    # the translation column of the 4x4 matrix is deliberately left out.
    rotWToC = pose.getWorldToCameraMatrix()[:3, :3]
    vW = pose.velocity
    vCam = rotWToC @ [vW.x, vW.y, vW.z]
    vAngCam = rotWToC @ [angularVelocity.x, angularVelocity.y, angularVelocity.z]
    return vCam, vAngCam
81+
82+
def blurScore(WToC, vCam, vAngCam, targetFrame, exposureTime):
    """Score motion blur as the mean projected speed of the sparse features.

    For every feature in ``targetFrame.sparseFeatures`` the point is moved
    into camera coordinates with ``WToC``, its velocity is taken as
    ``vCam + cross(vAngCam, pCam)``, and the x/y part divided by depth is
    scaled by the upper-left 2x2 block of the intrinsic matrix. The z
    component of the point velocity is not used. The norms are averaged
    over all features.

    Args:
        WToC: 4x4 world-to-camera matrix.
        vCam: camera linear velocity in camera coordinates (array of 3).
        vAngCam: camera angular velocity in camera coordinates (array of 3).
        targetFrame: frame providing ``sparseFeatures`` (each with a
            ``position`` having ``x``/``y``/``z``) and
            ``cameraPose.camera.getIntrinsicMatrix()``.
        exposureTime: the mean speed is multiplied by this value when it is
            positive; ``0`` or ``None`` leaves the speed unscaled.

    Returns:
        The score as a float (higher means more apparent motion), or
        ``1e6`` when the frame has no sparse features.
    """
    import numpy as np

    features = list(targetFrame.sparseFeatures)
    if not features:
        # Nothing to measure: report a huge score, the same fallback value
        # the callers use for frames without a recorded score.
        return 1e6

    # Loop-invariant: fetch the intrinsics once instead of once per feature.
    focal = targetFrame.cameraPose.camera.getIntrinsicMatrix()[:2, :2]

    sumVels = 0.0
    for mpObs in features:
        pW = mpObs.position
        pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
        pointVelCam = vCam + np.cross(vAngCam, pCam)
        # Clamp the depth so points at or behind the camera plane cannot
        # cause a division by zero or flip the sign.
        vPix = focal @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
        sumVels += np.linalg.norm(vPix)

    # An unknown exposure time (0 or None) leaves the score as a plain speed.
    if exposureTime is not None and exposureTime > 0:
        sumVels *= exposureTime

    return float(sumVels / len(features))
79101

80102
def point_cloud_data_frame_to_ply(df, out_fn):
81103
with open(out_fn, 'wt') as f:
@@ -97,16 +119,50 @@ def point_cloud_data_frame_to_ply(df, out_fn):
97119
for prop in 'rgb': r.append(int(row[prop]))
98120
f.write(' '.join([str(v) for v in r]) + '\n')
99121

122+
def convert_distortion(cam):
    """Convert a calibration camera entry into named distortion parameters.

    Args:
        cam: camera dict. Reads ``distortionCoefficients`` (optional),
            ``model``, ``principalPointX``/``principalPointY`` and
            ``focalLengthX``/``focalLengthY``.

    Returns:
        ``None`` when the coefficients are missing or all zero. Otherwise a
        dict with the named coefficients plus ``model`` (``'OPENCV'`` or
        ``'OPENCV_FISHEYE'``), ``cx``, ``cy``, ``fx`` and ``fy``.
        Coefficients are matched to names by position: when fewer
        coefficients than names are given the trailing names are omitted,
        and surplus coefficients are ignored.

    Raises:
        RuntimeError: if the coefficients are non-zero and ``cam['model']``
            is not one of the supported models.
    """
    coeffs = cam.get('distortionCoefficients')
    # Missing or all-zero coefficients mean there is nothing to convert.
    if coeffs is None or all(c == 0.0 for c in coeffs):
        return None

    # calibration model -> (output model, coefficient names in order, fixed extras)
    supported = {
        'brown-conrady': ('OPENCV', ('k1', 'k2', 'p1', 'p2', 'k3', 'k4', 'k5', 'k6'), {}),
        # This branch carries radial terms only, so the tangential ones are zero.
        'pinhole': ('OPENCV', ('k1', 'k2', 'k3'), {'p1': 0, 'p2': 0}),
        'kannala-brandt4': ('OPENCV_FISHEYE', ('k1', 'k2', 'k3', 'k4'), {}),
    }

    cam_model = cam['model']
    if cam_model not in supported:
        raise RuntimeError(f"unsupported camera model: {cam['model']}")
    model, names, fixed = supported[cam_model]

    r = dict(zip(names, coeffs))
    r.update(fixed)
    r['model'] = model
    r['cx'] = cam['principalPointX']
    r['cy'] = cam['principalPointY']
    r['fx'] = cam['focalLengthX']
    r['fy'] = cam['focalLengthY']
    return r
149+
100150
def convert_json_taichi_to_nerfstudio(d):
101151
import numpy as np
102-
def transform_camera(c):
103-
convention_change = np.array([
104-
[1, 0, 0, 0],
105-
[0,-1, 0, 0],
106-
[0, 0,-1, 0],
107-
[0, 0, 0, 1]
108-
])
109-
return (np.array(c) @ convention_change).tolist()
152+
CAM_CONVENTION_CHANGE = np.array([
153+
[1, 0, 0, 0],
154+
[0,-1, 0, 0],
155+
[0, 0,-1, 0],
156+
[0, 0, 0, 1]
157+
])
158+
159+
INV_CAM_CONVENTION_CHANGE = CAM_CONVENTION_CHANGE # works for this particular matrix
160+
161+
def transform_matrix_cam_to_world(c):
162+
return (np.array(c) @ CAM_CONVENTION_CHANGE).tolist()
163+
164+
def transform_camera_dir_vec(c):
165+
return (INV_CAM_CONVENTION_CHANGE[:3, :3] @ c).tolist()
110166

111167
by_camera = {}
112168
for c in d:
@@ -125,15 +181,29 @@ def transform_camera(c):
125181
"aabb_scale": 16,
126182
"frames": [],
127183
"orientation_override": "none", # stops Nerfstudio from breaking our "up" direction
184+
"auto_scale_poses_override": False,
128185
"ply_file_path": "./sparse_pc.ply"
129186
}
187+
188+
distortion = c.get('camera_distortion', None)
189+
if distortion is not None:
190+
for k, v in distortion.items():
191+
params[k] = v
192+
193+
for prop in ['rolling_shutter_time', 'exposure_time']:
194+
if c[prop] is not None and c[prop] != 0:
195+
params[prop] = c[prop]
196+
130197
cam_id = json.dumps(params, sort_keys=True)
131198
if cam_id not in by_camera:
132199
by_camera[cam_id] = params
133200

134201
converted = {
135202
'file_path': os.path.join("./images", c['image_path'].split('/')[-1]),
136-
"transform_matrix": transform_camera(c['T_pointcloud_camera'])
203+
"transform_matrix": transform_matrix_cam_to_world(c['T_pointcloud_camera']),
204+
"camera_linear_velocity": transform_camera_dir_vec(c['camera_linear_velocity']),
205+
"camera_angular_velocity": transform_camera_dir_vec(c['camera_angular_velocity']),
206+
"motion_blur_score": c["motion_blur_score"]
137207
}
138208
if 'depth_image_path' in c:
139209
converted['depth_file_path'] = os.path.join("./images", c['depth_image_path'].split('/')[-1])
@@ -237,12 +307,16 @@ def process(args):
237307
savedKeyFrames = {}
238308
pointClouds = {}
239309
sparsePointColors = {}
310+
blurScores = {}
240311
frameWidth = -1
241312
frameHeight = -1
242313
intrinsics = None
243314
visualizer = None
244315
isTracking = False
245316
finalMapWritten = False
317+
exposureTime = 0
318+
rollingShutterTime = 0
319+
cameraDistortion = None
246320

247321
def post_process_point_clouds(globalPointCloud, sparse_point_cloud_df):
248322
# Save point clouds
@@ -277,6 +351,7 @@ def processMappingOutput(output):
277351
nonlocal savedKeyFrames
278352
nonlocal pointClouds
279353
nonlocal sparsePointColors
354+
nonlocal blurScores
280355
nonlocal frameWidth
281356
nonlocal frameHeight
282357
nonlocal intrinsics
@@ -312,7 +387,11 @@ def processMappingOutput(output):
312387
frameWidth = targetFrame.image.getWidth()
313388
frameHeight = targetFrame.image.getHeight()
314389

315-
undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
390+
frameSet = keyFrame.frameSet
391+
if args.no_undistort:
392+
undistortedFrame = targetFrame
393+
else:
394+
undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
316395
if intrinsics is None: intrinsics = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()
317396
img = undistortedFrame.image.toArray()
318397

@@ -322,15 +401,40 @@ def processMappingOutput(output):
322401

323402
# Find colors for sparse features
324403
SHOW_FEATURE_MARKERS = True
404+
SHOW_MOTION_BLUR = False
405+
406+
WToC = targetFrame.cameraPose.getWorldToCameraMatrix()
407+
vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
408+
409+
blurScores[frameId] = blurScore(WToC, vCam, vAngCam, undistortedFrame, exposureTime)
410+
325411
for mpObs in undistortedFrame.sparseFeatures:
412+
pPix = [mpObs.pixelCoordinates.x, mpObs.pixelCoordinates.y]
413+
px = np.clip(round(pPix[0]), 0, img.shape[1]-1)
414+
py = np.clip(round(pPix[1]), 0, img.shape[0]-1)
326415
if mpObs.id not in sparsePointColors:
327-
px = np.clip(round(mpObs.pixelCoordinates.x), 0, img.shape[1]-1)
328-
py = np.clip(round(mpObs.pixelCoordinates.y), 0, img.shape[0]-1)
329416
rgb = list(img[py, px, ...].view(np.uint8))
330417
sparsePointColors[mpObs.id] = rgb
331-
if args.preview and SHOW_FEATURE_MARKERS:
332-
MARKER_COLOR = (0, 255, 0)
333-
cv2.circle(bgrImage, (px, py), 5, MARKER_COLOR, thickness=1)
418+
markerColor = (0, 255, 0)
419+
else:
420+
markerColor = (0, 128, 0)
421+
422+
if args.preview:
423+
if SHOW_FEATURE_MARKERS:
424+
cv2.circle(bgrImage, (px, py), 5, markerColor, thickness=1)
425+
if SHOW_MOTION_BLUR:
426+
BLUR_COLOR = (128, 255, 0)
427+
VISU_SCALE = 5
428+
429+
pW = mpObs.position
430+
pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
431+
pointVelCam = vCam + np.cross(vAngCam, pCam)
432+
vPix = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()[:2,:2] @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
433+
dt = float(VISU_SCALE) / 30 # visualization only
434+
vPix *= dt
435+
blurBegin = [int(c) for c in pPix - vPix*dt/2]
436+
blurEnd = [int(c) for c in pPix + vPix*dt/2]
437+
cv2.line(bgrImage, (blurBegin[0], blurBegin[1]), (blurEnd[0], blurEnd[1]), BLUR_COLOR, thickness=1)
334438

335439
# Legacy: support SDK versions which also produced images where frameSet.depthFrame.image was None
336440
if frameSet.depthFrame is not None and frameSet.depthFrame.image is not None and not useMono:
@@ -357,26 +461,32 @@ def processMappingOutput(output):
357461
sparseObservations = {}
358462
# OrderedDict to avoid undefined iteration order = different output files for the same input
359463
sparsePointCloud = OrderedDict()
360-
imageSharpness = []
464+
blurriness = []
361465
for frameId in output.map.keyFrames:
362-
imageSharpness.append((frameId, blurScore(f"{tmp_dir}/frame_{frameId:05}.{args.image_format}")))
466+
blurriness.append((frameId, blurScores.get(frameId, 1e6)))
363467

364468
# Look two images forward and two backwards, if current frame is blurriest, don't use it
365-
for i in range(len(imageSharpness)):
366-
if i + 2 > len(imageSharpness): break
367-
group = [imageSharpness[j+i] for j in range(-2,2)]
368-
group.sort(key=lambda x : x[1])
369-
cur = imageSharpness[i][0]
370-
if group[0][0] == cur:
371-
blurryImages[cur] = True
469+
if args.blur_filter_range != 0:
470+
assert(args.blur_filter_range > 1)
471+
blur_filter_radius_lo = int(math.ceil((args.blur_filter_range - 1) * 0.5))
472+
blur_filter_radius_hi = int(math.floor((args.blur_filter_range - 1) * 0.5))
473+
print('blur filter range [-%d, %d)' % (blur_filter_radius_lo, blur_filter_radius_hi+1))
474+
for i in range(blur_filter_radius_lo, max(0, len(blurriness) - blur_filter_radius_hi)):
475+
group = [blurriness[j+i] for j in range(-blur_filter_radius_lo,blur_filter_radius_hi+1)]
476+
group.sort(key=lambda x : x[1])
477+
cur = blurriness[i][0]
478+
if group[0][0] == cur:
479+
blurryImages[cur] = True
372480

373481
trainingFrames = []
374482
validationFrames = []
375483
globalPointCloud = []
376-
index = 0
484+
index = 1 # start from 1 to match COLMAP/Nerfstudio frame numbering (fragile!)
377485
name = os.path.split(args.output)[-1]
378486
for frameId in output.map.keyFrames:
379-
if blurryImages.get(frameId): continue # Skip blurry images
487+
if blurryImages.get(frameId):
488+
print('skipping blurry frame %s' % str(frameId))
489+
continue # Skip blurry images
380490

381491
# Image and pose data
382492
keyFrame = output.map.keyFrames.get(frameId)
@@ -397,15 +507,24 @@ def processMappingOutput(output):
397507
sparseObservations[frameId] = sparseObsForKeyFrame
398508

399509
# Camera data
510+
vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
400511
frame = {
401512
"image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
402513
"T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 matrix, the transformation matrix from camera coordinate to point cloud coordinate
403514
"camera_intrinsics": intrinsics.tolist(), # 3x3 matrix, the camera intrinsics matrix K
515+
"camera_linear_velocity": vCam.tolist(),
516+
"camera_angular_velocity": vAngCam.tolist(),
517+
"rolling_shutter_time": rollingShutterTime,
518+
"motion_blur_score": blurScores.get(frameId, 1e6),
519+
"exposure_time": exposureTime,
404520
"camera_height": frameHeight, # image height, in pixel
405521
"camera_width": frameWidth, # image width, in pixel
406522
"camera_id": index # camera id, not used
407523
}
408524

525+
if cameraDistortion is not None:
526+
frame['camera_distortion'] = cameraDistortion
527+
409528
oldImgName = f"{tmp_dir}/frame_{frameId:05}.{args.image_format}"
410529
newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
411530
shutil.move(oldImgName, newImgName)
@@ -494,14 +613,14 @@ def onMappingOutput(output):
494613
print(f"ERROR: {e}", flush=True)
495614
raise e
496615

497-
def detect_device_preset(input_dir):
616+
def parse_input_dir(input_dir):
498617
cameras = None
499618
calibrationJson = f"{input_dir}/calibration.json"
500619
if os.path.exists(calibrationJson):
501620
with open(calibrationJson) as f:
502621
calibration = json.load(f)
503622
if "cameras" in calibration:
504-
cameras = len(calibration["cameras"])
623+
cameras = calibration["cameras"]
505624
device = None
506625
metadataJson = f"{input_dir}/metadata.json"
507626
if os.path.exists(metadataJson):
@@ -540,9 +659,16 @@ def detect_device_preset(input_dir):
540659

541660
tmp_dir = tempfile.mkdtemp()
542661

543-
device_preset, cameras = detect_device_preset(args.input)
662+
device_preset, cameras = parse_input_dir(args.input)
663+
664+
if cameras is not None:
665+
cam = cameras[0]
666+
exposureTime = cam.get('exposureTimeSeconds', 0)
667+
rollingShutterTime = cam.get('shutterRollTimeSeconds', 0)
668+
if args.no_undistort:
669+
cameraDistortion = convert_distortion(cam)
544670

545-
useMono = args.mono or (cameras != None and cameras == 1)
671+
useMono = args.mono or (cameras != None and len(cameras) == 1)
546672

547673
if useMono: config['useStereo'] = False
548674

0 commit comments

Comments
 (0)