2
2
Post-process data in Spectacular AI format and convert it to input
3
3
for NeRF or Gaussian Splatting methods, or export optimized pointclouds in ply and pcd formats.
4
4
"""
5
+ import json
6
+ import os
7
+ from collections import OrderedDict
5
8
6
9
# --- The following mechanism allows using this both as a stand-alone
7
10
# script and as a subcommand in sai-cli.
@@ -27,16 +30,176 @@ def define_subparser(subparsers):
27
30
sub .set_defaults (func = process )
28
31
return define_args (sub )
29
32
33
def interpolate_missing_properties(df_source, df_query, k_nearest=3):
    """Copy the non-position columns of ``df_source`` onto the points of ``df_query``.

    For every query point, the ``k_nearest`` closest source points (by
    Euclidean distance over the ``x``, ``y``, ``z`` columns) are looked up and
    each remaining source column is averaged over those neighbours.

    Args:
        df_source: DataFrame with ``x``, ``y``, ``z`` columns plus any number
            of numeric property columns.
        df_query: DataFrame with at least ``x``, ``y``, ``z`` columns.
        k_nearest: Number of neighbours to average over. Clamped to the
            number of source points.

    Returns:
        A new DataFrame with the columns of ``df_source`` (same order), one
        row per query point and a fresh ``0..n-1`` index.

    Raises:
        ValueError: If ``df_source`` has no rows.
    """
    # Imported locally: this helper lives at module level, where pandas is
    # not imported (the file keeps heavy imports inside functions).
    import numpy as np
    import pandas as pd
    from scipy.spatial import KDTree

    xyz = list('xyz')

    print('generating a simplified point cloud (this may take a while...)')

    if df_source.shape[0] == 0:
        raise ValueError('cannot interpolate properties from an empty point cloud')

    n = df_query.shape[0]
    # Asking for more neighbours than there are points makes KDTree return
    # out-of-range indices, so clamp.
    k = max(1, min(int(k_nearest), df_source.shape[0]))

    tree = KDTree(df_source[xyz].values)
    _, ii = tree.query(df_query[xyz].values, k=k)
    # query() squeezes the neighbour axis when k == 1; restore it.
    ii = np.asarray(ii).reshape(n, k)

    other_cols = [c for c in df_source.columns if c not in xyz]

    df_result = pd.DataFrame(index=range(n), columns=df_source.columns, dtype=float)
    # Use raw arrays so that values are assigned by position, independent of
    # whatever index labels df_query / df_source carry.
    df_result[xyz] = df_query[xyz].values
    if other_cols:
        source_values = df_source[other_cols].to_numpy(dtype=float)
        # source_values[ii] has shape (n, k, len(other_cols)); average over k.
        df_result[other_cols] = source_values[ii].mean(axis=1)

    return df_result
52
+
53
def exclude_points(df_source, df_exclude, radius):
    """Drop the rows of ``df_source`` that lie close to any point in ``df_exclude``.

    Args:
        df_source: DataFrame with ``x``, ``y``, ``z`` columns to filter.
        df_exclude: DataFrame with ``x``, ``y``, ``z`` columns marking the
            regions to remove.
        radius: A source point is removed when at least one exclusion point
            is found within this distance by the KD-tree ball query.

    Returns:
        The remaining rows of ``df_source``, with their original index labels.
    """
    from scipy.spatial import KDTree

    axes = ['x', 'y', 'z']
    exclusion_tree = KDTree(df_exclude[axes].values)
    # With return_length=True the query yields, per source point, how many
    # exclusion points fall inside the ball.
    neighbour_counts = exclusion_tree.query_ball_point(
        df_source[axes], r=radius, return_length=True)
    keep = neighbour_counts == 0
    return df_source.iloc[keep]
61
+
62
def voxel_decimate(df, cell_size):
    """Thin out a point cloud by keeping one entry per voxel.

    Each row is assigned to a voxel by rounding ``x / cell_size``,
    ``y / cell_size`` and ``z / cell_size``; rows are grouped by that voxel
    and the ``first()`` aggregate of every group is returned.

    Args:
        df: DataFrame with ``x``, ``y``, ``z`` columns (plus any others).
        cell_size: Edge length of a voxel, in the same units as the coordinates.

    Returns:
        A DataFrame with the columns of ``df`` (minus any ``voxel_index``
        column) and one row per occupied voxel.
    """
    def voxel_of(point):
        return tuple(round(point[axis] / cell_size) for axis in 'xyz')

    kept_columns = [name for name in df.columns if name != 'voxel_index']
    keyed = df.assign(voxel_index=df.apply(voxel_of, axis=1))
    per_voxel = keyed.groupby('voxel_index').first()
    return per_voxel.reset_index()[kept_columns]
67
+
68
def blurScore(path):
    """Compute a frequency-domain score for the image stored at ``path``.

    The image is loaded with OpenCV, converted to grayscale and passed
    through a 2D FFT; the score is the 95th percentile of the magnitude
    spectrum.

    Args:
        path: File path of the image to read.

    Returns:
        The 95th percentile of the FFT magnitudes, as returned by
        ``np.percentile``.
    """
    import cv2
    import numpy as np

    grayscale = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
    spectrum = np.fft.fftshift(np.fft.fft2(grayscale))
    return np.percentile(np.abs(spectrum), 95)
77
+
78
def convert_json_taichi_to_nerfstudio(d):
    """Convert a list of per-frame "Taichi" records to one Nerfstudio-style dict.

    Args:
        d: Iterable of dicts, each with ``camera_intrinsics`` (3x3 nested
            list), ``camera_width``, ``camera_height``, ``image_path``,
            ``T_pointcloud_camera`` (4x4 nested list) and optionally
            ``depth_image_path``.

    Returns:
        A dict holding the shared camera parameters (``fl_x``, ``fl_y``,
        ``cx``, ``cy``, ``w``, ``h``, zero distortion, ``aabb_scale``) and a
        ``frames`` list with one ``file_path`` / ``transform_matrix`` entry
        (plus ``depth_file_path`` when available) per input record.

    Raises:
        RuntimeError: If the records do not all share exactly one set of
            camera parameters (this includes empty input).
    """
    import numpy as np

    # Right-multiplying by this matrix negates the Y and Z axes of the camera frame.
    axis_flip = np.diag([1, -1, -1, 1])

    def in_images_dir(path):
        return os.path.join("./images", path.split('/')[-1])

    cameras = {}
    for frame in d:
        intrinsics = frame['camera_intrinsics']
        camera = {
            "fl_x": intrinsics[0][0],
            "fl_y": intrinsics[1][1],
            "k1": 0,
            "k2": 0,
            "p1": 0,
            "p2": 0,
            "cx": intrinsics[0][2],
            "cy": intrinsics[1][2],
            "w": frame['camera_width'],
            "h": frame['camera_height'],
            "aabb_scale": 16,
            'frames': [],
        }
        # The serialized parameters (with an empty frame list) identify the camera.
        signature = json.dumps(camera, sort_keys=True)
        target = cameras.setdefault(signature, camera)

        entry = {
            'file_path': in_images_dir(frame['image_path']),
            "transform_matrix": (np.array(frame['T_pointcloud_camera']) @ axis_flip).tolist(),
        }
        if 'depth_image_path' in frame:
            entry['depth_file_path'] = in_images_dir(frame['depth_image_path'])

        target['frames'].append(entry)

    if len(cameras) != 1:
        raise RuntimeError("unexpected number of cameras")

    return next(iter(cameras.values()))
124
+
125
+ # TODO: don't use "Taichi" as the intermediate format
126
def convert_json_taichi_to_colmap(pose_data, points_df, sparse_observations, nerfstudio_fake_obs=True):
    """Convert "Taichi"-style pose records and a point cloud to COLMAP-style rows.

    Args:
        pose_data: Sequence of dicts with ``camera_intrinsics`` (3x3 nested
            list), ``camera_width``, ``camera_height``, ``image_path`` and
            ``T_pointcloud_camera`` (4x4 nested list). The position in the
            sequence is used as the image id.
        points_df: DataFrame with ``x``, ``y``, ``z``, ``r``, ``g``, ``b``
            columns and optionally ``id``. A missing or zero id means
            "assign a fresh one".
        sparse_observations: Mapping from image id to an iterable of
            observations exposing ``id`` and ``pixelCoordinates.x`` / ``.y``.
        nerfstudio_fake_obs: When true, images without observations get two
            placeholder observations and every point gets a placeholder
            error / track entry.

    Returns:
        Tuple ``(points, images, cameras)``:

        * ``points``: one list per point,
          ``[id, x, y, z, r, g, b]`` (+ ``[error, image_id, point_idx]`` when
          ``nerfstudio_fake_obs`` is set).
        * ``images``: two consecutive lists per image, first
          ``[image_id, qw, qx, qy, qz, tx, ty, tz, camera_id, file_name]``,
          then the flat ``[x, y, point_id, ...]`` observation list.
        * ``cameras``: a single ``PINHOLE`` entry taken from the first pose
          record (empty if ``pose_data`` is empty).
    """
    from scipy.spatial.transform import Rotation as R
    import numpy as np

    images = []
    cameras = []
    camera_id = 0
    max_pt_id = 0
    for image_id, c in enumerate(pose_data):
        k = c['camera_intrinsics']
        # The stored pose is inverted before being written out.
        mat = np.linalg.inv(np.array(c['T_pointcloud_camera']))
        # SciPy returns quaternions scalar-last; the output row is scalar-first.
        qx, qy, qz, qw = R.from_matrix(mat[:3, :3]).as_quat()
        q = [qw, qx, qy, qz]
        p = list(mat[:3, 3])
        images.append([image_id] + q + p + [camera_id, os.path.split(c['image_path'])[-1]])

        observations = []
        for pt in sparse_observations.get(image_id, {}):
            max_pt_id = max(max_pt_id, pt.id)
            observations.extend([pt.pixelCoordinates.x, pt.pixelCoordinates.y, pt.id])

        if nerfstudio_fake_obs and len(observations) == 0:
            observations = [100, 100, 0, 200, 200, 1]  # NeRFstudio loader will crash without this

        images.append(observations)

        # TODO: variable intrinsics
        if len(cameras) == 0:
            cameras = [[
                camera_id,
                'PINHOLE',
                c['camera_width'],
                c['camera_height'],
                k[0][0],
                k[1][1],
                k[0][2],
                k[1][2]
            ]]

    has_ids = 'id' in points_df.columns
    if has_ids and len(points_df) > 0:
        # Freshly generated ids are counted up from max_pt_id. Seed it with
        # the explicit ids of the point cloud too, otherwise a generated id
        # can collide with an explicit one that appears later in the table.
        max_pt_id = max(max_pt_id, int(points_df['id'].max()))

    points = []
    for _, row in points_df.iterrows():
        point_id = row['id'] if has_ids else 0

        if point_id == 0:
            max_pt_id += 1
            point_id = max_pt_id

        point = [
            int(point_id),
            row['x'],
            row['y'],
            row['z'],
            round(row['r']),
            round(row['g']),
            round(row['b'])
        ]

        # TODO: compute reprojection errors here if really necessary for some use case
        if nerfstudio_fake_obs:
            fake_err = 1
            fake_img_id, fake_point_idx = 0, 0
            point.extend([fake_err, fake_img_id, fake_point_idx])

        points.append(point)

    return points, images, cameras
195
+
30
196
def process (args ):
31
197
import spectacularAI
32
198
import cv2
33
- import json
34
- import os
35
199
import shutil
36
200
import tempfile
37
201
import numpy as np
38
202
import pandas as pd
39
- from collections import OrderedDict
40
203
41
204
# Overwrite format if output is set to pointcloud
42
205
if args .output .endswith (".ply" ):
@@ -46,157 +209,6 @@ def process(args):
46
209
47
210
useMono = None
48
211
49
- def interpolate_missing_properties (df_source , df_query , k_nearest = 3 ):
50
- from scipy .spatial import KDTree
51
- xyz = list ('xyz' )
52
-
53
- print ('generating a simplified point cloud (this may take a while...)' )
54
-
55
- tree = KDTree (df_source [xyz ].values )
56
- _ , ii = tree .query (df_query [xyz ], k = k_nearest )
57
- n = df_query .shape [0 ]
58
-
59
- df_result = pd .DataFrame (0 , index = range (n ), columns = df_source .columns )
60
- df_result [xyz ] = df_query [xyz ]
61
- other_cols = [c for c in df_source .columns if c not in xyz ]
62
-
63
- for i in range (n ):
64
- m = df_source .loc [ii [i ].tolist (), other_cols ].mean (axis = 0 )
65
- df_result .loc [i , other_cols ] = m
66
-
67
- return df_result
68
-
69
- def exclude_points (df_source , df_exclude , radius ):
70
- from scipy .spatial import KDTree
71
- xyz = list ('xyz' )
72
- tree = KDTree (df_exclude [xyz ].values )
73
- ii = tree .query_ball_point (df_source [xyz ], r = radius , return_length = True )
74
- mask = [l == 0 for l in ii ]
75
- df_result = df_source .iloc [mask ]
76
- return df_result
77
-
78
- def voxel_decimate (df , cell_size ):
79
- def grouping_function (row ):
80
- return tuple ([round (row [c ] / cell_size ) for c in 'xyz' ])
81
- grouped = df .assign (voxel_index = df .apply (grouping_function , axis = 1 )).groupby ('voxel_index' )
82
- return grouped .first ().reset_index ()[[c for c in df .columns if c != 'voxel_index' ]]
83
-
84
- def convert_json_taichi_to_nerfstudio (d ):
85
- def transform_camera (c ):
86
- convention_change = np .array ([
87
- [1 , 0 , 0 , 0 ],
88
- [0 ,- 1 , 0 , 0 ],
89
- [0 , 0 ,- 1 , 0 ],
90
- [0 , 0 , 0 , 1 ]
91
- ])
92
- return (np .array (c ) @ convention_change ).tolist ()
93
-
94
- by_camera = {}
95
- for c in d :
96
- k = c ['camera_intrinsics' ]
97
- params = {
98
- "fl_x" : k [0 ][0 ],
99
- "fl_y" : k [1 ][1 ],
100
- "k1" : 0 ,
101
- "k2" : 0 ,
102
- "p1" : 0 ,
103
- "p2" : 0 ,
104
- "cx" : k [0 ][2 ],
105
- "cy" : k [1 ][2 ],
106
- "w" : c ['camera_width' ],
107
- "h" : c ['camera_height' ],
108
- "aabb_scale" : 16 ,
109
- 'frames' : []
110
- }
111
- cam_id = json .dumps (params , sort_keys = True )
112
- if cam_id not in by_camera :
113
- by_camera [cam_id ] = params
114
-
115
- converted = {
116
- 'file_path' : os .path .join ("./images" , c ['image_path' ].split ('/' )[- 1 ]),
117
- "transform_matrix" : transform_camera (c ['T_pointcloud_camera' ])
118
- }
119
- if 'depth_image_path' in c :
120
- converted ['depth_file_path' ] = os .path .join ("./images" , c ['depth_image_path' ].split ('/' )[- 1 ])
121
-
122
- by_camera [cam_id ]['frames' ].append (converted )
123
-
124
- if len (by_camera ) != 1 :
125
- raise RuntimeError ("unexpected number of cameras" )
126
-
127
- key , value = list (by_camera .items ())[0 ]
128
- return value
129
-
130
- # TODO: don't use "Taichi" as the intermediate format
131
- def convert_json_taichi_to_colmap (pose_data , points_df , sparse_observations , nerfstudio_fake_obs = True ):
132
- from scipy .spatial .transform import Rotation as R
133
-
134
- images = []
135
- cameras = []
136
- camera_id = 0
137
- max_pt_id = 0
138
- for image_id , c in enumerate (pose_data ):
139
- k = c ['camera_intrinsics' ]
140
- mat = np .linalg .inv (np .array (c ['T_pointcloud_camera' ]))
141
- qx ,qy ,qz ,qw = R .from_matrix (mat [:3 ,:3 ]).as_quat ()
142
- q = [qw , qx , qy , qz ]
143
- p = list (mat [:3 , 3 ])
144
- images .append ([image_id ] + list (q ) + list (p ) + [camera_id , os .path .split (c ['image_path' ])[- 1 ]])
145
-
146
- points = []
147
- for pt in sparse_observations .get (image_id , {}):
148
- max_pt_id = max (max_pt_id , pt .id )
149
- points .extend ([pt .pixelCoordinates .x , pt .pixelCoordinates .y , pt .id ])
150
-
151
- if nerfstudio_fake_obs and len (points ) == 0 :
152
- points = [100 ,100 ,0 ,200 ,200 ,1 ] # NeRFstudio loader will crash without this
153
-
154
- images .append (points )
155
-
156
- # TODO: variable intrinsics
157
- if len (cameras ) == 0 :
158
- cameras = [[
159
- camera_id ,
160
- 'PINHOLE' ,
161
- c ['camera_width' ],
162
- c ['camera_height' ],
163
- k [0 ][0 ],
164
- k [1 ][1 ],
165
- k [0 ][2 ],
166
- k [1 ][2 ]
167
- ]]
168
-
169
- points = []
170
- for _ , row in points_df .iterrows ():
171
- if 'id' in row :
172
- point_id = row ['id' ]
173
- else :
174
- point_id = 0
175
-
176
- if point_id == 0 :
177
- point_id = max_pt_id + 1
178
- max_pt_id += 1
179
-
180
- point = [
181
- int (point_id ),
182
- row ['x' ],
183
- row ['y' ],
184
- row ['z' ],
185
- round (row ['r' ]),
186
- round (row ['g' ]),
187
- round (row ['b' ])
188
- ]
189
-
190
- # TODO: compute reprojection errors here if really necessary for some use case
191
- if nerfstudio_fake_obs :
192
- fake_err = 1
193
- img_id , point_id = 0 , 0
194
- point .extend ([fake_err , img_id , point_id ])
195
-
196
- points .append (point )
197
-
198
- return points , images , cameras
199
-
200
212
# Globals
201
213
savedKeyFrames = {}
202
214
pointClouds = {}
@@ -208,14 +220,6 @@ def convert_json_taichi_to_colmap(pose_data, points_df, sparse_observations, ner
208
220
isTracking = False
209
221
finalMapWritten = False
210
222
211
- def blurScore (path ):
212
- image = cv2 .imread (path )
213
- gray = cv2 .cvtColor (image , cv2 .COLOR_BGR2GRAY )
214
- f_transform = np .fft .fft2 (gray )
215
- f_transform_shifted = np .fft .fftshift (f_transform )
216
- magnitude_spectrum = np .abs (f_transform_shifted )
217
- return np .percentile (magnitude_spectrum , 95 )
218
-
219
223
def post_process_point_clouds (globalPointCloud , sparse_point_cloud_df ):
220
224
# Save point clouds
221
225
if len (globalPointCloud ) == 0 :
0 commit comments