Commit 9dadff2
Author: zzjlxzr2012
1 parent 50e0e3c

feat(colmap): integrate EXIF pose prior, GPS alignment, model normalization, and geo transforms export
5 files changed: +289 additions, -35 deletions

nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py

Lines changed: 34 additions & 3 deletions

@@ -31,10 +31,10 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
 
     camera_type: Literal["perspective", "fisheye", "equirectangular", "pinhole", "simple_pinhole"] = "perspective"
     """Camera model to use."""
-    matching_method: Literal["exhaustive", "sequential", "vocab_tree"] = "vocab_tree"
+    matching_method: Literal["exhaustive", "sequential", "vocab_tree", "spatial"] = "vocab_tree"
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
-    should only be used for videos."""
+    should only be used for videos. Spatial can leverage EXIF GPS priors for pairing."""
     sfm_tool: Literal["any", "colmap", "hloc"] = "any"
     """Structure from motion tool to use. Colmap will use sift features, hloc can use
     many modern methods such as superpoint features and superglue matcher"""
@@ -104,6 +104,26 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
     use_single_camera_mode: bool = True
     """Whether to assume all images taken with the same camera characteristics, set to False for multiple cameras in colmap (only works with hloc sfm_tool).
     """
+    # New options for pose priors and alignment
+    use_pose_prior: bool = False
+    """If True, use EXIF pose priors by running pose_prior_mapper and optionally align to priors."""
+    prior_position_std: float = 2.0
+    """Standard deviation (meters) for x/y/z prior used by pose_prior_mapper."""
+    overwrite_priors_covariance: bool = True
+    """Whether to overwrite priors covariance in database when running pose_prior_mapper."""
+    align_model_to_priors: bool = False
+    """If True, run model_aligner to align the reconstruction to GPS priors (writes back into sparse/0)."""
+    alignment_max_error: Optional[float] = None
+    """Max alignment error for model_aligner. Defaults to prior_position_std if not set."""
+    # Normalization options
+    normalize_model: bool = False
+    """If True, apply model_transformer to center and scale the reconstructed model for numeric stability."""
+    normalization_center: Literal["bbox", "mean"] = "bbox"
+    """How to compute the model center for normalization (bbox center or mean point)."""
+    normalization_target_diagonal: float = 4.0
+    """Target diagonal length (meters) for the normalized model if no explicit scale is provided."""
+    normalization_scale: Optional[float] = None
+    """Explicit normalization scale; if set, overrides normalization_target_diagonal."""
 
     @staticmethod
     def default_colmap_path() -> Path:
@@ -219,6 +239,15 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
                 matching_method=self.matching_method,
                 refine_intrinsics=self.refine_intrinsics,
                 colmap_cmd=self.colmap_cmd,
+                use_pose_prior=self.use_pose_prior,
+                prior_position_std=self.prior_position_std,
+                overwrite_priors_covariance=self.overwrite_priors_covariance,
+                align_model_to_priors=self.align_model_to_priors,
+                alignment_max_error=self.alignment_max_error,
+                normalize_model=self.normalize_model,
+                normalization_center=self.normalization_center,
+                normalization_target_diagonal=self.normalization_target_diagonal,
+                normalization_scale=self.normalization_scale,
             )
         elif sfm_tool == "hloc":
             if mask_path is not None:
@@ -227,12 +256,14 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
             assert feature_type is not None
             assert matcher_type is not None
            assert matcher_type != "NN"  # Only used for colmap.
+            # hloc does not support 'spatial' matching_method; map it to 'vocab_tree' for compatibility
+            hloc_matching_method = self.matching_method if self.matching_method != "spatial" else "vocab_tree"
             hloc_utils.run_hloc(
                 image_dir=image_dir,
                 colmap_dir=self.absolute_colmap_path,
                 camera_model=CAMERA_MODELS[self.camera_type],
                 verbose=self.verbose,
-                matching_method=self.matching_method,
+                matching_method=hloc_matching_method,
                 feature_type=feature_type,
                 matcher_type=matcher_type,
                 refine_pixsfm=self.refine_pixsfm,
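
Note (not part of the diff): the new dataclass fields above are passed straight through to colmap_utils.run_colmap inside _run_colmap. A rough usage sketch in Python; ImagesToNerfstudioDataset, its data/output_dir fields, and main() come from nerfstudio's existing processing API and are assumed here, while the new keyword names are taken from this commit:

from pathlib import Path

from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset

# Hedged sketch: exercise the pose-prior / normalization options end to end.
converter = ImagesToNerfstudioDataset(
    data=Path("photos/"),                 # images carrying EXIF GPS tags
    output_dir=Path("outputs/my-scene"),
    matching_method="spatial",            # new option: pair images by GPS proximity
    use_pose_prior=True,                  # run pose_prior_mapper instead of mapper
    prior_position_std=2.0,               # rough GPS accuracy in meters
    align_model_to_priors=True,           # model_aligner writes back into sparse/0
    normalize_model=True,                 # recenter/rescale via model_transformer
    normalization_target_diagonal=4.0,
)
converter.main()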

nerfstudio/process_data/colmap_utils.py

Lines changed: 98 additions & 10 deletions

@@ -73,7 +73,7 @@ def get_vocab_tree() -> Path:
     vocab_tree_filename = Path(appdirs.user_data_dir("nerfstudio")) / "vocab_tree.fbow"
 
     if not vocab_tree_filename.exists():
-        r = requests.get("https://demuc.de/colmap/vocab_tree_flickr100K_words32K.bin", stream=True)
+        r = requests.get("http://10.126.13.216:9000/root/firmware/vocab_tree_faiss_flickr100K_words256K.bin", stream=True)
         vocab_tree_filename.parent.mkdir(parents=True, exist_ok=True)
         with open(vocab_tree_filename, "wb") as f:
             total_length = r.headers.get("content-length")
@@ -96,9 +96,18 @@ def run_colmap(
     camera_mask_path: Optional[Path] = None,
     gpu: bool = True,
     verbose: bool = False,
-    matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree",
+    matching_method: Literal["vocab_tree", "exhaustive", "sequential", "spatial"] = "vocab_tree",
     refine_intrinsics: bool = True,
     colmap_cmd: str = "colmap",
+    use_pose_prior: bool = False,
+    prior_position_std: float = 2.0,
+    overwrite_priors_covariance: bool = True,
+    align_model_to_priors: bool = False,
+    alignment_max_error: Optional[float] = None,
+    normalize_model: bool = False,
+    normalization_center: Literal["bbox", "mean"] = "bbox",
+    normalization_target_diagonal: float = 4.0,
+    normalization_scale: Optional[float] = None,
 ) -> None:
     """Runs COLMAP on the images.
 
@@ -112,6 +121,15 @@ def run_colmap(
         matching_method: Matching method to use.
         refine_intrinsics: If True, refine intrinsics.
         colmap_cmd: Path to the COLMAP executable.
+        use_pose_prior: If True, use pose_prior_mapper to incorporate EXIF pose priors.
+        prior_position_std: Prior position standard deviation in meters for x/y/z.
+        overwrite_priors_covariance: If True, overwrite priors covariance in database when mapping.
+        align_model_to_priors: If True, run model_aligner to align the reconstruction to GPS priors.
+        alignment_max_error: Max alignment error (falls back to prior_position_std if None).
+        normalize_model: If True, apply a similarity transform to center and scale the model with model_transformer.
+        normalization_center: How to compute center (bbox center or mean point).
+        normalization_target_diagonal: Target diagonal length (meters) to scale the model to (if normalization_scale not given).
+        normalization_scale: Explicit scale factor. If provided, overrides normalization_target_diagonal.
     """
 
     colmap_version = get_colmap_version(colmap_cmd)
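
Note (illustrative, not from the diff): the same options can drive run_colmap directly. Only the new keyword arguments come from this commit; the directory layout is a placeholder, and CAMERA_MODELS is the existing mapping already used by the converter that calls into this module:

from pathlib import Path

from nerfstudio.process_data import colmap_utils
from nerfstudio.process_data.process_data_utils import CAMERA_MODELS

# Hedged sketch of a direct call with the new keyword arguments.
colmap_utils.run_colmap(
    image_dir=Path("scene/images"),        # placeholder paths, not from the diff
    colmap_dir=Path("scene/colmap"),
    camera_model=CAMERA_MODELS["perspective"],
    matching_method="spatial",
    use_pose_prior=True,
    prior_position_std=2.0,
    align_model_to_priors=True,
    normalize_model=True,
)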
@@ -126,7 +144,7 @@
         f"--image_path {image_dir}",
         "--ImageReader.single_camera 1",
         f"--ImageReader.camera_model {camera_model.value}",
-        f"--SiftExtraction.use_gpu {int(gpu)}",
+        # f"--SiftExtraction.use_gpu={bool(gpu)}",
     ]
     if camera_mask_path is not None:
         feature_extractor_cmd.append(f"--ImageReader.camera_mask_path {camera_mask_path}")
@@ -140,7 +158,7 @@
     feature_matcher_cmd = [
         f"{colmap_cmd} {matching_method}_matcher",
         f"--database_path {colmap_dir / 'database.db'}",
-        f"--SiftMatching.use_gpu {int(gpu)}",
+        # f"--SiftMatching.use_gpu={bool(gpu)}",
     ]
     if matching_method == "vocab_tree":
         vocab_tree_filename = get_vocab_tree()
@@ -150,19 +168,32 @@
         run_command(feature_matcher_cmd, verbose=verbose)
     CONSOLE.log("[bold green]:tada: Done matching COLMAP features.")
 
-    # Bundle adjustment
+    # Mapping / bundle adjustment
     sparse_dir = colmap_dir / "sparse"
     sparse_dir.mkdir(parents=True, exist_ok=True)
-    mapper_cmd = [
-        f"{colmap_cmd} mapper",
+
+    # Choose mapper variant
+    mapper_command_name = "pose_prior_mapper" if use_pose_prior else "mapper"
+
+    mapper_cmd_parts = [
+        f"{colmap_cmd} {mapper_command_name}",
         f"--database_path {colmap_dir / 'database.db'}",
         f"--image_path {image_dir}",
         f"--output_path {sparse_dir}",
     ]
-    if colmap_version >= Version("3.7"):
-        mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6")
 
-    mapper_cmd = " ".join(mapper_cmd)
+    if not use_pose_prior and colmap_version >= Version("3.7"):
+        mapper_cmd_parts.append("--Mapper.ba_global_function_tolerance=1e-6")
+
+    if use_pose_prior:
+        # Set symmetric priors std for x/y/z and optionally overwrite covariance
+        mapper_cmd_parts.append(f"--prior_position_std_x {prior_position_std}")
+        mapper_cmd_parts.append(f"--prior_position_std_y {prior_position_std}")
+        mapper_cmd_parts.append(f"--prior_position_std_z {prior_position_std}")
+        if overwrite_priors_covariance:
+            mapper_cmd_parts.append("--overwrite_priors_covariance 1")
+
+    mapper_cmd = " ".join(mapper_cmd_parts)
 
     with status(
         msg="[bold yellow]Running COLMAP bundle adjustment... (This may take a while)",
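
Note (illustrative only): with the defaults in this hunk (prior_position_std=2.0, overwrite_priors_covariance=True) and use_pose_prior enabled, the joined mapper command assembled above comes out roughly as below; the /scene paths are placeholders, not real project paths:

# Hedged sketch of the command string produced by the pose-prior branch above.
parts = [
    "colmap pose_prior_mapper",
    "--database_path /scene/colmap/database.db",
    "--image_path /scene/images",
    "--output_path /scene/colmap/sparse",
    "--prior_position_std_x 2.0",
    "--prior_position_std_y 2.0",
    "--prior_position_std_z 2.0",
    "--overwrite_priors_covariance 1",
]
print(" ".join(parts))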
@@ -172,6 +203,63 @@
         run_command(mapper_cmd, verbose=verbose)
     CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.")
 
+    # Optional alignment to GPS priors; write back into sparse/0 to keep downstream unchanged
+    if align_model_to_priors:
+        align_cmd_parts = [
+            f"{colmap_cmd} model_aligner",
+            f"--input_path {sparse_dir}/0",
+            f"--output_path {sparse_dir}/0",
+            f"--database_path {colmap_dir / 'database.db'}",
+        ]
+        max_err = alignment_max_error if alignment_max_error is not None else prior_position_std
+        align_cmd_parts.append(f"--alignment_max_error {max_err}")
+        align_cmd = " ".join(align_cmd_parts)
+        with status(msg="[bold yellow]Aligning model to pose priors...", spinner="dots", verbose=verbose):
+            run_command(align_cmd, verbose=verbose)
+        CONSOLE.log("[bold green]:tada: Done aligning model to pose priors.")
+
+    # Optional normalization to human scale and centered coordinates using model_transformer
+    if normalize_model:
+        recon_dir = sparse_dir / "0"
+        try:
+            ptid_to_info = read_points3D_binary(recon_dir / "points3D.bin")
+        except Exception as e:
+            CONSOLE.print(f"[bold yellow]Warning: Could not read points3D for normalization: {e}")
+            ptid_to_info = {}
+        if len(ptid_to_info) == 0:
+            CONSOLE.print("[bold yellow]Warning: No 3D points to estimate normalization. Skipping normalization.")
+        else:
+            import numpy as np  # local import to avoid overhead unless needed
+            pts = np.array([p.xyz for p in ptid_to_info.values()], dtype=np.float64)
+            if normalization_center == "mean":
+                Cx, Cy, Cz = pts.mean(axis=0).tolist()
+            else:
+                mins = pts.min(axis=0)
+                maxs = pts.max(axis=0)
+                Cx, Cy, Cz = ((mins + maxs) * 0.5).tolist()
+            diag = float(np.linalg.norm(pts.max(axis=0) - pts.min(axis=0)))
+            if normalization_scale is not None:
+                s = float(normalization_scale)
+            else:
+                eps = 1e-9
+                s = float(normalization_target_diagonal) / max(diag, eps)
+            # Forward transform desired: x' = s * (x - C) = s*x + t, with t = -s*C
+            tx, ty, tz = (-s * Cx, -s * Cy, -s * Cz)
+            # Write transform in format: scale qw qx qy qz tx ty tz (identity rotation)
+            transform_path = recon_dir / "normalization_transform.txt"
+            with open(transform_path, "w", encoding="utf-8") as f:
+                f.write(f"{s:.12g} 1 0 0 0 {tx:.12g} {ty:.12g} {tz:.12g}\n")
+            transform_cmd_parts = [
+                f"{colmap_cmd} model_transformer",
+                f"--input_path {recon_dir}",
+                f"--output_path {recon_dir}",
+                f"--transform_path {transform_path}",
+            ]
+            transform_cmd = " ".join(transform_cmd_parts)
+            with status(msg="[bold yellow]Normalizing model scale and center...", spinner="dots", verbose=verbose):
+                run_command(transform_cmd, verbose=verbose)
+            CONSOLE.log("[bold green]:tada: Done normalizing model (model_transformer).")
+
     if refine_intrinsics:
         with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose):
             bundle_adjuster_cmd = [
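
Note (not from the diff): the normalization block above applies x' = s * (x - C) with identity rotation, where C is the bbox (or mean) center and s = normalization_target_diagonal / bbox_diagonal, and the translation written to the transform file is t = -s * C. A standalone NumPy sketch checking that this choice centers the bounding box at the origin and rescales its diagonal to the target:

import numpy as np

def normalization_params(pts, target_diagonal=4.0, center="bbox"):
    # Mirror the math above: return (s, t) for the similarity x' = s * x + t.
    mins, maxs = pts.min(axis=0), pts.max(axis=0)
    c = pts.mean(axis=0) if center == "mean" else (mins + maxs) * 0.5
    s = target_diagonal / max(float(np.linalg.norm(maxs - mins)), 1e-9)
    return s, -s * c

# Quick self-check on random points (illustrative only).
pts = np.random.default_rng(0).uniform(-50.0, 50.0, size=(1000, 3))
s, t = normalization_params(pts)
normed = s * pts + t
new_diag = float(np.linalg.norm(normed.max(axis=0) - normed.min(axis=0)))
assert abs(new_diag - 4.0) < 1e-6  # diagonal rescaled to the target
assert np.allclose((normed.max(axis=0) + normed.min(axis=0)) / 2, 0.0, atol=1e-9)  # bbox centered at origin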

nerfstudio/process_data/process_data_utils.py

Lines changed: 65 additions & 18 deletions

@@ -188,12 +188,7 @@ def convert_video_to_images(
     for dir in downscale_dirs:
         dir.mkdir(parents=True, exist_ok=True)
 
-    downscale_chain = (
-        f"split={num_downscales + 1}"
-        + "".join([f"[t{i}]" for i in range(num_downscales + 1)])
-        + ";"
-        + ";".join(downscale_chains)
-    )
+    # We will construct the split size and outputs later per-frame based on whether [out0] is needed
 
     ffmpeg_cmd += " -vsync vfr"
 
@@ -212,7 +207,7 @@
     ffmpeg_cmd += " -pix_fmt bgr8"
     select_cmd = ""
 
-    downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_chain}"' + "".join(
+    downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}"' + "".join(
         [f' -map "[out{i}]" "{downscale_paths[i]}"' for i in range(num_downscales + 1)]
     )
 
@@ -294,19 +289,30 @@ def copy_images_list(
             pass
         copied_image_paths.append(copied_image_path)
 
+    # Early return: if there is no transformation/downscale requested, avoid re-encoding to preserve EXIF
+    no_transform_requested = (
+        num_downscales == 0
+        and crop_border_pixels is None
+        and (crop_factor == (0.0, 0.0, 0.0, 0.0))
+        and upscale_factor is None
+        and same_dimensions
+    )
+    if no_transform_requested:
+        if len(image_paths) == 0:
+            CONSOLE.log("[bold red]:skull: No usable images in the data folder.")
+        else:
+            CONSOLE.log(f"[bold green]:tada: Done copying images with prefix '{image_prefix}'.")
+        return copied_image_paths
+
     nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
+    # Build downscale graph labels. We will decide later whether to emit [out0] (base) depending on whether base transform is needed.
     downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i}]" for i in range(num_downscales + 1)]
     downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
 
     for dir in downscale_dirs:
         dir.mkdir(parents=True, exist_ok=True)
 
-    downscale_chain = (
-        f"split={num_downscales + 1}"
-        + "".join([f"[t{i}]" for i in range(num_downscales + 1)])
-        + ";"
-        + ";".join(downscale_chains)
-    )
+    # We will construct the split size and outputs later per-frame based on whether [out0] is needed
 
     num_frames = len(image_paths)
     # ffmpeg batch commands assume all images are the same dimensions.
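
Note (not from the diff): the early-return path added above deliberately skips the ffmpeg re-encode so that EXIF metadata, including the GPS tags, survives the plain copy and stays available to pose_prior_mapper later. A quick hedged check using Pillow (already a nerfstudio dependency); the helper name and the example path are ours, not part of this commit:

from pathlib import Path

from PIL import Image

def has_gps_exif(image_path: Path) -> bool:
    # GPS data lives in the GPSInfo IFD, reachable through the 0x8825 pointer tag.
    exif = Image.open(image_path).getexif()
    gps_ifd = exif.get_ifd(0x8825)  # empty dict when the image has no GPS tags
    return len(gps_ifd) > 0

# Example: confirm the copied (non-re-encoded) frames still expose GPS EXIF.
copied = sorted(Path("outputs/my-scene/images").glob("*.jpg"))  # placeholder path
print(sum(has_gps_exif(p) for p in copied), "of", len(copied), "frames have GPS EXIF")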
@@ -330,13 +336,54 @@
         if upscale_factor is not None:
             select_cmd = f"[0:v]scale=iw*{upscale_factor}:ih*{upscale_factor}:flags=neighbor[upscaled];[upscaled]"
 
-        downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_chain}"' + "".join(
-            [
-                f' -map "[out{i}]" -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
-                for i in range(num_downscales + 1)
-            ]
+        downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}"' + "".join(
+            [f' -map "[out{i}]" "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"' for i in range(num_downscales + 1)]
         )
 
+        # Decide whether to overwrite base images ([out0])
+        need_transform_base = (
+            crop_border_pixels is not None or (crop_factor != (0.0, 0.0, 0.0, 0.0)) or upscale_factor is not None or not same_dimensions
+        )
+
+        # Build filter graph: if base not needed, split only into downscaled outputs [out1..outN]; otherwise include [out0]
+        if num_downscales > 0:
+            if need_transform_base:
+                split_targets = [f"[t{i}]" for i in range(num_downscales + 1)]  # include base
+                chains = ";".join(downscale_chains)  # [out0..outN]
+                downscale_graph = (
+                    f"split={num_downscales + 1}" + "".join(split_targets) + ";" + chains
+                )
+                downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
+                mapping_entries = [
+                    f' -map "[out0]" -map_metadata 0 -q:v 2 "{downscale_dirs[0] / f"{framename}{copied_image_paths[0].suffix}"}"'
+                ]
+                for i in range(1, num_downscales + 1):
+                    mapping_entries.append(
+                        f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
+                    )
+                downscale_cmd += "".join(mapping_entries)
+            else:
+                # Only emit downscaled outputs; reindex to start from out0 to avoid gaps and empty maps
+                # Build chains for i=1..N, then relabel [out{i}] -> [out{i-1}] via mapping labels
+                split_targets = [f"[t{i}]" for i in range(1, num_downscales + 1)]
+                chains = ";".join([f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i-1}]" for i in range(1, num_downscales + 1)])
+                downscale_graph = (
+                    f"split={num_downscales}" + "".join(split_targets) + ";" + chains
+                )
+                downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
+                mapping_entries = []
+                for i in range(num_downscales):
+                    # map out{i} to images_{2**(i+1)}
+                    out_dir = downscale_dirs[i + 1]
+                    mapping_entries.append(
+                        f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{out_dir / f"{framename}{copied_image_paths[0].suffix}"}"'
+                    )
+                downscale_cmd += "".join(mapping_entries)
+        else:
+            # No downscales requested but we got here due to other transforms; keep single output
+            downscale_graph = ""
+            downscale_cmd = ""
+
         ffmpeg_cmd += downscale_cmd
         if verbose:
             CONSOLE.log(f"... {ffmpeg_cmd}")
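
Note (illustrative, not from the diff): to make the branchy graph construction above concrete, these are the two -filter_complex shapes it builds for num_downscales=2 with the nearest-neighbor flag off. The else branch splits into N branches and relabels the outputs to start at [out0]; the need_transform_base branch splits into N+1 branches and keeps a full-resolution [out0]:

# Hedged sketch of the two filter-graph strings assembled above.
num_downscales = 2

# Base image left untouched: only downscaled outputs, relabeled to [out0], [out1].
no_base = (
    f"split={num_downscales}[t1][t2];"
    "[t1]scale=iw/2:ih/2[out0];"
    "[t2]scale=iw/4:ih/4[out1]"
)

# Base image also re-encoded (crop/upscale requested): full-size [out0] plus downscales.
with_base = (
    f"split={num_downscales + 1}[t0][t1][t2];"
    "[t0]scale=iw/1:ih/1[out0];"
    "[t1]scale=iw/2:ih/2[out1];"
    "[t2]scale=iw/4:ih/4[out2]"
)

print(no_base)
print(with_base)

Each labeled output is then mapped with -map "[outN]" -map_metadata 0 -q:v 2 <path>, which is the diff's way of carrying source metadata over to the re-encoded copies.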
