fix bugs in COLMAP images text loading

NotMorven · NotMorven · commit 923159a64418 · 2026-01-28T14:58:03.000+08:00
Signed-off-by: NotMorven <morven126@163.com>
diff --git a/fvdb_reality_capture/sfm_scene/_colmap_utils/scene_manager.py b/fvdb_reality_capture/sfm_scene/_colmap_utils/scene_manager.py
@@ -74,7 +74,9 @@ def load_colmap_project_file(self, project_file=None, image_path=None):
                 pass
 
         if self.image_path is None:
-            logger = logging.getLogger(f"{self.__class__.__module__}.{self.__class__.__name__}")
+            logger = logging.getLogger(
+                f"{self.__class__.__module__}.{self.__class__.__name__}"
+            )
             logger.debug("Warning: image_path not found for reconstruction")
         elif not self.image_path.endswith("/"):
             self.image_path += "/"
@@ -123,7 +125,9 @@ def _load_cameras_txt(self, input_file):
 
                 data = line.split()
                 camera_id = int(data[0])
-                self.cameras[camera_id] = Camera(data[1], int(data[2]), int(data[3]), map(float, data[4:]))
+                self.cameras[camera_id] = Camera(
+                    data[1], int(data[2]), int(data[3]), map(float, data[4:])
+                )
                 self.last_camera_id = max(self.last_camera_id, camera_id)
 
     # ---------------------------------------------------------------------------
@@ -169,7 +173,9 @@ def _load_images_bin(self, input_file):
 
                 ids_array = array.array("Q")
                 ids_array.frombytes(points_elements[:, 2].tobytes())
-                image.point3D_ids = np.array(ids_array, dtype=np.uint64).reshape((num_points2D,))
+                image.point3D_ids = np.array(ids_array, dtype=np.uint64).reshape(
+                    (num_points2D,)
+                )
 
                 # automatically remove points without an associated 3D point
                 # mask = (image.point3D_ids != SceneManager.INVALID_POINT3D)
@@ -184,42 +190,29 @@ def _load_images_bin(self, input_file):
     def _load_images_txt(self, input_file):
         self.images = OrderedDict()
 
-        with open(input_file, "r") as f:
-            is_camera_description_line = False
-
-            for line in iter(lambda: f.readline().strip(), ""):
-                if not line or line.startswith("#"):
-                    continue
-
-                is_camera_description_line = not is_camera_description_line
-
-                data = line.split()
-
-                if is_camera_description_line:
-                    read_quat = np.array(list(map(float, data[1:5])))
-                    read_pos = np.array(list(map(float, data[5:8])))
-                    image_id = int(data[0])
-                    image = Image(
-                        data[-1],
-                        int(data[-2]),
-                        Quaternion(read_quat),
-                        read_pos,
+        with open(input_file, "r") as fid:
+            while True:
+                line = fid.readline()
+                if not line:
+                    break
+                line = line.strip()
+                if len(line) > 0 and line[0] != "#":
+                    elems = line.split()
+                    image_id = int(elems[0])
+                    qvec = np.array(list(map(float, elems[1:5])))
+                    tvec = np.array(list(map(float, elems[5:8])))
+                    camera_id = int(elems[8])
+                    image_name = elems[9]
+                    elems = fid.readline().split()
+                    xys = np.column_stack(
+                        [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))]
                     )
-                else:
-                    points_2d_x = [float(x) for x in data[::3]]
-                    points_2d_y = [float(y) for y in data[1::3]]
-                    point3d_ids = [np.uint64(pid) for pid in data[2::3]]
-                    image.points2D = np.array([points_2d_x, points_2d_y]).T
-                    image.point3D_ids = np.array(point3d_ids, dtype=np.uint64)
-
-                    # automatically remove points without an associated 3D point
-                    # mask = (image.point3D_ids != SceneManager.INVALID_POINT3D)
-                    # image.points2D = image.points2D[mask]
-                    # image.point3D_ids = image.point3D_ids[mask]
-
-                    self.images[image_id] = image
-                    self.name_to_image_id[image.name] = image_id
-
+                    point3D_ids = np.array(tuple(map(int, elems[2::3])))
+                    self.images[image_id] = Image(
+                        image_name, camera_id, Quaternion(qvec), tvec
+                    )
+                    self.images[image_id].points2D = xys
+                    self.images[image_id].point3D_ids = point3D_ids
                     self.last_image_id = max(self.last_image_id, image_id)
 
     # ---------------------------------------------------------------------------
@@ -261,7 +254,9 @@ def _load_points3D_bin(self, input_file):
 
                 data = struct.unpack(f"{2*track_len}I", f.read(2 * track_len * 4))
 
-                self.point3D_id_to_images[self.point3D_ids[i]] = np.array(data, dtype=np.uint32).reshape(track_len, 2)
+                self.point3D_id_to_images[self.point3D_ids[i]] = np.array(
+                    data, dtype=np.uint32
+                ).reshape(track_len, 2)
 
     def _load_points3D_txt(self, input_file):
         self.points3D = []
@@ -328,7 +323,11 @@ def _save_cameras_bin(self, output_file):
             camera_struct = struct.Struct("IiLL")
 
             for camera_id, camera in sorted(self.cameras.items()):
-                fid.write(camera_struct.pack(camera_id, camera.camera_type, camera.width, camera.height))
+                fid.write(
+                    camera_struct.pack(
+                        camera_id, camera.camera_type, camera.width, camera.height
+                    )
+                )
                 # TODO (True): should move this into the Camera class
                 fid.write(camera.get_params().tobytes())
 
@@ -368,7 +367,9 @@ def _save_images_bin(self, output_file):
                 fid.write(struct.pack("I", image.camera_id))
                 fid.write((image.name + "\0").encode("utf-8"))
                 fid.write(struct.pack("L", len(image.points2D)))
-                data = np.rec.fromarrays((image.points2D[:, 0], image.points2D[:, 1], image.point3D_ids))
+                data = np.rec.fromarrays(
+                    (image.points2D[:, 0], image.points2D[:, 1], image.point3D_ids)
+                )
                 fid.write(data.tobytes())
 
     def _save_images_txt(self, output_file):
@@ -386,7 +387,11 @@ def _save_images_txt(self, output_file):
                 print >> fid, image.camera_id, image.name
 
                 data = np.rec.fromarrays(
-                    (image.points2D[:, 0], image.points2D[:, 1], image.point3D_ids.astype(np.int64))
+                    (
+                        image.points2D[:, 0],
+                        image.points2D[:, 1],
+                        image.point3D_ids.astype(np.int64),
+                    )
                 )
                 if len(data) > 0:
                     np.savetxt(fid, data, "%.2f %.2f %d", newline=" ")
@@ -411,7 +416,9 @@ def save_points3D(self, output_folder, output_file=None, binary=True):
 
     def _save_points3D_bin(self, output_file):
         num_valid_points3D = sum(
-            1 for point3D_idx in self.point3D_id_to_point3D_idx.values() if point3D_idx != SceneManager.INVALID_POINT3D
+            1
+            for point3D_idx in self.point3D_id_to_point3D_idx.values()
+            if point3D_idx != SceneManager.INVALID_POINT3D
         )
 
         iter_point3D_id_to_point3D_idx = self.point3D_id_to_point3D_idx.items()
@@ -432,7 +439,9 @@ def _save_points3D_bin(self, output_file):
 
     def _save_points3D_txt(self, output_file):
         num_valid_points3D = sum(
-            1 for point3D_idx in self.point3D_id_to_point3D_idx.values() if point3D_idx != SceneManager.INVALID_POINT3D
+            1
+            for point3D_idx in self.point3D_id_to_point3D_idx.values()
+            if point3D_idx != SceneManager.INVALID_POINT3D
         )
 
         array_to_string = lambda arr: " ".join(str(x) for x in arr)
@@ -454,7 +463,9 @@ def _save_points3D_txt(self, output_file):
                 print >> fid, array_to_string(self.points3D[point3D_idx]),
                 print >> fid, array_to_string(self.point3D_colors[point3D_idx]),
                 print >> fid, self.point3D_errors[point3D_idx],
-                print >> fid, array_to_string(self.point3D_id_to_images[point3D_id].flat)
+                print >> fid, array_to_string(
+                    self.point3D_id_to_images[point3D_id].flat
+                )
 
     # ---------------------------------------------------------------------------
 
@@ -475,7 +486,12 @@ def get_points3D(self, image_id, return_points2D=True, return_colors=False):
 
         mask = image.point3D_ids != SceneManager.INVALID_POINT3D
 
-        point3D_idxs = np.array([self.point3D_id_to_point3D_idx[point3D_id] for point3D_id in image.point3D_ids[mask]])
+        point3D_idxs = np.array(
+            [
+                self.point3D_id_to_point3D_idx[point3D_id]
+                for point3D_id in image.point3D_ids[mask]
+            ]
+        )
         # detect filtered points
         filter_mask = point3D_idxs != SceneManager.INVALID_POINT3D
         point3D_idxs = point3D_idxs[filter_mask]
@@ -492,12 +508,18 @@ def get_points3D(self, image_id, return_points2D=True, return_colors=False):
     # ---------------------------------------------------------------------------
 
     def point3D_valid(self, point3D_id):
-        return self.point3D_id_to_point3D_idx[point3D_id] != SceneManager.INVALID_POINT3D
+        return (
+            self.point3D_id_to_point3D_idx[point3D_id] != SceneManager.INVALID_POINT3D
+        )
 
     # ---------------------------------------------------------------------------
 
     def get_filtered_points3D(self, return_colors=False):
-        point3D_idxs = [idx for idx in self.point3D_id_to_point3D_idx.values() if idx != SceneManager.INVALID_POINT3D]
+        point3D_idxs = [
+            idx
+            for idx in self.point3D_id_to_point3D_idx.values()
+            if idx != SceneManager.INVALID_POINT3D
+        ]
         result = [self.points3D[point3D_idxs, :]]
 
         if return_colors:
@@ -509,10 +531,14 @@ def get_filtered_points3D(self, return_colors=False):
 
     # return 3D points shared by two images
     def get_shared_points3D(self, image_id1, image_id2):
-        point3D_ids = set(self.images[image_id1].point3D_ids) & set(self.images[image_id2].point3D_ids)
+        point3D_ids = set(self.images[image_id1].point3D_ids) & set(
+            self.images[image_id2].point3D_ids
+        )
         point3D_ids.discard(SceneManager.INVALID_POINT3D)
 
-        point3D_idxs = np.array([self.point3D_id_to_point3D_idx[point3D_id] for point3D_id in point3D_ids])
+        point3D_idxs = np.array(
+            [self.point3D_id_to_point3D_idx[point3D_id] for point3D_id in point3D_ids]
+        )
 
         return self.points3D[point3D_idxs, :]
 
@@ -581,17 +607,33 @@ def delete_images(self, image_list):
             if point3D_idx == SceneManager.INVALID_POINT3D:
                 continue
 
-            mask = np.array([image_id in keep_set for image_id in self.point3D_id_to_images[point3D_id][:, 0]])
+            mask = np.array(
+                [
+                    image_id in keep_set
+                    for image_id in self.point3D_id_to_images[point3D_id][:, 0]
+                ]
+            )
             if np.any(mask):
-                self.point3D_id_to_images[point3D_id] = self.point3D_id_to_images[point3D_id][mask]
+                self.point3D_id_to_images[point3D_id] = self.point3D_id_to_images[
+                    point3D_id
+                ][mask]
             else:
-                self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D
+                self.point3D_id_to_point3D_idx[point3D_id] = (
+                    SceneManager.INVALID_POINT3D
+                )
 
     # ---------------------------------------------------------------------------
 
     # camera_list: set of cameras whose points we'd like to keep
     # min/max triangulation angle: in degrees
-    def filter_points3D(self, min_track_len=0, max_error=np.inf, min_tri_angle=0, max_tri_angle=180, image_set=set()):
+    def filter_points3D(
+        self,
+        min_track_len=0,
+        max_error=np.inf,
+        min_tri_angle=0,
+        max_tri_angle=180,
+        image_set=set(),
+    ):
 
         image_set = set(image_set)
 
@@ -619,12 +661,16 @@ def filter_points3D(self, min_track_len=0, max_error=np.inf, min_tri_angle=0, ma
                 or image_set
                 and image_set.isdisjoint(image_ids)
             ):
-                self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D
+                self.point3D_id_to_point3D_idx[point3D_id] = (
+                    SceneManager.INVALID_POINT3D
+                )
 
             # find dot product between all camera viewing rays
             elif check_triangulation_angles:
                 xyz = self.points3D[point3D_idx, :]
-                tvecs = np.array([(self.images[image_id].tvec - xyz) for image_id in image_ids])
+                tvecs = np.array(
+                    [(self.images[image_id].tvec - xyz) for image_id in image_ids]
+                )
                 tvecs /= np.linalg.norm(tvecs, axis=-1)[:, np.newaxis]
 
                 cos_theta = np.array([u.dot(v) for u, v in combinations(tvecs, 2)])
@@ -633,13 +679,16 @@ def filter_points3D(self, min_track_len=0, max_error=np.inf, min_tri_angle=0, ma
                 # if maximum viewing angle is too small or too large,
                 # don't add this point
                 if np.min(cos_theta) > max_tri_prod or np.max(cos_theta) < min_tri_prod:
-                    self.point3D_id_to_point3D_idx[point3D_id] = SceneManager.INVALID_POINT3D
+                    self.point3D_id_to_point3D_idx[point3D_id] = (
+                        SceneManager.INVALID_POINT3D
+                    )
 
         # apply the filters to the image point3D_ids
         for image in self.images.itervalues():
             mask = np.array(
                 [
-                    self.point3D_id_to_point3D_idx.get(point3D_id, 0) == SceneManager.INVALID_POINT3D
+                    self.point3D_id_to_point3D_idx.get(point3D_id, 0)
+                    == SceneManager.INVALID_POINT3D
                     for point3D_id in image.point3D_ids
                 ]
             )