simplifying code and adding GPU serialization fix for both datamanagers

AntonioMacaronio · AntonioMacaronio · commit e1938e4d7a12 · 2025-04-17T00:41:02.000-07:00
diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py
@@ -46,7 +46,7 @@
 from nerfstudio.data.datasets.base_dataset import InputDataset
 from nerfstudio.data.utils.data_utils import identity_collate
 from nerfstudio.data.utils.dataloaders import ImageBatchStream, _undistort_image
-from nerfstudio.utils.misc import get_orig_class
+from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE
 
 
@@ -390,9 +390,7 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]:
         if self.config.cache_images == "disk":
             camera, data = next(self.iter_train_image_dataloader)[0]
             camera = camera.to(self.device)
-            for k in data.keys():
-                if isinstance(data[k], torch.Tensor):
-                    data[k] = data[k].to(self.device)
+            data = get_dict_to_torch(data, self.device)
             return camera, data
 
         image_idx = self.train_unseen_cameras.pop(0)
@@ -420,9 +418,7 @@ def next_eval(self, step: int) -> Tuple[Cameras, Dict]:
         if self.config.cache_images == "disk":
             camera, data = next(self.iter_eval_image_dataloader)[0]
             camera = camera.to(self.device)
-            for k in data.keys():
-                if isinstance(data[k], torch.Tensor):
-                    data[k] = data[k].to(self.device)
+            data = get_dict_to_torch(data, self.device)
             return camera, data
 
         return self.next_eval_image(step=step)
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -40,7 +40,7 @@
     RayBatchStream,
     variable_res_collate,
 )
-from nerfstudio.utils.misc import get_orig_class
+from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE
 
 
@@ -241,12 +241,16 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
         """Returns the next batch of data from the train dataloader."""
         self.train_count += 1
         ray_bundle, batch = next(self.iter_train_raybundles)[0]
+        ray_bundle = ray_bundle.to(self.device)
+        batch = get_dict_to_torch(batch, self.device)
         return ray_bundle, batch
 
     def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
         """Returns the next batch of data from the eval dataloader."""
         self.eval_count += 1
         ray_bundle, batch = next(self.iter_train_raybundles)[0]
+        ray_bundle = ray_bundle.to(self.device)
+        batch = get_dict_to_torch(batch, self.device)
         return ray_bundle, batch
 
     def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
diff --git a/nerfstudio/data/utils/dataloaders.py b/nerfstudio/data/utils/dataloaders.py
@@ -574,18 +574,18 @@ def __iter__(self):
             """
             Here, the variable 'batch' refers to the output of our pixel sampler.
                 - batch is a dict_keys(['image', 'indices'])
-                - batch['image'] returns a pytorch tensor with shape `torch.Size([4096, 3])` , where 4096 = num_rays_per_batch. 
+                - batch['image'] returns a `torch.Size([4096, 3])` tensor on CPU, where 4096 = num_rays_per_batch. 
                     - Note: each row in this tensor represents the RGB values as floats in [0, 1] of the pixel the ray goes through. 
                     - The info of what specific image index that pixel belongs to is stored within batch[’indices’]
-                - batch['indices'] returns a pytorch tensor `torch.Size([4096, 3])` tensor where each row represents (image_idx, pixelRow, pixelCol)
+                - batch['indices'] returns a `torch.Size([4096, 3])` tensor on CPU where each row represents (image_idx, pixelRow, pixelCol)
             pixel_sampler (for variable_res_collate) will loop though each image, samples pixel within the mask, and returns 
             them as the variable `indices` which has shape torch.Size([4096, 3]), where each row represents a pixel (image_idx, pixelRow, pixelCol)
             """
             batch = worker_pixel_sampler.sample(collated_batch)  # type: ignore
             # Note: collated_batch["image"].get_device() will return CPU if self.exclude_batch_keys_from_device contains 'image'
             ray_indices = batch["indices"]
-            # the ray_bundle is on the GPU; batch["image"] is on the CPU, here we move it to the GPU
-            ray_bundle = self.ray_generator(ray_indices).to(self.device)
+            # Both ray_bundle and batch["image"] are on the CPU and will be moved to the GPU in the main process (parallel_datamanager.py)
+            ray_bundle = self.ray_generator(ray_indices)
             if self.custom_ray_processor:
                 ray_bundle, batch = self.custom_ray_processor(ray_bundle, batch)