
Commit 7431a5b

Merge branch 'main' into main
2 parents 5dbb321 + 5003d0e commit 7431a5b

File tree

10 files changed (+58, -33 lines)


docs/developer_guides/pipelines/datamanagers.md

Lines changed: 26 additions & 0 deletions

@@ -115,6 +115,32 @@ To train splatfacto with a large dataset that's unable to fit in memory, please
 ns-train splatfacto --data {PROCESSED_DATA_DIR} --pipeline.datamanager.cache-images disk
 ```

+Checkout these flowcharts for more customization on large datasets!
+
+```{image} imgs/DatamanagerGuide-LargeNeRF-light.png
+:align: center
+:class: only-light
+:width: 600
+```
+
+```{image} imgs/DatamanagerGuide-LargeNeRF-dark.png
+:align: center
+:class: only-dark
+:width: 600
+```
+
+```{image} imgs/DatamanagerGuide-Large3DGS-light.png
+:align: center
+:class: only-light
+:width: 600
+```
+
+```{image} imgs/DatamanagerGuide-Large3DGS-dark.png
+:align: center
+:class: only-dark
+:width: 600
+```
+
 ## Migrating Your DataManager to the new DataManager
 Many methods subclass a DataManager and add extra data to it. If you would like your custom datamanager to also support new parallel features, you can migrate any custom dataloading logic to the new `custom_ray_processor()` API. This function takes in a full training batch (either image or ray bundle) and allows the user to modify or add to it. Let's take a look at an example for the LERF method, which was built on Nerfstudio's VanillaDataManager. This API provides an interface to attach new information to the RayBundle (for ray based methods), Cameras object (for splatting based methods), or ground truth dictionary. It runs in a background process if disk caching is enabled, otherwise it runs in the main process.
4 binary image files added (the DatamanagerGuide flowchart PNGs referenced above): 526 KB, 528 KB, 532 KB, and 532 KB.
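The `custom_ray_processor()` workflow described in the docs paragraph above can be sketched briefly. This is not the LERF example the docs refer to; it assumes the hook is overridable on `ParallelDataManager` with the `(ray_bundle, batch) -> (ray_bundle, batch)` signature visible in the `dataloaders.py` hunk later in this commit, and `ray_origin_norm` is an invented key.

```python
# Hedged sketch: a custom datamanager that attaches extra per-ray data via
# custom_ray_processor(). The hook name and override style are assumptions
# based on the docs paragraph and the dataloaders.py diff in this commit.
from typing import Dict, Tuple

from nerfstudio.cameras.rays import RayBundle
from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager


class MyParallelDataManager(ParallelDataManager):
    """Adds an illustrative per-ray quantity to every training batch."""

    def custom_ray_processor(self, ray_bundle: RayBundle, batch: Dict) -> Tuple[RayBundle, Dict]:
        # Runs in a background worker when disk caching is enabled,
        # otherwise in the main process (per the docs paragraph above).
        batch["ray_origin_norm"] = ray_bundle.origins.norm(dim=-1, keepdim=True)
        return ray_bundle, batch
```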

nerfstudio/configs/method_configs.py

Lines changed: 1 addition & 1 deletion

@@ -219,7 +219,7 @@
 max_num_iterations=30000,
 mixed_precision=True,
 pipeline=VanillaPipelineConfig(
-    datamanager=VanillaDataManagerConfig(
+    datamanager=ParallelDataManagerConfig(
     _target=ParallelDataManager[DepthDataset],
     dataparser=NerfstudioDataParserConfig(),
     train_num_rays_per_batch=4096,

nerfstudio/data/datamanagers/full_images_datamanager.py

Lines changed: 9 additions & 4 deletions

@@ -26,6 +26,7 @@
 from copy import deepcopy
 from dataclasses import dataclass, field
 from functools import cached_property
+from itertools import islice
 from pathlib import Path
 from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin

@@ -45,7 +46,7 @@
 from nerfstudio.data.datasets.base_dataset import InputDataset
 from nerfstudio.data.utils.data_utils import identity_collate
 from nerfstudio.data.utils.dataloaders import ImageBatchStream, _undistort_image
-from nerfstudio.utils.misc import get_orig_class
+from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE

@@ -84,7 +85,7 @@ class FullImageDatamanagerConfig(DataManagerConfig):
 dataloader_num_workers: int = 4
 """The number of workers performing the dataloading from either disk/RAM, which
 includes collating, pixel sampling, unprojecting, ray generation etc."""
-prefetch_factor: int = 4
+prefetch_factor: Optional[int] = 4
 """The limit number of batches a worker will start loading once an iterator is created.
 More details are described here: https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader"""
 cache_compressed_images: bool = False

@@ -356,9 +357,9 @@ def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]:
 self.eval_imagebatch_stream,
 batch_size=1,
 num_workers=0,
-collate_fn=identity_collate,
+collate_fn=lambda x: x[0],
 )
-return [batch[0] for batch in dataloader]
+return list(islice(dataloader, len(self.eval_dataset)))

 image_indices = [i for i in range(len(self.eval_dataset))]
 data = [d.copy() for d in self.cached_eval]

@@ -388,6 +389,8 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]:
 self.train_count += 1
 if self.config.cache_images == "disk":
 camera, data = next(self.iter_train_image_dataloader)[0]
+camera = camera.to(self.device)
+data = get_dict_to_torch(data, self.device)
 return camera, data

 image_idx = self.train_unseen_cameras.pop(0)

@@ -414,6 +417,8 @@ def next_eval(self, step: int) -> Tuple[Cameras, Dict]:
 self.eval_count += 1
 if self.config.cache_images == "disk":
 camera, data = next(self.iter_eval_image_dataloader)[0]
+camera = camera.to(self.device)
+data = get_dict_to_torch(data, self.device)
 return camera, data

 return self.next_eval_image(step=step)
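For reference, a hedged sketch of how the dataloading fields touched in this file might be set together. The field names (`cache_images`, `dataloader_num_workers`, `prefetch_factor`) come from the diff above, but the specific values are illustrative, not a recommendation.

```python
# Hedged sketch of the FullImageDatamanagerConfig fields shown in this diff.
from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig

datamanager = FullImageDatamanagerConfig(
    cache_images="disk",       # stream images from disk (matches --pipeline.datamanager.cache-images disk)
    dataloader_num_workers=4,  # background workers doing the loading/collating
    prefetch_factor=4,         # batches each worker preloads; now Optional[int] so it can be None when no workers are used
)
```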

nerfstudio/data/datamanagers/parallel_datamanager.py

Lines changed: 6 additions & 2 deletions

@@ -40,7 +40,7 @@
 RayBatchStream,
 variable_res_collate,
 )
-from nerfstudio.utils.misc import get_orig_class
+from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE

@@ -56,7 +56,7 @@ class ParallelDataManagerConfig(VanillaDataManagerConfig):
 dataloader_num_workers: int = 4
 """The number of workers performing the dataloading from either disk/RAM, which
 includes collating, pixel sampling, unprojecting, ray generation etc."""
-prefetch_factor: int = 10
+prefetch_factor: Optional[int] = 10
 """The limit number of batches a worker will start loading once an iterator is created.
 More details are described here: https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader"""
 cache_compressed_images: bool = False

@@ -241,12 +241,16 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
 """Returns the next batch of data from the train dataloader."""
 self.train_count += 1
 ray_bundle, batch = next(self.iter_train_raybundles)[0]
+ray_bundle = ray_bundle.to(self.device)
+batch = get_dict_to_torch(batch, self.device)
 return ray_bundle, batch

 def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
 """Returns the next batch of data from the eval dataloader."""
 self.eval_count += 1
 ray_bundle, batch = next(self.iter_train_raybundles)[0]
+ray_bundle = ray_bundle.to(self.device)
+batch = get_dict_to_torch(batch, self.device)
 return ray_bundle, batch

 def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:

nerfstudio/data/pixel_samplers.py

Lines changed: 12 additions & 18 deletions

@@ -18,6 +18,7 @@

 import random
 import warnings
+from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import Dict, Optional, Type, Union

@@ -335,8 +336,7 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,

 # only sample within the mask, if the mask is in the batch
 all_indices = []
-all_images = []
-all_depth_images = []
+all_images = defaultdict(list)

 assert num_rays_per_batch % 2 == 0, "num_rays_per_batch must be divisible by 2"
 num_rays_per_image = divide_rays_per_image(num_rays_per_batch, num_images)

@@ -350,10 +350,11 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,
 )
 indices[:, 0] = i
 all_indices.append(indices)
-all_images.append(batch["image"][i][indices[:, 1], indices[:, 2]])
-if "depth_image" in batch:
-    all_depth_images.append(batch["depth_image"][i][indices[:, 1], indices[:, 2]])
+for key, value in batch.items():
+    if key in ["image_idx", "mask"]:
+        continue
+    all_images[key].append(value[i][indices[:, 1], indices[:, 2]])
 else:
 for i, num_rays in enumerate(num_rays_per_image):
 image_height, image_width, _ = batch["image"][i].shape

@@ -363,26 +364,19 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,
 indices = self.sample_method(num_rays, 1, image_height, image_width, device=device)
 indices[:, 0] = i
 all_indices.append(indices)
-all_images.append(batch["image"][i][indices[:, 1], indices[:, 2]])
-if "depth_image" in batch:
-    all_depth_images.append(batch["depth_image"][i][indices[:, 1], indices[:, 2]])
+for key, value in batch.items():
+    if key in ["image_idx", "mask"]:
+        continue
+    all_images[key].append(value[i][indices[:, 1], indices[:, 2]])

 indices = torch.cat(all_indices, dim=0)

-c, y, x = (i.flatten() for i in torch.split(indices, 1, dim=-1))
-collated_batch = {
-    key: value[c, y, x]
-    for key, value in batch.items()
-    if key not in ("image_idx", "image", "mask", "depth_image") and value is not None
-}
-
-collated_batch["image"] = torch.cat(all_images, dim=0)
-if "depth_image" in batch:
-    collated_batch["depth_image"] = torch.cat(all_depth_images, dim=0)
+collated_batch = {key: torch.cat(all_images[key], dim=0) for key in all_images}

 assert collated_batch["image"].shape[0] == num_rays_per_batch

 # Needed to correct the random indices to their actual camera idx locations.
+c = indices[..., 0].flatten()
 indices[:, 0] = batch["image_idx"][c]
 collated_batch["indices"] = indices  # with the abs camera indices

nerfstudio/data/utils/dataloaders.py

Lines changed: 4 additions & 8 deletions

@@ -574,18 +574,18 @@ def __iter__(self):
 """
 Here, the variable 'batch' refers to the output of our pixel sampler.
 - batch is a dict_keys(['image', 'indices'])
-- batch['image'] returns a pytorch tensor with shape `torch.Size([4096, 3])` , where 4096 = num_rays_per_batch.
+- batch['image'] returns a `torch.Size([4096, 3])` tensor on CPU, where 4096 = num_rays_per_batch.
 - Note: each row in this tensor represents the RGB values as floats in [0, 1] of the pixel the ray goes through.
 - The info of what specific image index that pixel belongs to is stored within batch['indices']
-- batch['indices'] returns a pytorch tensor `torch.Size([4096, 3])` tensor where each row represents (image_idx, pixelRow, pixelCol)
+- batch['indices'] returns a `torch.Size([4096, 3])` tensor on CPU where each row represents (image_idx, pixelRow, pixelCol)
 pixel_sampler (for variable_res_collate) will loop though each image, samples pixel within the mask, and returns
 them as the variable `indices` which has shape torch.Size([4096, 3]), where each row represents a pixel (image_idx, pixelRow, pixelCol)
 """
 batch = worker_pixel_sampler.sample(collated_batch)  # type: ignore
 # Note: collated_batch["image"].get_device() will return CPU if self.exclude_batch_keys_from_device contains 'image'
 ray_indices = batch["indices"]
-# the ray_bundle is on the GPU; batch["image"] is on the CPU, here we move it to the GPU
-ray_bundle = self.ray_generator(ray_indices).to(self.device)
+# Both ray_bundle and batch["image"] are on the CPU and will be moved to the GPU in the main process (parallel_datamanager.py)
+ray_bundle = self.ray_generator(ray_indices)
 if self.custom_ray_processor:
 ray_bundle, batch = self.custom_ray_processor(ray_bundle, batch)

@@ -645,10 +645,6 @@ def __iter__(self):
 camera, data = self.custom_image_processor(camera, data)

 i += 1
-camera = camera.to(self.device)
-for k in data.keys():
-    if isinstance(data[k], torch.Tensor):
-        data[k] = data[k].to(self.device)
 yield camera, data
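With the worker-side `.to(self.device)` calls removed above, batches now leave the background workers on CPU and are moved to the training device once per step in the main process (the `next_train` / `next_eval` changes in the datamanager files earlier in this commit). A hedged sketch of that consumer-side transfer follows; `move_dict_to_device` is a simplified stand-in for `nerfstudio.utils.misc.get_dict_to_torch`, not its actual implementation.

```python
# Hedged sketch of the main-process device transfer that replaces the worker-side moves.
from typing import Dict

import torch


def move_dict_to_device(data: Dict, device: torch.device) -> Dict:
    """Move every tensor value of a (possibly nested) batch dict to `device`."""
    out = {}
    for key, value in data.items():
        if isinstance(value, torch.Tensor):
            out[key] = value.to(device)
        elif isinstance(value, dict):
            out[key] = move_dict_to_device(value, device)
        else:
            out[key] = value
    return out


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch = {"image": torch.rand(4096, 3), "indices": torch.zeros(4096, 3, dtype=torch.long)}
batch = move_dict_to_device(batch, device)  # one transfer per training step, in the main process
```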
