JasonLSC
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
Lines changed: 2 additions & 1 deletion
diff --git a/MANIFEST.in b/MANIFEST.in
Lines changed: 1 addition & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 4 additions & 3 deletions
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
Lines changed: 42 additions & 1 deletion
diff --git a/examples/simple_trainer_2dgs.py b/examples/simple_trainer_2dgs.py
Lines changed: 5 additions & 1 deletion
diff --git a/examples/simple_viewer.py b/examples/simple_viewer.py
Lines changed: 2 additions & 2 deletions
diff --git a/gsplat/__init__.py b/gsplat/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/gsplat/cuda/_backend.py b/gsplat/cuda/_backend.py
Lines changed: 1 addition & 1 deletion
diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
Lines changed: 18 additions & 0 deletions
@@ -106,5 +106,6 @@ jobs:
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
       run: |         
+        BUILD_NO_CUDA=1 python -m build
         twine upload --username __token__ --password $PYPI_TOKEN dist/*
-      shell: bash
+      shell: bash
@@ -1 +1,2 @@
 recursive-include gsplat/cuda/csrc *
+recursive-include gsplat/cuda/include *
@@ -40,11 +40,12 @@ To build gsplat from source on Windows, please check [this instruction](docs/INS
 This repo comes with a standalone script that reproduces the official Gaussian Splatting with exactly the same performance on PSNR, SSIM, LPIPS, and converged number of Gaussians. Powered by gsplat’s efficient CUDA implementation, the training takes up to **4x less GPU memory** with up to **15% less time** to finish than the official implementation. Full report can be found [here](https://docs.gsplat.studio/main/tests/eval.html).
 
 ```bash
-pip install -r examples/requirements.txt
+cd examples
+pip install -r requirements.txt
 # download mipnerf_360 benchmark data
-python examples/datasets/download_dataset.py
+python datasets/download_dataset.py
 # run batch evaluation
-bash examples/benchmarks/basic.sh
+bash benchmarks/basic.sh
 ```
 
 ## Examples
 
@@ -266,8 +266,8 @@ def __init__(
                     + params[2] * theta**6
                     + params[3] * theta**8
                 )
-                mapx = fx * x1 * r + width // 2
-                mapy = fy * y1 * r + height // 2
+                mapx = (fx * x1 * r + width // 2).astype(np.float32)
+                mapy = (fy * y1 * r + height // 2).astype(np.float32)
 
                 # Use mask to define ROI
                 mask = np.logical_and(
 
@@ -44,6 +44,8 @@
 from gsplat.distributed import cli
 from gsplat.rendering import rasterization
 from gsplat.strategy import DefaultStrategy, MCMCStrategy
+from gsplat.optimizers import SelectiveAdam
+
 from gsplat.compression_simulation import CompressionSimulation
 from gsplat.compression_simulation.entropy_model import Entropy_factorized_optimized_refactor, Entropy_gaussian
 
@@ -196,6 +198,8 @@ class Config:
     packed: bool = False
     # Use sparse gradients for optimization. (experimental)
     sparse_grad: bool = False
+    # Use visible adam from Taming 3DGS. (experimental)
+    visible_adam: bool = False
     # Anti-aliasing in rasterization. Might slightly hurt quantitative metrics.
     antialiased: bool = False
 
@@ -272,6 +276,7 @@ def create_splats_with_optimizers(
     scene_scale: float = 1.0,
     sh_degree: int = 3,
     sparse_grad: bool = False,
+    visible_adam: bool = False,
     batch_size: int = 1,
     feature_dim: Optional[int] = None,
     device: str = "cuda",
@@ -328,8 +333,15 @@ def create_splats_with_optimizers(
     # Note that this would not make the training exactly equivalent, see
     # https://arxiv.org/pdf/2402.18824v1
     BS = batch_size * world_size
+    optimizer_class = None
+    if sparse_grad:
+        optimizer_class = torch.optim.SparseAdam
+    elif visible_adam:
+        optimizer_class = SelectiveAdam
+    else:
+        optimizer_class = torch.optim.Adam
     optimizers = {
-        name: (torch.optim.SparseAdam if sparse_grad else torch.optim.Adam)(
+        name: optimizer_class(
             [{"params": splats[name], "lr": lr * math.sqrt(BS), "name": name}],
             eps=1e-15 / math.sqrt(BS),
             # TODO: check betas logic when BS is larger than 10 betas[0] will be zero.
@@ -397,6 +409,7 @@ def __init__(
             scene_scale=self.scene_scale,
             sh_degree=cfg.sh_degree,
             sparse_grad=cfg.sparse_grad,
+            visible_adam=cfg.visible_adam,
             batch_size=cfg.batch_size,
             feature_dim=feature_dim,
             device=self.device,
@@ -926,6 +939,34 @@ def train(self):
                             is_coalesced=len(Ks) == 1,
                         )
 
+            if cfg.visible_adam:
+                gaussian_cnt = self.splats.means.shape[0]
+                if cfg.packed:
+                    visibility_mask = torch.zeros_like(
+                        self.splats["opacities"], dtype=bool
+                    )
+                    visibility_mask.scatter_(0, info["gaussian_ids"], 1)
+                else:
+                    visibility_mask = (info["radii"] > 0).any(0)
+
+            # optimize
+            for optimizer in self.optimizers.values():
+                if cfg.visible_adam:
+                    optimizer.step(visibility_mask)
+                else:
+                    optimizer.step()
+                optimizer.zero_grad(set_to_none=True)
+            for optimizer in self.pose_optimizers:
+                optimizer.step()
+                optimizer.zero_grad(set_to_none=True)
+            for optimizer in self.app_optimizers:
+                optimizer.step()
+                optimizer.zero_grad(set_to_none=True)
+            for optimizer in self.bil_grid_optimizers:
+                optimizer.step()
+                optimizer.zero_grad(set_to_none=True)
+            for scheduler in schedulers:
+                scheduler.step()
                 # optimize
                 for optimizer in self.optimizers.values():
                     optimizer.step()
 
@@ -436,7 +436,7 @@ def rasterize_splats(
                 **kwargs,
             )
         elif self.model_type == "2dgs-inria":
-            render_colors, render_alphas, info = rasterization_2dgs_inria_wrapper(
+            renders, info = rasterization_2dgs_inria_wrapper(
                 means=means,
                 quats=quats,
                 scales=scales,
@@ -577,6 +577,10 @@ def train(self):
                 step=step,
                 info=info,
             )
+            masks = data["mask"].to(device) if "mask" in data else None
+            if masks is not None:
+                pixels = pixels * masks[..., None]
+                colors = colors * masks[..., None]
 
             # loss
             l1loss = F.l1_loss(colors, pixels)
 
@@ -68,7 +68,7 @@ def main(local_rank: int, world_rank, world_size: int, args):
                 quats,  # [N, 4]
                 scales,  # [N, 3]
                 opacities,  # [N]
-                colors,  # [N, 3]
+                colors,  # [N, S, 3]
                 viewmats,  # [C, 4, 4]
                 Ks,  # [C, 3, 3]
                 width,
@@ -181,7 +181,7 @@ def viewer_render_fn(camera_state: nerfview.CameraState, img_wh: Tuple[int, int]
             quats,  # [N, 4]
             scales,  # [N, 3]
             opacities,  # [N]
-            colors,  # [N, 3]
+            colors,  # [N, S, 3]
             viewmat[None],  # [1, 4, 4]
             K[None],  # [1, 3, 3]
             width,
 
@@ -1,6 +1,7 @@
 import warnings
 
 from .compression import PngCompression
+from .optimizers import SelectiveAdam
 from .cuda._torch_impl import accumulate
 from .cuda._torch_impl_2dgs import accumulate_2dgs
 from .cuda._wrapper import (
 
@@ -89,7 +89,7 @@ def cuda_toolkit_version():
         current_dir = os.path.dirname(os.path.abspath(__file__))
         glm_path = os.path.join(current_dir, "csrc", "third_party", "glm")
 
-        extra_include_paths = [os.path.join(PATH, "csrc/"), glm_path]
+        extra_include_paths = [os.path.join(PATH, "include/"), glm_path]
         extra_cflags = ["-O3"]
         if NO_FAST_MATH:
             extra_cuda_cflags = ["-O3"]
 
@@ -16,6 +16,24 @@ def call_cuda(*args, **kwargs):
     return call_cuda
 
 
+def selective_adam_update(  # thin wrapper: forwards all arguments, unchanged and in order, to the CUDA op of the same name
+    param: Tensor,
+    param_grad: Tensor,  # presumably the gradient of `param` — TODO confirm against the CUDA kernel
+    exp_avg: Tensor,  # presumably Adam's first-moment buffer — TODO confirm
+    exp_avg_sq: Tensor,  # presumably Adam's second-moment buffer — TODO confirm
+    tiles_touched: Tensor,  # NOTE(review): looks like a visibility mask selecting which entries update — verify against the caller
+    lr: float,
+    b1: float,  # presumably Adam beta1 — TODO confirm
+    b2: float,  # presumably Adam beta2 — TODO confirm
+    eps: float,
+    N: int,  # NOTE(review): meaning of N and M is defined by the kernel and is not visible here — confirm
+    M: int,
+) -> None:  # nothing is returned; whatever the CUDA call yields is discarded
+    _make_lazy_cuda_func("selective_adam_update")(  # the lazy wrapper is looked up by name on every call
+        param, param_grad, exp_avg, exp_avg_sq, tiles_touched, lr, b1, b2, eps, N, M
+    )
+
+
 def _make_lazy_cuda_obj(name: str) -> Any:
     # pylint: disable=import-outside-toplevel
     from ._backend import _C
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1,2 @@` |
| `1` | `1` | `recursive-include gsplat/cuda/csrc *` |
| | `2` | `+recursive-include gsplat/cuda/include *` |