Skip to content

Commit 11f8d8d

Browse files
feat(gsplat): add from-world rasterization path and UT integration
Introduce differentiable dense from-world Gaussian rasterization with UT-aligned camera/projection APIs, gradient-capable render paths, and focused C++/Python coverage for correctness and regression safety.

Signed-off-by: Francis Williams <francis@fwilliams.info>
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent b67b227 commit 11f8d8d

35 files changed

+3869
-512
lines changed

fvdb/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def gaussian_render_jagged(
126126

127127
from .convolution_plan import ConvolutionPlan
128128
from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
129-
from .enums import ProjectionType, ShOrderingMode
129+
from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode
130130

131131
# Import torch-compatible functions that work with both Tensor and JaggedTensor
132132
from .torch_jagged import (
@@ -190,6 +190,8 @@ def gaussian_render_jagged(
190190
"JaggedTensor",
191191
"GaussianSplat3d",
192192
"ProjectedGaussianSplats",
193+
"CameraModel",
194+
"RollingShutterType",
193195
"ProjectionType",
194196
"ShOrderingMode",
195197
"ConvolutionPlan",

fvdb/__init__.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def _parse_device_string(device_string: str | torch.device) -> torch.device: ...
1919
from . import nn, utils, viz
2020
from ._fvdb_cpp import config, hilbert, morton, volume_render
2121
from .convolution_plan import ConvolutionPlan
22-
from .enums import ProjectionType, ShOrderingMode
22+
from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode
2323
from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
2424
from .grid import Grid
2525
from .grid_batch import GridBatch, gcat
@@ -109,6 +109,8 @@ __all__ = [
109109
"GaussianSplat3d",
110110
"ProjectedGaussianSplats",
111111
"ConvolutionPlan",
112+
"CameraModel",
113+
"RollingShutterType",
112114
"ProjectionType",
113115
"ShOrderingMode",
114116
"Grid",

fvdb/_fvdb_cpp.pyi

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,24 @@ class GaussianSplat3d:
196196
antialias: bool = ...,
197197
backgrounds: Optional[torch.Tensor] = ...,
198198
) -> tuple[torch.Tensor, torch.Tensor]: ...
199+
def render_images_from_world(
200+
self,
201+
world_to_camera_matrices: torch.Tensor,
202+
projection_matrices: torch.Tensor,
203+
image_width: int,
204+
image_height: int,
205+
near: float,
206+
far: float,
207+
camera_model: "CameraModel" = ...,
208+
distortion_coeffs: Optional[torch.Tensor] = ...,
209+
sh_degree_to_use: int = ...,
210+
tile_size: int = ...,
211+
min_radius_2d: float = ...,
212+
eps_2d: float = ...,
213+
antialias: bool = ...,
214+
backgrounds: Optional[torch.Tensor] = ...,
215+
masks: Optional[torch.Tensor] = ...,
216+
) -> tuple[torch.Tensor, torch.Tensor]: ...
199217
def sparse_render_images(
200218
self,
201219
pixels_to_render: JaggedTensor,
@@ -1178,3 +1196,16 @@ def volume_render(
11781196
packInfo: torch.Tensor,
11791197
transmittanceThresh: float,
11801198
) -> list[torch.Tensor]: ...
1199+
1200+
class RollingShutterType(Enum):
1201+
NONE = ...
1202+
VERTICAL = ...
1203+
HORIZONTAL = ...
1204+
1205+
class CameraModel(Enum):
1206+
PINHOLE = ...
1207+
OPENCV_RADTAN_5 = ...
1208+
OPENCV_RATIONAL_8 = ...
1209+
OPENCV_RADTAN_THIN_PRISM_9 = ...
1210+
OPENCV_THIN_PRISM_12 = ...
1211+
ORTHOGRAPHIC = ...

fvdb/enums.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# SPDX-License-Identifier: Apache-2.0
33
#
44

5-
from enum import Enum
5+
from enum import Enum, IntEnum
66

77

88
class ProjectionType(str, Enum):
@@ -48,3 +48,73 @@ class ShOrderingMode(str, Enum):
4848
The feature channels of spherical harmonics are stored in separate blocks for each coefficient, *i.e.* the spherical harmonics
4949
tensor corresponds to a (row-major) contiguous tensor of shape ``[num_coefficients, channels, num_sh_bases]``, where channels=3 for RGB.
5050
"""
51+
52+
53+
class RollingShutterType(IntEnum):
54+
"""
55+
Rolling shutter policy for camera projection / ray generation.
56+
57+
Rolling shutter models treat different image rows/columns as having different exposure times.
58+
FVDB uses this to interpolate between per-camera start/end poses when generating rays.
59+
"""
60+
61+
NONE = 0
62+
"""
63+
No rolling shutter: the start pose is used for all pixels.
64+
"""
65+
66+
VERTICAL = 1
67+
"""
68+
Vertical rolling shutter: exposure time varies with image row (y).
69+
"""
70+
71+
HORIZONTAL = 2
72+
"""
73+
Horizontal rolling shutter: exposure time varies with image column (x).
74+
"""
75+
76+
77+
class CameraModel(IntEnum):
78+
"""
79+
Camera model for projection / ray generation.
80+
81+
Notes:
82+
- ``PINHOLE`` and ``ORTHOGRAPHIC`` ignore distortion coefficients.
83+
- ``OPENCV_*`` variants use pinhole intrinsics plus OpenCV-style distortion. When distortion
84+
coefficients are provided, FVDB expects a packed layout:
85+
86+
``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``
87+
88+
Unused coefficients for a given model should be set to 0.
89+
"""
90+
91+
PINHOLE = 0
92+
"""
93+
Ideal pinhole camera model (no distortion).
94+
"""
95+
96+
OPENCV_RADTAN_5 = 1
97+
"""
98+
OpenCV radial-tangential distortion with 5 parameters (k1,k2,p1,p2,k3).
99+
"""
100+
101+
OPENCV_RATIONAL_8 = 2
102+
"""
103+
OpenCV rational radial-tangential distortion with 8 parameters (k1..k6,p1,p2).
104+
"""
105+
106+
OPENCV_RADTAN_THIN_PRISM_9 = 3
107+
"""
108+
OpenCV radial-tangential + thin-prism distortion with 9 parameters (k1,k2,p1,p2,k3,s1..s4).
109+
"""
110+
111+
OPENCV_THIN_PRISM_12 = 4
112+
"""
113+
OpenCV rational radial-tangential + thin-prism distortion with 12 parameters
114+
(k1..k6,p1,p2,s1..s4).
115+
"""
116+
117+
ORTHOGRAPHIC = 5
118+
"""
119+
Orthographic camera model (no distortion).
120+
"""

fvdb/gaussian_splatting.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
from typing import Any, Mapping, Sequence, TypeVar, overload
66

77
import torch
8-
from fvdb.enums import ProjectionType
8+
from fvdb.enums import CameraModel, ProjectionType
99

10+
from . import _fvdb_cpp as _C
1011
from ._fvdb_cpp import GaussianSplat3d as GaussianSplat3dCpp
1112
from ._fvdb_cpp import JaggedTensor as JaggedTensorCpp
1213
from ._fvdb_cpp import ProjectedGaussianSplats as ProjectedGaussianSplatsCpp
@@ -1877,6 +1878,117 @@ def render_images(
18771878
backgrounds=backgrounds,
18781879
)
18791880

1881+
def render_images_from_world(
1882+
self,
1883+
world_to_camera_matrices: torch.Tensor,
1884+
projection_matrices: torch.Tensor,
1885+
image_width: int,
1886+
image_height: int,
1887+
near: float,
1888+
far: float,
1889+
camera_model: CameraModel = CameraModel.PINHOLE,
1890+
distortion_coeffs: torch.Tensor | None = None,
1891+
sh_degree_to_use: int = -1,
1892+
tile_size: int = 16,
1893+
min_radius_2d: float = 0.0,
1894+
eps_2d: float = 0.3,
1895+
antialias: bool = False,
1896+
backgrounds: torch.Tensor | None = None,
1897+
masks: torch.Tensor | None = None,
1898+
) -> tuple[torch.Tensor, torch.Tensor]:
1899+
"""
1900+
Render dense images by rasterizing directly from world-space 3D Gaussians.
1901+
1902+
This is similar to :meth:`render_images`, but the rasterization step is performed in 3D
1903+
using per-pixel rays against the Gaussian ellipsoids (instead of rasterizing 2D conics
1904+
produced by a projection step). This enables gradients w.r.t. Gaussian geometry
1905+
(``means``, ``quats``, ``log_scales``) through rasterization, which is useful for
1906+
Unscented Transform (UT)-based OpenCV camera models.
1907+
1908+
Notes:
1909+
- This is **dense-only**: outputs are dense tensors of shape ``(C, H, W, ...)``.
1910+
- Tile intersection data is still computed from a (non-differentiable) projection
1911+
step, so gradients can be discontinuous when small parameter changes cause a Gaussian
1912+
to enter/leave a tile (or switch which tiles it overlaps).
1913+
- Background compositing follows standard "over" alpha compositing. If
1914+
``backgrounds`` is provided, the output color is:
1915+
1916+
``color = sum_i (feat_i * alpha_i * T_i) + T_final * background``
1917+
1918+
where ``T_final`` is the remaining transmittance at the end of rasterization, and
1919+
the returned alpha image is ``alpha = 1 - T_final``.
1920+
- ``masks`` is a **per-tile** boolean mask (parity with the classic rasterizer).
1921+
Tiles where ``masks[c, th, tw] == False`` are skipped entirely: the output is
1922+
background with ``alpha=0`` and the tile contributes **zero gradients**.
1923+
1924+
Example:
1925+
1926+
.. code-block:: python
1927+
1928+
images, alphas = gaussian_splat_3d.render_images_from_world(
1929+
world_to_camera_matrices, # [C,4,4]
1930+
projection_matrices, # [C,3,3]
1931+
image_width=640,
1932+
image_height=480,
1933+
near=0.01,
1934+
far=1e10,
1935+
camera_model=fvdb.CameraModel.OPENCV_RATIONAL_8,
1936+
distortion_coeffs=dist_coeffs, # [C,12]
1937+
backgrounds=bg, # [C,D]
1938+
masks=tile_mask, # [C,tileH,tileW] (optional)
1939+
)
1940+
1941+
Args:
1942+
world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)``.
1943+
projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)``.
1944+
image_width (int): Output image width ``W``.
1945+
image_height (int): Output image height ``H``.
1946+
near (float): Near clipping plane.
1947+
far (float): Far clipping plane.
1948+
camera_model (CameraModel): Camera model used for ray generation and distortion.
1949+
distortion_coeffs (torch.Tensor | None): Distortion coefficients for OpenCV camera
1950+
models. Use ``None`` for no distortion. Expected shape is ``(C, 12)`` with packed
1951+
layout ``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``. For camera models that use fewer
1952+
coefficients, unused entries should be set to 0.
1953+
sh_degree_to_use (int): SH degree to use. ``-1`` means use all available SH bases.
1954+
tile_size (int): Tile size (in pixels). ``tileH = ceil(H / tile_size)``,
1955+
``tileW = ceil(W / tile_size)``.
1956+
min_radius_2d (float): Minimum projected radius (in pixels) used for tiling/culling.
1957+
eps_2d (float): Padding used during tiling/projection to avoid numerical issues.
1958+
antialias (bool): If ``True``, applies opacity correction (when available) when using
1959+
``eps_2d > 0.0``.
1960+
backgrounds (torch.Tensor | None): Optional background colors of shape ``(C, D)``,
1961+
where ``D`` is :attr:`num_channels`. If ``None``, background is treated as 0.
1962+
masks (torch.Tensor | None): Optional per-tile boolean mask of shape
1963+
``(C, tileH, tileW)``. Masked tiles are skipped and filled with background.
1964+
1965+
Returns:
1966+
images (torch.Tensor): Rendered images of shape ``(C, H, W, D)``.
1967+
alpha_images (torch.Tensor): Alpha images of shape ``(C, H, W, 1)``.
1968+
"""
1969+
if isinstance(camera_model, CameraModel):
1970+
camera_model_cpp = getattr(_C.CameraModel, camera_model.name)
1971+
else:
1972+
camera_model_cpp = camera_model
1973+
1974+
return self._impl.render_images_from_world(
1975+
world_to_camera_matrices=world_to_camera_matrices,
1976+
projection_matrices=projection_matrices,
1977+
image_width=image_width,
1978+
image_height=image_height,
1979+
near=near,
1980+
far=far,
1981+
camera_model=camera_model_cpp,
1982+
distortion_coeffs=distortion_coeffs,
1983+
sh_degree_to_use=sh_degree_to_use,
1984+
tile_size=tile_size,
1985+
min_radius_2d=min_radius_2d,
1986+
eps_2d=eps_2d,
1987+
antialias=antialias,
1988+
backgrounds=backgrounds,
1989+
masks=masks,
1990+
)
1991+
18801992
def sparse_render_images(
18811993
self,
18821994
pixels_to_render: JaggedTensorOrTensorT,

src/CMakeLists.txt

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@ option(FVDB_BUILD_TESTS "Configure CMake to build tests" ON)
1010
option(FVDB_BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF)
1111
option(FVDB_STRIP_SYMBOLS "Strip symbols from the build" OFF)
1212
option(FVDB_LINEINFO "Enable lineinfo in the build" OFF)
13-
option(FVDB_USE_OPENMP "Enable OpenMP for CPU parallelization" ON)
1413

1514
message(STATUS "FVDB: Configure CMake to build tests: ${FVDB_BUILD_TESTS}")
1615
message(STATUS "FVDB: Configure CMake to build (google & nvbench) benchmarks: ${FVDB_BUILD_BENCHMARKS}")
1716
message(STATUS "FVDB_STRIP_SYMBOLS: ${FVDB_STRIP_SYMBOLS}")
18-
message(STATUS "FVDB_USE_OPENMP: ${FVDB_USE_OPENMP}")
1917

2018
# Get dependencies
2119
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/get_cpm.cmake)
@@ -51,13 +49,15 @@ set(FVDB_CPP_FILES
5149
fvdb/detail/autograd/Inject.cpp
5250
fvdb/detail/autograd/GaussianProjection.cpp
5351
fvdb/detail/autograd/GaussianRasterize.cpp
52+
fvdb/detail/autograd/GaussianRasterizeFromWorld.cpp
5453
fvdb/detail/autograd/GaussianRasterizeSparse.cpp
5554
fvdb/detail/autograd/JaggedReduce.cpp
5655
fvdb/detail/autograd/MaxPoolGrid.cpp
5756
fvdb/detail/autograd/ReadFromDense.cpp
5857
fvdb/detail/autograd/ReadIntoDense.cpp
5958
fvdb/detail/autograd/SampleGrid.cpp
6059
fvdb/detail/autograd/SparseConvolutionHalo.cpp
60+
fvdb/detail/autograd/SparseConvolutionImplicitGEMM.cpp
6161
fvdb/detail/autograd/SparseConvolutionKernelMap.cpp
6262
fvdb/detail/autograd/SplatIntoGrid.cpp
6363
fvdb/detail/autograd/TransformPoints.cpp
@@ -73,6 +73,7 @@ set(FVDB_CPP_FILES
7373
fvdb/GaussianSplat3d.cpp
7474
fvdb/GridBatch.cpp
7575
fvdb/JaggedTensor.cpp
76+
fvdb/SparseConvPackInfo.cpp
7677
)
7778

7879
set(FVDB_CU_FILES
@@ -92,10 +93,19 @@ set(FVDB_CU_FILES
9293
fvdb/detail/ops/BuildPaddedGrid.cu
9394
fvdb/detail/ops/BuildPrunedGrid.cu
9495
fvdb/detail/ops/CoarseIjkForFineGrid.cu
96+
fvdb/detail/ops/convolution/backend/MESparseConvolution.cu
97+
fvdb/detail/ops/convolution/backend/SparseConvolutionCutlass.cu
9598
fvdb/detail/ops/convolution/backend/SparseConvolutionHalo.cu
9699
fvdb/detail/ops/convolution/backend/SparseConvolutionHaloGrad.cu
100+
fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMM.cu
101+
fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMGrad.cu
102+
fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMGradSorted.cu
103+
fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMSorted.cu
97104
fvdb/detail/ops/convolution/backend/SparseConvolutionKernelMap.cu
105+
fvdb/detail/ops/convolution/backend/SparseConvolutionLggs.cu
106+
fvdb/detail/ops/convolution/pack_info/BrickHaloBuffer.cu
98107
fvdb/detail/ops/convolution/pack_info/ConvolutionKernelMap.cu
108+
fvdb/detail/ops/convolution/pack_info/IGEMMBitOperations.cu
99109
fvdb/detail/ops/CoordsInGrid.cu
100110
fvdb/detail/ops/CubesInGrid.cu
101111
fvdb/detail/ops/DownsampleGridAvgPool.cu
@@ -112,6 +122,8 @@ set(FVDB_CU_FILES
112122
fvdb/detail/ops/gsplat/GaussianProjectionJaggedForward.cu
113123
fvdb/detail/ops/gsplat/GaussianRasterizeBackward.cu
114124
fvdb/detail/ops/gsplat/GaussianRasterizeForward.cu
125+
fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldBackward.cu
126+
fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldForward.cu
115127
fvdb/detail/ops/gsplat/GaussianRasterizeNumContributingGaussians.cu
116128
fvdb/detail/ops/gsplat/GaussianRasterizeTopContributingGaussianIds.cu
117129
fvdb/detail/ops/gsplat/GaussianRasterizeContributingGaussianIds.cu

0 commit comments

Comments
 (0)