fwilliams
diff --git a/fvdb/__init__.py b/fvdb/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/fvdb/__init__.pyi b/fvdb/__init__.pyi
Lines changed: 3 additions & 1 deletion
diff --git a/fvdb/_fvdb_cpp.pyi b/fvdb/_fvdb_cpp.pyi
Lines changed: 31 additions & 0 deletions
diff --git a/fvdb/enums.py b/fvdb/enums.py
Lines changed: 71 additions & 1 deletion
diff --git a/fvdb/gaussian_splatting.py b/fvdb/gaussian_splatting.py
Lines changed: 113 additions & 1 deletion
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
Lines changed: 14 additions & 2 deletions
@@ -126,7 +126,7 @@ def gaussian_render_jagged(
 
 from .convolution_plan import ConvolutionPlan
 from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
-from .enums import ProjectionType, ShOrderingMode
+from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode
 
 # Import torch-compatible functions that work with both Tensor and JaggedTensor
 from .torch_jagged import (
@@ -190,6 +190,8 @@ def gaussian_render_jagged(
     "JaggedTensor",
     "GaussianSplat3d",
     "ProjectedGaussianSplats",
+    "CameraModel",
+    "RollingShutterType",
     "ProjectionType",
     "ShOrderingMode",
     "ConvolutionPlan",
 
@@ -19,7 +19,7 @@ def _parse_device_string(device_string: str | torch.device) -> torch.device: ...
 from . import nn, utils, viz
 from ._fvdb_cpp import config, hilbert, morton, volume_render
 from .convolution_plan import ConvolutionPlan
-from .enums import ProjectionType, ShOrderingMode
+from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode
 from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
 from .grid import Grid
 from .grid_batch import GridBatch, gcat
@@ -109,6 +109,8 @@ __all__ = [
     "GaussianSplat3d",
     "ProjectedGaussianSplats",
     "ConvolutionPlan",
+    "CameraModel",
+    "RollingShutterType",
     "ProjectionType",
     "ShOrderingMode",
     "Grid",
 
@@ -196,6 +196,24 @@ class GaussianSplat3d:
         antialias: bool = ...,
         backgrounds: Optional[torch.Tensor] = ...,
     ) -> tuple[torch.Tensor, torch.Tensor]: ...
+    def render_images_from_world(  # parameter names mirror the keyword call in gaussian_splatting.py
+        self,
+        world_to_camera_matrices: torch.Tensor,
+        projection_matrices: torch.Tensor,
+        image_width: int,
+        image_height: int,
+        near: float,
+        far: float,
+        camera_model: "CameraModel" = ...,  # quoted: CameraModel is declared later in this stub
+        distortion_coeffs: Optional[torch.Tensor] = ...,
+        sh_degree_to_use: int = ...,
+        tile_size: int = ...,
+        min_radius_2d: float = ...,
+        eps_2d: float = ...,
+        antialias: bool = ...,
+        backgrounds: Optional[torch.Tensor] = ...,
+        masks: Optional[torch.Tensor] = ...,
+    ) -> tuple[torch.Tensor, torch.Tensor]: ...
     def sparse_render_images(
         self,
         pixels_to_render: JaggedTensor,
@@ -1178,3 +1196,16 @@ def volume_render(
     packInfo: torch.Tensor,
     transmittanceThresh: float,
 ) -> list[torch.Tensor]: ...
+
+class RollingShutterType(Enum):  # member names mirror fvdb.enums.RollingShutterType
+    NONE = ...
+    VERTICAL = ...
+    HORIZONTAL = ...
+
+class CameraModel(Enum):  # members are resolved by name via getattr() in gaussian_splatting.py
+    PINHOLE = ...
+    OPENCV_RADTAN_5 = ...
+    OPENCV_RATIONAL_8 = ...
+    OPENCV_RADTAN_THIN_PRISM_9 = ...
+    OPENCV_THIN_PRISM_12 = ...
+    ORTHOGRAPHIC = ...
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from enum import Enum
+from enum import Enum, IntEnum
 
 
 class ProjectionType(str, Enum):
@@ -48,3 +48,73 @@ class ShOrderingMode(str, Enum):
     The feature channels of spherical harmonics are stored in separate blocks for each coefficient. *i.e.* The spherical harmonics
     tensor corresponds to a (row-major) contiguous tensor of shape ``[num_coefficients, channels, num_sh_bases]``, where channels=3 for RGB.
     """
+
+
+class RollingShutterType(IntEnum):  # member names mirror the RollingShutterType stub in _fvdb_cpp.pyi
+    """
+    Rolling shutter policy for camera projection / ray generation.
+
+    Rolling shutter models treat different image rows/columns as having different exposure times.
+    FVDB uses this to interpolate between per-camera start/end poses when generating rays.
+    """
+
+    NONE = 0  # NOTE(review): integer values presumably match the C++ enum — confirm
+    """
+    No rolling shutter: the start pose is used for all pixels.
+    """
+
+    VERTICAL = 1
+    """
+    Vertical rolling shutter: exposure time varies with image row (y).
+    """
+
+    HORIZONTAL = 2
+    """
+    Horizontal rolling shutter: exposure time varies with image column (x).
+    """
+
+
+class CameraModel(IntEnum):  # gaussian_splatting.py maps members to the C++ enum by *name*; keep names in sync
+    """
+    Camera model for projection / ray generation.
+
+    Notes:
+    - ``PINHOLE`` and ``ORTHOGRAPHIC`` ignore distortion coefficients.
+    - ``OPENCV_*`` variants use pinhole intrinsics plus OpenCV-style distortion. When distortion
+      coefficients are provided, FVDB expects a packed layout:
+
+      ``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``
+
+      Unused coefficients for a given model should be set to 0.
+    """
+
+    PINHOLE = 0
+    """
+    Ideal pinhole camera model (no distortion).
+    """
+
+    OPENCV_RADTAN_5 = 1  # listing below follows OpenCV's order; input still uses the packed layout above
+    """
+    OpenCV radial-tangential distortion with 5 parameters (k1,k2,p1,p2,k3).
+    """
+
+    OPENCV_RATIONAL_8 = 2
+    """
+    OpenCV rational radial-tangential distortion with 8 parameters (k1..k6,p1,p2).
+    """
+
+    OPENCV_RADTAN_THIN_PRISM_9 = 3  # listing below follows OpenCV's order, not the packed layout above
+    """
+    OpenCV radial-tangential + thin-prism distortion with 9 parameters (k1,k2,p1,p2,k3,s1..s4).
+    """
+
+    OPENCV_THIN_PRISM_12 = 4
+    """
+    OpenCV rational radial-tangential + thin-prism distortion with 12 parameters
+    (k1..k6,p1,p2,s1..s4).
+    """
+
+    ORTHOGRAPHIC = 5
+    """
+    Orthographic camera model (no distortion).
+    """
@@ -5,8 +5,9 @@
 from typing import Any, Mapping, Sequence, TypeVar, overload
 
 import torch
-from fvdb.enums import ProjectionType
+from fvdb.enums import CameraModel, ProjectionType
 
+from . import _fvdb_cpp as _C
 from ._fvdb_cpp import GaussianSplat3d as GaussianSplat3dCpp
 from ._fvdb_cpp import JaggedTensor as JaggedTensorCpp
 from ._fvdb_cpp import ProjectedGaussianSplats as ProjectedGaussianSplatsCpp
@@ -1877,6 +1878,117 @@ def render_images(
             backgrounds=backgrounds,
         )
 
+    def render_images_from_world(
+        self,
+        world_to_camera_matrices: torch.Tensor,
+        projection_matrices: torch.Tensor,
+        image_width: int,
+        image_height: int,
+        near: float,
+        far: float,
+        camera_model: CameraModel = CameraModel.PINHOLE,
+        distortion_coeffs: torch.Tensor | None = None,
+        sh_degree_to_use: int = -1,
+        tile_size: int = 16,
+        min_radius_2d: float = 0.0,
+        eps_2d: float = 0.3,
+        antialias: bool = False,
+        backgrounds: torch.Tensor | None = None,
+        masks: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Render dense images by rasterizing directly from world-space 3D Gaussians.
+
+        This is similar to :meth:`render_images`, but the rasterization step is performed in 3D
+        using per-pixel rays against the Gaussian ellipsoids (instead of rasterizing 2D conics
+        produced by a projection step). This enables gradients w.r.t. Gaussian geometry
+        (``means``, ``quats``, ``log_scales``) through rasterization, which is useful for
+        Unscented Transform (UT)-based OpenCV camera models.
+
+        Notes:
+            - This is **dense-only**: outputs are dense tensors of shape ``(C, H, W, ...)``.
+            - Tile intersection data is still computed from a (non-differentiable) projection
+              step, so gradients can be discontinuous when small parameter changes cause a Gaussian
+              to enter/leave a tile (or switch which tiles it overlaps).
+            - Background compositing follows standard "over" alpha compositing. If
+              ``backgrounds`` is provided, the output color is:
+
+              ``color = sum_i (feat_i * alpha_i * T_i) + T_final * background``
+
+              where ``T_final`` is the remaining transmittance at the end of rasterization, and
+              ``alpha = 1 - T_final``.
+            - ``masks`` is a **per-tile** boolean mask (parity with the classic rasterizer).
+              Tiles where ``masks[c, th, tw] == False`` are skipped entirely: the output is
+              background with ``alpha=0`` and the tile contributes **zero gradients**.
+
+        Example:
+
+        .. code-block:: python
+
+            images, alphas = gaussian_splat_3d.render_images_from_world(
+                world_to_camera_matrices,  # [C,4,4]
+                projection_matrices,       # [C,3,3]
+                image_width=640,
+                image_height=480,
+                near=0.01,
+                far=1e10,
+                camera_model=fvdb.CameraModel.OPENCV_RATIONAL_8,
+                distortion_coeffs=dist_coeffs,  # [C,12]
+                backgrounds=bg,                 # [C,D]
+                masks=tile_mask,                # [C,tileH,tileW] (optional)
+            )
+
+        Args:
+            world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)``.
+            projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)``.
+            image_width (int): Output image width ``W``.
+            image_height (int): Output image height ``H``.
+            near (float): Near clipping plane.
+            far (float): Far clipping plane.
+            camera_model (CameraModel): Camera model used for ray generation and distortion.
+            distortion_coeffs (torch.Tensor | None): Distortion coefficients for OpenCV camera
+                models. Use ``None`` for no distortion. Expected shape is ``(C, 12)`` with packed
+                layout ``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``. For camera models that use fewer
+                coefficients, unused entries should be set to 0.
+            sh_degree_to_use (int): SH degree to use. ``-1`` means use all available SH bases.
+            tile_size (int): Tile size (in pixels). ``tileH = ceil(H / tile_size)``,
+                ``tileW = ceil(W / tile_size)``.
+            min_radius_2d (float): Minimum projected radius (in pixels) used for tiling/culling.
+            eps_2d (float): Padding used during tiling/projection to avoid numerical issues.
+            antialias (bool): If ``True``, applies opacity correction (when available) when using
+                ``eps_2d > 0.0``.
+            backgrounds (torch.Tensor | None): Optional background colors of shape ``(C, D)``,
+                where ``D`` is :attr:`num_channels`. If ``None``, background is treated as 0.
+            masks (torch.Tensor | None): Optional per-tile boolean mask of shape
+                ``(C, tileH, tileW)``. Masked tiles are skipped and filled with background.
+
+        Returns:
+            images (torch.Tensor): Rendered images of shape ``(C, H, W, D)``.
+            alpha_images (torch.Tensor): Alpha images of shape ``(C, H, W, 1)``.
+        """
+        if isinstance(camera_model, CameraModel):  # Python-side enum: translate it for the binding
+            camera_model_cpp = getattr(_C.CameraModel, camera_model.name)  # matched by member name, not value
+        else:  # anything that is not a fvdb.enums.CameraModel is forwarded untouched
+            camera_model_cpp = camera_model
+
+        return self._impl.render_images_from_world(  # NOTE(review): _impl presumably wraps GaussianSplat3dCpp — confirm
+            world_to_camera_matrices=world_to_camera_matrices,
+            projection_matrices=projection_matrices,
+            image_width=image_width,
+            image_height=image_height,
+            near=near,
+            far=far,
+            camera_model=camera_model_cpp,
+            distortion_coeffs=distortion_coeffs,
+            sh_degree_to_use=sh_degree_to_use,
+            tile_size=tile_size,
+            min_radius_2d=min_radius_2d,
+            eps_2d=eps_2d,
+            antialias=antialias,
+            backgrounds=backgrounds,
+            masks=masks,
+        )
+
     def sparse_render_images(
         self,
         pixels_to_render: JaggedTensorOrTensorT,
 
@@ -10,12 +10,10 @@ option(FVDB_BUILD_TESTS "Configure CMake to build tests" ON)
 option(FVDB_BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF)
 option(FVDB_STRIP_SYMBOLS "Strip symbols from the build" OFF)
 option(FVDB_LINEINFO "Enable lineinfo in the build" OFF)
-option(FVDB_USE_OPENMP "Enable OpenMP for CPU parallelization" ON)
 
 message(STATUS "FVDB: Configure CMake to build tests: ${FVDB_BUILD_TESTS}")
 message(STATUS "FVDB: Configure CMake to build (google & nvbench) benchmarks: ${FVDB_BUILD_BENCHMARKS}")
 message(STATUS "FVDB_STRIP_SYMBOLS: ${FVDB_STRIP_SYMBOLS}")
-message(STATUS "FVDB_USE_OPENMP: ${FVDB_USE_OPENMP}")
 
 # Get dependencies
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/get_cpm.cmake)
@@ -51,13 +49,15 @@ set(FVDB_CPP_FILES
     fvdb/detail/autograd/Inject.cpp
     fvdb/detail/autograd/GaussianProjection.cpp
     fvdb/detail/autograd/GaussianRasterize.cpp
+    fvdb/detail/autograd/GaussianRasterizeFromWorld.cpp
     fvdb/detail/autograd/GaussianRasterizeSparse.cpp
     fvdb/detail/autograd/JaggedReduce.cpp
     fvdb/detail/autograd/MaxPoolGrid.cpp
     fvdb/detail/autograd/ReadFromDense.cpp
     fvdb/detail/autograd/ReadIntoDense.cpp
     fvdb/detail/autograd/SampleGrid.cpp
     fvdb/detail/autograd/SparseConvolutionHalo.cpp
+    fvdb/detail/autograd/SparseConvolutionImplicitGEMM.cpp
     fvdb/detail/autograd/SparseConvolutionKernelMap.cpp
     fvdb/detail/autograd/SplatIntoGrid.cpp
     fvdb/detail/autograd/TransformPoints.cpp
@@ -73,6 +73,7 @@ set(FVDB_CPP_FILES
     fvdb/GaussianSplat3d.cpp
     fvdb/GridBatch.cpp
     fvdb/JaggedTensor.cpp
+    fvdb/SparseConvPackInfo.cpp
 )
 
 set(FVDB_CU_FILES
@@ -92,10 +93,19 @@ set(FVDB_CU_FILES
     fvdb/detail/ops/BuildPaddedGrid.cu
     fvdb/detail/ops/BuildPrunedGrid.cu
     fvdb/detail/ops/CoarseIjkForFineGrid.cu
+    fvdb/detail/ops/convolution/backend/MESparseConvolution.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionCutlass.cu
     fvdb/detail/ops/convolution/backend/SparseConvolutionHalo.cu
     fvdb/detail/ops/convolution/backend/SparseConvolutionHaloGrad.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMM.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMGrad.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMGradSorted.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionImplicitGEMMSorted.cu
     fvdb/detail/ops/convolution/backend/SparseConvolutionKernelMap.cu
+    fvdb/detail/ops/convolution/backend/SparseConvolutionLggs.cu
+    fvdb/detail/ops/convolution/pack_info/BrickHaloBuffer.cu
     fvdb/detail/ops/convolution/pack_info/ConvolutionKernelMap.cu
+    fvdb/detail/ops/convolution/pack_info/IGEMMBitOperations.cu
     fvdb/detail/ops/CoordsInGrid.cu
     fvdb/detail/ops/CubesInGrid.cu
     fvdb/detail/ops/DownsampleGridAvgPool.cu
@@ -112,6 +122,8 @@ set(FVDB_CU_FILES
     fvdb/detail/ops/gsplat/GaussianProjectionJaggedForward.cu
     fvdb/detail/ops/gsplat/GaussianRasterizeBackward.cu
     fvdb/detail/ops/gsplat/GaussianRasterizeForward.cu
+    fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldBackward.cu
+    fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldForward.cu
     fvdb/detail/ops/gsplat/GaussianRasterizeNumContributingGaussians.cu
     fvdb/detail/ops/gsplat/GaussianRasterizeTopContributingGaussianIds.cu
     fvdb/detail/ops/gsplat/GaussianRasterizeContributingGaussianIds.cu