Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion fvdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def gaussian_render_jagged(

from .convolution_plan import ConvolutionPlan
from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
from .enums import ProjectionType, ShOrderingMode
from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode

# Import torch-compatible functions that work with both Tensor and JaggedTensor
from .torch_jagged import (
Expand Down Expand Up @@ -191,6 +191,8 @@ def gaussian_render_jagged(
"JaggedTensor",
"GaussianSplat3d",
"ProjectedGaussianSplats",
"CameraModel",
"RollingShutterType",
"ProjectionType",
"ShOrderingMode",
"ConvolutionPlan",
Expand Down
4 changes: 3 additions & 1 deletion fvdb/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _parse_device_string(device_string: str | torch.device) -> torch.device: ...
from . import nn, utils, viz
from ._fvdb_cpp import ConvPackBackend, config, hilbert, morton, volume_render
from .convolution_plan import ConvolutionPlan
from .enums import ProjectionType, ShOrderingMode
from .enums import CameraModel, ProjectionType, RollingShutterType, ShOrderingMode
from .gaussian_splatting import GaussianSplat3d, ProjectedGaussianSplats
from .grid import Grid
from .grid_batch import GridBatch, gcat
Expand Down Expand Up @@ -109,6 +109,8 @@ __all__ = [
"GaussianSplat3d",
"ProjectedGaussianSplats",
"ConvolutionPlan",
"CameraModel",
"RollingShutterType",
"ProjectionType",
"ShOrderingMode",
"Grid",
Expand Down
31 changes: 31 additions & 0 deletions fvdb/_fvdb_cpp.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,24 @@ class GaussianSplat3d:
antialias: bool = ...,
backgrounds: Optional[torch.Tensor] = ...,
) -> tuple[torch.Tensor, torch.Tensor]: ...
# Type stub for the compiled ``GaussianSplat3d.render_images_from_world`` binding.
# Default values are elided (``...``) in this stub. The Python wrapper in
# fvdb/gaussian_splatting.py forwards every argument by keyword using exactly
# these parameter names.
def render_images_from_world(
    self,
    world_to_camera_matrices: torch.Tensor,
    projection_matrices: torch.Tensor,
    image_width: int,
    image_height: int,
    near: float,
    far: float,
    camera_model: "CameraModel" = ...,  # quoted: CameraModel is declared further down in this stub
    distortion_coeffs: Optional[torch.Tensor] = ...,
    sh_degree_to_use: int = ...,
    tile_size: int = ...,
    min_radius_2d: float = ...,
    eps_2d: float = ...,
    antialias: bool = ...,
    backgrounds: Optional[torch.Tensor] = ...,
    masks: Optional[torch.Tensor] = ...,
) -> tuple[torch.Tensor, torch.Tensor]: ...  # presumably (images, alpha_images), per the wrapper's docstring
def sparse_render_images(
self,
pixels_to_render: JaggedTensor,
Expand Down Expand Up @@ -1267,3 +1285,16 @@ def volume_render(
packInfo: torch.Tensor,
transmittanceThresh: float,
) -> list[torch.Tensor]: ...

# Type stub for the RollingShutterType enum declared by the _fvdb_cpp module.
# Member values are elided (``...``); fvdb/enums.py declares a Python-side enum
# with the same member names (NONE, VERTICAL, HORIZONTAL).
class RollingShutterType(Enum):
    NONE = ...
    VERTICAL = ...
    HORIZONTAL = ...

# Type stub for the CameraModel enum declared by the _fvdb_cpp module.
# The Python wrapper in fvdb/gaussian_splatting.py looks members up by name
# (``getattr(_C.CameraModel, camera_model.name)``), so the member names here
# need to stay in sync with fvdb.enums.CameraModel.
class CameraModel(Enum):
    PINHOLE = ...
    OPENCV_RADTAN_5 = ...
    OPENCV_RATIONAL_8 = ...
    OPENCV_RADTAN_THIN_PRISM_9 = ...
    OPENCV_THIN_PRISM_12 = ...
    ORTHOGRAPHIC = ...
72 changes: 71 additions & 1 deletion fvdb/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#

from enum import Enum
from enum import Enum, IntEnum


class ProjectionType(str, Enum):
Expand Down Expand Up @@ -48,3 +48,73 @@ class ShOrderingMode(str, Enum):
The feature channels of spherical harmonics are stored in separate blocks for each coefficient. *i.e.* The spherical harmonics
tensor corresponds to a (row-major) contiguous tensor of shape ``[num_coefficients, channels, num_sh_bases]``, where channels=3 for RGB.
"""


class RollingShutterType(IntEnum):
    """
    Selects how rolling shutter is modelled during camera projection / ray generation.

    With a rolling shutter, different rows or columns of an image are exposed at different
    times. FVDB relies on this setting to interpolate between each camera's start and end
    pose while rays are being generated.
    """

    NONE = 0
    """
    Rolling shutter disabled: every pixel uses the start pose.
    """

    VERTICAL = 1
    """
    Rolling shutter along the vertical axis: exposure time depends on the image row (y).
    """

    HORIZONTAL = 2
    """
    Rolling shutter along the horizontal axis: exposure time depends on the image column (x).
    """


class CameraModel(IntEnum):
    """
    Identifies the camera model used for projection / ray generation.

    Notes:
        - Distortion coefficients are ignored by ``PINHOLE`` and ``ORTHOGRAPHIC``.
        - Every ``OPENCV_*`` member combines pinhole intrinsics with OpenCV-style distortion.
          If distortion coefficients are supplied, FVDB expects them in this packed layout:

          ``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``

          Coefficients that a given model does not use should be set to 0.
    """

    PINHOLE = 0
    """
    Distortion-free, ideal pinhole camera.
    """

    OPENCV_RADTAN_5 = 1
    """
    OpenCV radial-tangential distortion, 5 parameters (k1,k2,p1,p2,k3).
    """

    OPENCV_RATIONAL_8 = 2
    """
    OpenCV rational radial-tangential distortion, 8 parameters (k1..k6,p1,p2).
    """

    OPENCV_RADTAN_THIN_PRISM_9 = 3
    """
    OpenCV radial-tangential distortion combined with thin-prism distortion, 9 parameters
    (k1,k2,p1,p2,k3,s1..s4).
    """

    OPENCV_THIN_PRISM_12 = 4
    """
    OpenCV rational radial-tangential distortion combined with thin-prism distortion,
    12 parameters (k1..k6,p1,p2,s1..s4).
    """

    ORTHOGRAPHIC = 5
    """
    Distortion-free orthographic camera.
    """
113 changes: 112 additions & 1 deletion fvdb/gaussian_splatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from typing import Any, Mapping, Sequence, TypeVar, overload

import torch
from fvdb.enums import ProjectionType
from fvdb.enums import CameraModel, ProjectionType

from . import _fvdb_cpp as _C
from ._fvdb_cpp import GaussianSplat3d as GaussianSplat3dCpp
from ._fvdb_cpp import JaggedTensor as JaggedTensorCpp
from ._fvdb_cpp import ProjectedGaussianSplats as ProjectedGaussianSplatsCpp
Expand Down Expand Up @@ -1877,6 +1878,116 @@ def render_images(
backgrounds=backgrounds,
)

def render_images_from_world(
    self,
    world_to_camera_matrices: torch.Tensor,
    projection_matrices: torch.Tensor,
    image_width: int,
    image_height: int,
    near: float,
    far: float,
    camera_model: CameraModel = CameraModel.PINHOLE,
    distortion_coeffs: torch.Tensor | None = None,
    sh_degree_to_use: int = -1,
    tile_size: int = 16,
    min_radius_2d: float = 0.0,
    eps_2d: float = 0.3,
    antialias: bool = False,
    backgrounds: torch.Tensor | None = None,
    masks: torch.Tensor | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Rasterize world-space 3D Gaussians directly into dense images.

    The method mirrors :meth:`render_images`, except that rasterization happens in 3D:
    each pixel's ray is evaluated against the Gaussian ellipsoids rather than against 2D
    conics produced by a projection step. As a result, gradients with respect to the
    Gaussian geometry (``means``, ``quats``, ``log_scales``) flow through rasterization,
    which is useful for UT-based OpenCV camera models.

    Notes:
        - Output is **dense-only**: dense tensors of shape ``(C, H, W, ...)``.
        - The tile intersection data still comes from a (non-differentiable) projection
          step, so gradients may be discontinuous when Gaussians move between tiles.
        - Backgrounds are composited with standard "over" alpha compositing. When
          ``backgrounds`` is given, the output color is:

          ``color = sum_i (feat_i * alpha_i * T_i) + T_final * background``

          with ``T_final`` being the transmittance remaining at the end of rasterization
          and ``alpha = 1 - T_final``.
        - ``masks`` is a **per-tile** boolean mask (parity with the classic rasterizer).
          Any tile with ``masks[c, th, tw] == False`` is skipped entirely: its output is
          background with ``alpha=0`` and it contributes **zero gradients**.

    Example:

    .. code-block:: python

        images, alphas = gaussian_splat_3d.render_images_from_world(
            world_to_camera_matrices,  # [C,4,4]
            projection_matrices,  # [C,3,3]
            image_width=640,
            image_height=480,
            near=0.01,
            far=1e10,
            camera_model=fvdb.CameraModel.OPENCV_RATIONAL_8,
            distortion_coeffs=dist_coeffs,  # [C,12]
            backgrounds=bg,  # [C,D]
            masks=tile_mask,  # [C,tileH,tileW] (optional)
        )

    Args:
        world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)``.
        projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)``.
        image_width (int): Width ``W`` of the output images.
        image_height (int): Height ``H`` of the output images.
        near (float): Near clipping plane.
        far (float): Far clipping plane.
        camera_model (CameraModel): Camera model used for ray generation and distortion.
        distortion_coeffs (torch.Tensor | None): Distortion coefficients for OpenCV camera
            models, or ``None`` for no distortion. The expected shape is ``(C, 12)`` in the
            packed layout ``[k1,k2,k3,k4,k5,k6,p1,p2,s1,s2,s3,s4]``; entries a camera model
            does not use should be set to 0.
        sh_degree_to_use (int): SH degree to use; ``-1`` uses all available SH bases.
        tile_size (int): Tile size in pixels. ``tileH = ceil(H / tile_size)``,
            ``tileW = ceil(W / tile_size)``.
        min_radius_2d (float): Minimum projected radius (in pixels) used for tiling/culling.
        eps_2d (float): Padding used during tiling/projection to avoid numerical issues.
        antialias (bool): If ``True``, applies opacity correction (when available) when
            using ``eps_2d > 0.0``.
        backgrounds (torch.Tensor | None): Optional background colors of shape ``(C, D)``,
            where ``D`` is :attr:`num_channels`. ``None`` means the background is treated
            as 0.
        masks (torch.Tensor | None): Optional per-tile boolean mask of shape
            ``(C, tileH, tileW)``. Masked tiles are skipped and filled with background.

    Returns:
        images (torch.Tensor): Rendered images of shape ``(C, H, W, D)``.
        alpha_images (torch.Tensor): Alpha images of shape ``(C, H, W, 1)``.
    """
    # Translate the Python-side enum into the binding's enum by member name; any other
    # value is handed to the binding untouched.
    binding_camera_model = (
        getattr(_C.CameraModel, camera_model.name)
        if isinstance(camera_model, CameraModel)
        else camera_model
    )

    return self._impl.render_images_from_world(
        world_to_camera_matrices=world_to_camera_matrices,
        projection_matrices=projection_matrices,
        image_width=image_width,
        image_height=image_height,
        near=near,
        far=far,
        camera_model=binding_camera_model,
        distortion_coeffs=distortion_coeffs,
        sh_degree_to_use=sh_degree_to_use,
        tile_size=tile_size,
        min_radius_2d=min_radius_2d,
        eps_2d=eps_2d,
        antialias=antialias,
        backgrounds=backgrounds,
        masks=masks,
    )

def sparse_render_images(
self,
pixels_to_render: JaggedTensorOrTensorT,
Expand Down
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ set(FVDB_CPP_FILES
fvdb/detail/autograd/Inject.cpp
fvdb/detail/autograd/GaussianProjection.cpp
fvdb/detail/autograd/GaussianRasterize.cpp
fvdb/detail/autograd/GaussianRasterizeFromWorld.cpp
fvdb/detail/autograd/GaussianRasterizeSparse.cpp
fvdb/detail/autograd/JaggedReduce.cpp
fvdb/detail/autograd/MaxPoolGrid.cpp
Expand Down Expand Up @@ -121,6 +122,8 @@ set(FVDB_CU_FILES
fvdb/detail/ops/gsplat/GaussianProjectionJaggedForward.cu
fvdb/detail/ops/gsplat/GaussianRasterizeBackward.cu
fvdb/detail/ops/gsplat/GaussianRasterizeForward.cu
fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldBackward.cu
fvdb/detail/ops/gsplat/GaussianRasterizeFromWorldForward.cu
fvdb/detail/ops/gsplat/GaussianRasterizeNumContributingGaussians.cu
fvdb/detail/ops/gsplat/GaussianRasterizeTopContributingGaussianIds.cu
fvdb/detail/ops/gsplat/GaussianRasterizeContributingGaussianIds.cu
Expand Down
Loading
Loading