Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ set(FVDB_CU_FILES
fvdb/detail/ops/gsplat/GaussianRasterizeNumContributingGaussians.cu
fvdb/detail/ops/gsplat/GaussianRasterizeTopContributingGaussianIds.cu
fvdb/detail/ops/gsplat/GaussianRasterizeContributingGaussianIds.cu
fvdb/detail/ops/gsplat/GaussianRelocation.cu
fvdb/detail/ops/gsplat/GaussianSphericalHarmonicsBackward.cu
fvdb/detail/ops/gsplat/GaussianSphericalHarmonicsForward.cu
fvdb/detail/ops/gsplat/GaussianSplatSparse.cu
Expand Down
139 changes: 139 additions & 0 deletions src/fvdb/detail/ops/gsplat/GaussianRelocation.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: Apache-2.0
//

#include <fvdb/detail/ops/gsplat/GaussianRelocation.h>
#include <fvdb/detail/utils/AccessorHelpers.cuh>
#include <fvdb/detail/utils/Nvtx.h>
#include <fvdb/detail/utils/cuda/GridDim.h>

#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>

#include <complex.h>

namespace fvdb::detail::ops {
namespace {

template <typename ScalarType>
__global__ void
gaussianRelocationKernel(fvdb::TorchRAcc64<ScalarType, 1> opacities,
fvdb::TorchRAcc64<ScalarType, 2> scales,
fvdb::TorchRAcc64<int32_t, 1> ratios,
fvdb::TorchRAcc64<ScalarType, 2> binomialCoeffs,
fvdb::TorchRAcc64<ScalarType, 1> opacitiesNew,
fvdb::TorchRAcc64<ScalarType, 2> scalesNew,
std::size_t nMax) {
const auto N = opacities.size(0);
for (uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N;
idx += blockDim.x * gridDim.x) {
const int32_t nIdx = ratios[idx];

// compute new opacity
opacitiesNew[idx] = ScalarType(1.0) - powf(1 - opacities[idx], ScalarType(1.0) / nIdx);

// compute new scale
ScalarType denomSum = 0.0f;
for (int32_t i = 1; i <= nIdx; ++i) {
for (int32_t k = 0; k <= (i - 1); ++k) {
const ScalarType binomialCoefficient = binomialCoeffs[i - 1][k];
denomSum += binomialCoefficient * pow(-1, k) * powf(opacitiesNew[idx], k + 1) *
rsqrtf(k + 1);
}
}

const ScalarType coeff = (opacities[idx] / denomSum);
for (int i = 0; i < 3; ++i) {
scalesNew[idx][i] = coeff * scales[idx][i];
}
}
}

template <typename ScalarType>
std::tuple<torch::Tensor, torch::Tensor>
launchGaussianRelocation(const torch::Tensor &opacities, // [N]
const torch::Tensor &scales, // [N, 3]
const torch::Tensor &ratios, // [N]
const torch::Tensor &binomialCoeffs, // [nMax, nMax]
const int nMax) {
const auto N = opacities.size(0);

auto opacitiesNew = torch::empty_like(opacities);
auto scalesNew = torch::empty_like(scales);

const int blockDim = DEFAULT_BLOCK_DIM;
const int gridDim = fvdb::GET_BLOCKS(N, blockDim);
const at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();

gaussianRelocationKernel<ScalarType><<<gridDim, blockDim, 0, stream>>>(
opacities.packed_accessor64<ScalarType, 1, torch::RestrictPtrTraits>(),
scales.packed_accessor64<ScalarType, 2, torch::RestrictPtrTraits>(),
ratios.packed_accessor64<int32_t, 1, torch::RestrictPtrTraits>(),
binomialCoeffs.packed_accessor64<ScalarType, 2, torch::RestrictPtrTraits>(),
opacitiesNew.packed_accessor64<ScalarType, 1, torch::RestrictPtrTraits>(),
scalesNew.packed_accessor64<ScalarType, 2, torch::RestrictPtrTraits>(),
nMax);

C10_CUDA_KERNEL_LAUNCH_CHECK();
return std::make_tuple(opacitiesNew, scalesNew);
}

} // namespace

template <>
std::tuple<torch::Tensor, torch::Tensor>
dispatchGaussianRelocation<torch::kCUDA>(const torch::Tensor &opacities, // [N]
const torch::Tensor &scales, // [N, 3]
const torch::Tensor &ratios, // [N]
const torch::Tensor &binomialCoeffs, // [nMax, nMax]
const int nMax) {
FVDB_FUNC_RANGE();
const at::cuda::OptionalCUDAGuard device_guard(device_of(opacities));

const auto N = opacities.size(0);

TORCH_CHECK_VALUE(binomialCoeffs.is_cuda(), "binomialCoeffs must be a CUDA tensor");
TORCH_CHECK_VALUE(binomialCoeffs.dim() == 2, "binomialCoeffs must have shape [nMax, nMax]");
TORCH_CHECK_VALUE(binomialCoeffs.size(0) == nMax,
"binomialCoeffs must have shape [nMax, nMax]");
TORCH_CHECK_VALUE(binomialCoeffs.size(1) == nMax,
"binomialCoeffs must have shape [nMax, nMax]");
TORCH_CHECK_VALUE(opacities.is_cuda(), "opacities must be a CUDA tensor");
TORCH_CHECK_VALUE(opacities.dim() == 1, "opacities must have shape [N]");
TORCH_CHECK_VALUE(scales.is_cuda(), "scales must be a CUDA tensor");
TORCH_CHECK_VALUE(scales.dim() == 2, "scales must have shape [N, 3]");
TORCH_CHECK_VALUE(scales.size(0) == N, "scales must have shape [N, 3]");
TORCH_CHECK_VALUE(scales.size(1) == 3, "scales must have shape [N, 3]");
TORCH_CHECK_VALUE(ratios.is_cuda(), "ratios must be a CUDA tensor");
TORCH_CHECK_VALUE(ratios.dim() == 1, "ratios must have shape [N]");
TORCH_CHECK_VALUE(ratios.size(0) == N, "ratios must have shape [N]");
TORCH_CHECK_VALUE(ratios.dtype() == torch::kInt32, "ratios must be an int32 tensor");

// Only float is supported for now, matching the kernel math (powf/rsqrtf).
return launchGaussianRelocation<float>(
opacities.contiguous(), scales.contiguous(), ratios.contiguous(), binomialCoeffs, nMax);
}

template <>
std::tuple<torch::Tensor, torch::Tensor>
dispatchGaussianRelocation<torch::kPrivateUse1>(const torch::Tensor &opacities, // [N]
const torch::Tensor &scales, // [N, 3]
const torch::Tensor &ratios, // [N]
const torch::Tensor &binomialCoeffs, // [nMax, nMax]
const int nMax) {
// TODO: Implement PrivateUse1
TORCH_CHECK_NOT_IMPLEMENTED(false, "PrivateUse1 implementation not available");
}

template <>
std::tuple<torch::Tensor, torch::Tensor>
dispatchGaussianRelocation<torch::kCPU>(const torch::Tensor &opacities, // [N]
const torch::Tensor &scales, // [N, 3]
const torch::Tensor &ratios, // [N]
const torch::Tensor &binomialCoeffs, // [nMax, nMax]
const int nMax) {
// CPU path intentionally unsupported; keep signature for clearer error messaging in tests.
TORCH_CHECK_NOT_IMPLEMENTED(false, "GaussianRelocation is not implemented for CPU");
}

} // namespace fvdb::detail::ops
37 changes: 37 additions & 0 deletions src/fvdb/detail/ops/gsplat/GaussianRelocation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: Apache-2.0
//

#ifndef FVDB_DETAIL_OPS_GSPLAT_GAUSSIANRELOCATION_H
#define FVDB_DETAIL_OPS_GSPLAT_GAUSSIANRELOCATION_H

#include <torch/types.h>

#include <tuple>

namespace fvdb {
namespace detail {
namespace ops {

/// @brief Relocate Gaussians by adjusting opacity and scale based on replication ratio.
///
/// @param opacities Input opacities [N]
/// @param scales Input scales [N, 3]
/// @param ratios Replication ratios per Gaussian [N] (int32)
/// @param binomialCoeffs Binomial coefficients table [nMax, nMax]
/// @param nMax Maximum replication ratio (size of binomial table)
///
/// @return tuple of (opacitiesNew [N], scalesNew [N, 3])
template <torch::DeviceType DeviceType>
std::tuple<torch::Tensor, torch::Tensor>
dispatchGaussianRelocation(const torch::Tensor &opacities, // [N]
const torch::Tensor &scales, // [N, 3]
const torch::Tensor &ratios, // [N]
const torch::Tensor &binomialCoeffs, // [nMax, nMax]
const int nMax);

} // namespace ops
} // namespace detail
} // namespace fvdb

#endif // FVDB_DETAIL_OPS_GSPLAT_GAUSSIANRELOCATION_H
1 change: 1 addition & 0 deletions src/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ ConfigureTest(GaussianProjectionForwardTest "GaussianProjectionForwardTest.cpp")
ConfigureTest(GaussianProjectionBackwardTest "GaussianProjectionBackwardTest.cpp")
ConfigureTest(GaussianRasterizeTopContributorsTest "GaussianRasterizeTopContributorsTest.cpp")
ConfigureTest(GaussianRasterizeContributingGaussianIdsTest "GaussianRasterizeContributingGaussianIdsTest.cpp")
ConfigureTest(GaussianRelocationTest "GaussianRelocationTest.cpp")

if(NOT NANOVDB_EDITOR_SKIP)
#ConfigureTest(ViewerTest "ViewerTest.cpp")
Expand Down
188 changes: 188 additions & 0 deletions src/tests/GaussianRelocationTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: Apache-2.0

#include "utils/Tensor.h"

#include <fvdb/detail/ops/gsplat/GaussianRelocation.h>

#include <torch/torch.h>

#include <gtest/gtest.h>

#include <cmath>
#include <tuple>

namespace {

int64_t
binomial(int n, int k) {
if (k < 0 || k > n)
return 0;
if (k == 0 || k == n)
return 1;
int64_t res = 1;
for (int i = 1; i <= k; ++i) {
res = res * (n - (k - i));
res /= i;
}
return res;
}

torch::Tensor
buildBinomialCoeffsCPU(int nMax) {
auto coeffs = torch::zeros({nMax, nMax}, torch::TensorOptions().dtype(torch::kFloat32));
for (int row = 0; row < nMax; ++row) {
for (int k = 0; k <= row; ++k) {
coeffs[row][k] = static_cast<float>(binomial(row, k));
}
}
return coeffs;
}

std::tuple<torch::Tensor, torch::Tensor>
referenceRelocation(const torch::Tensor &opacities,
const torch::Tensor &scales,
const torch::Tensor &ratios,
const torch::Tensor &binomialCoeffsCPU) {
auto opacitiesNew = torch::empty_like(opacities);
auto scalesNew = torch::empty_like(scales);

const auto N = opacities.size(0);
for (int64_t idx = 0; idx < N; ++idx) {
const int32_t nIdx = ratios[idx].item<int32_t>();
const float opacity =
opacities[idx].item<float>(); // CPU tensor so item<float> is fine for test sizes
const float opacityNew = 1.0f - std::pow(1.0f - opacity, 1.0f / static_cast<float>(nIdx));
opacitiesNew[idx] = opacityNew;

float denomSum = 0.0f;
for (int32_t i = 1; i <= nIdx; ++i) {
for (int32_t k = 0; k <= (i - 1); ++k) {
const float binomCoeff = binomialCoeffsCPU[(i - 1)][k].item<float>();
const float sign = (k % 2 == 0) ? 1.0f : -1.0f;
denomSum += binomCoeff * sign * std::pow(opacityNew, static_cast<float>(k + 1)) /
std::sqrt(static_cast<float>(k + 1));
}
}

const float coeff = opacity / denomSum;
scalesNew[idx] = coeff * scales[idx];
}

return {opacitiesNew, scalesNew};
}

class GaussianRelocationTest : public ::testing::Test {
protected:
void
SetUp() override {
if (!torch::cuda::is_available()) {
GTEST_SKIP() << "CUDA is required for GaussianRelocation tests";
}
torch::manual_seed(0);
}

void
TestRelocation(const torch::Tensor &opacities,
const torch::Tensor &scales,
const torch::Tensor &ratios) {
const int nMax = opacities.size(0);
auto const binomialCoeffsCPU = buildBinomialCoeffsCPU(nMax);
auto const binomialCoeffs = binomialCoeffsCPU.to(opacities.device());

const auto [gpuOpacitiesNew, gpuScalesNew] =
fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities, scales, ratios, binomialCoeffs, nMax);

const auto [refOpacitiesNew, refScalesNew] =
referenceRelocation(opacities.cpu(), scales.cpu(), ratios.cpu(), binomialCoeffsCPU);

EXPECT_TRUE(torch::allclose(gpuOpacitiesNew.cpu(), refOpacitiesNew, 1e-6, 1e-6));
EXPECT_TRUE(torch::allclose(gpuScalesNew.cpu(), refScalesNew, 1e-6, 1e-6));
}
};

TEST_F(GaussianRelocationTest, ComputesExpectedValues) {
// Focus on low/dead opacities; include one mildly higher entry to ensure mixed behavior.
auto opacities =
torch::tensor({1e-6f, 5e-4f, 1e-2f, 0.2f}, fvdb::test::tensorOpts<float>(torch::kCUDA));
auto scales = torch::tensor(
{{1.0f, 0.8f, 1.2f}, {0.5f, 0.25f, 0.125f}, {1.5f, 0.6f, 0.9f}, {0.8f, 1.1f, 0.7f}},
fvdb::test::tensorOpts<float>(torch::kCUDA));
auto ratios = torch::tensor({1, 2, 3, 4},
torch::TensorOptions().device(torch::kCUDA).dtype(torch::kInt32));

TestRelocation(opacities, scales, ratios);
}

TEST_F(GaussianRelocationTest, HandlesEdgeOpacitiesAndRatios) {
auto opacities =
torch::tensor({1e-7f, 1e-5f, 5e-4f, 1e-2f}, fvdb::test::tensorOpts<float>(torch::kCUDA));
auto scales = torch::tensor(
{{1.0f, 1.0f, 1.0f}, {0.4f, 0.3f, 0.2f}, {1.8f, 0.6f, 0.9f}, {0.9f, 1.1f, 0.7f}},
fvdb::test::tensorOpts<float>(torch::kCUDA));
auto ratios = torch::tensor({1, 4, 2, 3},
torch::TensorOptions().device(torch::kCUDA).dtype(torch::kInt32));

TestRelocation(opacities, scales, ratios);
}

TEST_F(GaussianRelocationTest, ValidatesInputs) {
const int nMax = 2;
auto binomialCoeffsCPU = buildBinomialCoeffsCPU(nMax);

auto opacities =
torch::tensor({0.25f, 0.5f}, fvdb::test::tensorOpts<float>(torch::kCUDA)).contiguous();
auto scales = torch::tensor({{1.0f, 2.0f, 3.0f}, {0.5f, 1.0f, 2.0f}},
fvdb::test::tensorOpts<float>(torch::kCUDA))
.contiguous();
auto ratios =
torch::tensor({1, 2}, torch::TensorOptions().device(torch::kCUDA).dtype(torch::kInt32))
.contiguous();
auto binomialCoeffs = binomialCoeffsCPU.to(torch::kCUDA);

// binomialCoeffs on CPU
EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities, scales, ratios, binomialCoeffsCPU, nMax),
c10::Error);

// binomialCoeffs wrong shape
auto badBinomShape = binomialCoeffs.slice(/*dim=*/0, 0, nMax - 1);
EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities, scales, ratios, badBinomShape, nMax),
c10::Error);

// ratios wrong dtype
auto ratiosLong = ratios.to(torch::kInt64);
EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities, scales, ratiosLong, binomialCoeffs, nMax),
c10::Error);

// opacities on CPU
EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities.cpu(), scales, ratios, binomialCoeffs, nMax),
c10::Error);

// scales wrong shape
auto scalesBad = scales.view({2, 3, 1});
EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCUDA>(
opacities, scalesBad, ratios, binomialCoeffs, nMax),
c10::Error);
}

TEST_F(GaussianRelocationTest, CpuNotImplemented) {
const int nMax = 2;
auto binomialCoeffsCPU = buildBinomialCoeffsCPU(nMax);

auto opacities = torch::tensor({0.25f, 0.5f}, fvdb::test::tensorOpts<float>(torch::kCPU));
auto scales = torch::tensor({{1.0f, 2.0f, 3.0f}, {0.5f, 1.0f, 2.0f}},
fvdb::test::tensorOpts<float>(torch::kCPU));
auto ratios =
torch::tensor({1, 2}, torch::TensorOptions().device(torch::kCPU).dtype(torch::kInt32));

EXPECT_THROW(fvdb::detail::ops::dispatchGaussianRelocation<torch::kCPU>(
opacities, scales, ratios, binomialCoeffsCPU, nMax),
c10::Error);
}

} // namespace
Loading