Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions genmetaballs/src/cuda/bindings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <nanobind/operators.h>
#include <nanobind/stl/vector.h>

#include "core/blender.cuh"
#include "core/confidence.cuh"
#include "core/geometry.cuh"
#include "core/utils.cuh"
Expand All @@ -27,14 +28,49 @@ NB_MODULE(_genmetaballs_bindings, m) {
nb::module_ confidence = m.def_submodule("confidence");
nb::class_<ZeroParameterConfidence>(confidence, "ZeroParameterConfidence")
.def(nb::init<>())
.def("get_confidence", &ZeroParameterConfidence::get_confidence);
.def("get_confidence", &ZeroParameterConfidence::get_confidence, nb::arg("sumexpd"),
"Get the confidence value for a given sumexpd")
.def("__repr__",
[](const ZeroParameterConfidence& c) { return nb::str("ZeroParameterConfidence()"); });

nb::class_<TwoParameterConfidence>(confidence, "TwoParameterConfidence")
.def(nb::init<float, float>())
.def("get_confidence", &TwoParameterConfidence::get_confidence);
.def_rw("beta4", &TwoParameterConfidence::beta4)
.def_rw("beta5", &TwoParameterConfidence::beta5)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to use def_ro for immutable types.

.def("get_confidence", &TwoParameterConfidence::get_confidence, nb::arg("sumexpd"),
"Get the confidence value for a given sumexpd")
.def("__repr__", [](const TwoParameterConfidence& c) {
return nb::str("TwoParameterConfidence(beta4={}, beta5={})").format(c.beta4, c.beta5);
});

// utils submodule
nb::module_ utils = m.def_submodule("utils");
utils.def("sigmoid", sigmoid, nb::arg("x"), "Compute the sigmoid function: 1 / (1 + exp(-x))");

// blender submodule
nb::module_ blender = m.def_submodule("blender");
nb::class_<FourParameterBlender>(blender, "FourParameterBlender")
.def(nb::init<float, float, float, float>())
.def_rw("beta1", &FourParameterBlender::beta1)
.def_rw("beta2", &FourParameterBlender::beta2)
.def_rw("beta3", &FourParameterBlender::beta3)
.def_rw("eta", &FourParameterBlender::eta)
.def("blend", &FourParameterBlender::blend, nb::arg("t"), nb::arg("d"),
"Blend two values with (t,d)")
.def("__repr__", [](const FourParameterBlender& b) {
return nb::str("FourParameterBlender(beta1={}, beta2={}, beta3={}, eta={})")
.format(b.beta1, b.beta2, b.beta3, b.eta);
});

nb::class_<ThreeParameterBlender>(blender, "ThreeParameterBlender")
.def(nb::init<float, float, float>())
.def_rw("beta1", &ThreeParameterBlender::beta1)
.def_rw("beta2", &ThreeParameterBlender::beta2)
.def_rw("eta", &ThreeParameterBlender::eta)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't do shit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also def_ro

.def("blend", &ThreeParameterBlender::blend, nb::arg("t"), nb::arg("d"),
"Blend two values with (t,d)")
.def("__repr__", [](const ThreeParameterBlender& b) {
return nb::str("ThreeParameterBlender(beta1={}, beta2={}, eta={})")
.format(b.beta1, b.beta2, b.eta);
});
} // NB_MODULE(_genmetaballs_bindings)
23 changes: 18 additions & 5 deletions genmetaballs/src/cuda/core/blender.cuh
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
#pragma once

#include "fmb.h"
#include "geometry.h"
#include <cmath>
#include <cuda_runtime.h>

#include "utils.cuh"

// Blender with four scalar parameters: beta1, beta2, beta3 and eta.
//
// blend(t, d) evaluates
//   expf(beta1 * d * sigmoid((beta3 / eta) * t) - (beta2 / eta) * t)
// in single precision. eta is only ever used as a divisor, so eta == 0 results in a
// floating-point division by zero inside blend.
struct FourParameterBlender {
float beta1;
float beta2;
float beta3;
float eta;

// Computes the blend value for the pair (t, d) using the formula in the struct comment.
// The method is const and reads only the four parameters above.
// NOTE(review): sigmoid is presumably the helper declared in utils.cuh - confirm.
CUDA_CALLABLE __forceinline__ float blend(float t, float d) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be nice to have definition of the methods in a blender.cu file :)

return expf((beta1 * d * sigmoid((beta3 / eta) * t)) - ((beta2 / eta) * t));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL

}
};

// Blender with three scalar parameters: beta1, beta2 and eta.
//
// The two-argument blend(t, d) evaluates expf(beta1 * d - (beta2 / eta) * t) in single
// precision. eta is only used as a divisor, so eta == 0 results in a floating-point
// division by zero inside blend.
struct ThreeParameterBlender {
float beta1;
float beta2;
float eta;

// NOTE(review): the four-argument declaration below (taking FMB and Ray) has no definition
// in this view; it looks like the pre-change side of the diff - confirm before relying on it.
CUDA_CALLABLE __forceinline__ // TODO inline?
float
blend(float t, float d, const FMB& fmb, const Ray& ray) const;
// Computes the blend value for the pair (t, d): expf((beta1 * d) - ((beta2 / eta) * t)).
CUDA_CALLABLE __forceinline__ float blend(float t, float d) const {
return expf((beta1 * d) - ((beta2 / eta) * t));
}
};
2 changes: 1 addition & 1 deletion genmetaballs/src/cuda/core/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ __global__ render_kernel(const Getter fmb_getter, const Blender blender,
float w0 = 0.0f, tf = 0.0f, sumexpd = 0.0f;
for (const auto& fmb : fmb_getter->get_metaballs(ray)) {
const auto& [t, d] = Intersector::intersect(fmb, ray);
w = blender->blend(t, d, fmb, ray);
w = blender->blend(t, d);
sumexpd += exp(d);
tf += t;
w0 += w;
Expand Down
6 changes: 6 additions & 0 deletions genmetaballs/src/genmetaballs/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from genmetaballs._genmetaballs_bindings.blender import (
FourParameterBlender,
ThreeParameterBlender,
)
from genmetaballs._genmetaballs_bindings.confidence import (
TwoParameterConfidence,
ZeroParameterConfidence,
Expand All @@ -8,4 +12,6 @@
"ZeroParameterConfidence",
"TwoParameterConfidence",
"sigmoid",
"FourParameterBlender",
"ThreeParameterBlender",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of directly exposing the confidence methods, let's have a .blender

]
164 changes: 164 additions & 0 deletions tests/cpp_tests/test_blender.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <limits>
#include <random>
#include <vector>

#include "core/blender.cuh"
#include "core/utils.cuh"

// Test kernel: for every index i in [0, n), stores blender.blend(t[i], d[i]) in blended[i].
//
// Parameters:
//   t, d     - input arrays, each readable for at least n elements
//   blended  - output array, writable for at least n elements
//   n        - number of elements to process
//   blender  - any object exposing a device-callable blend(float, float) const
//
// Each thread starts at its global 1D index and advances by the total number of launched
// threads, so every element is covered even when the launch has fewer threads than n
// (for example when the caller caps the number of blocks). Indices are computed in 64 bits
// so that neither the start index nor the increment can wrap around for large n.
template <typename Blender>
__global__ void blender_kernel(const float* t, const float* d, float* blended, uint32_t n,
                               Blender blender) {
    const uint64_t first = (static_cast<uint64_t>(blockIdx.x) * blockDim.x) + threadIdx.x;
    const uint64_t stride = static_cast<uint64_t>(gridDim.x) * blockDim.x;
    for (uint64_t i = first; i < n; i += stride) {
        blended[i] = blender.blend(t[i], d[i]);
    }
}

// Upper bound on the number of blocks gpu_blend launches.
constexpr uint32_t GRID_DIM = 256;
// Number of threads per block used by gpu_blend.
constexpr uint32_t BLOCK_DIM = 1024;

// Host helper for the tests: copies t_vec and d_vec to the device, launches blender_kernel
// with the given blender, and returns the blended values copied back to the host.
//
// The element count n is taken from t_vec.size(); d_vec is copied with the same byte count,
// so d_vec must hold at least as many elements as t_vec. The returned vector has n elements.
// Every CUDA runtime call is wrapped in CUDA_CHECK.
// NOTE(review): CUDA_CHECK presumably comes from core/utils.cuh and reports failures - confirm.
template <typename Blender>
std::vector<float> gpu_blend(const std::vector<float>& t_vec, const std::vector<float>& d_vec,
Blender blender) {
auto n = static_cast<uint32_t>(t_vec.size());
auto nbytes = n * sizeof(float);
float *d_t = nullptr, *d_d = nullptr, *d_blended = nullptr;
std::vector<float> result(n);

// Allocate the three device buffers and upload both inputs.
CUDA_CHECK(cudaMalloc(&d_t, nbytes));
CUDA_CHECK(cudaMalloc(&d_d, nbytes));
CUDA_CHECK(cudaMalloc(&d_blended, nbytes));
CUDA_CHECK(cudaMemcpy(d_t, t_vec.data(), nbytes, cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(d_d, d_vec.data(), nbytes, cudaMemcpyHostToDevice));
Comment on lines +31 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For readability: you might want to look into some of thrust's utilities (e.g. thrust::device_vector) to reduce the amount of manual memory management :).


// Round the block count up so that grid_dim * block_dim >= n, then cap it at GRID_DIM.
// NOTE(review): with the cap, inputs larger than GRID_DIM * BLOCK_DIM are only fully
// processed if blender_kernel strides over the grid - confirm against the kernel.
// NOTE(review): n == 0 yields grid_dim == 0 here - confirm that empty input is never passed.
auto block_dim = BLOCK_DIM;
auto grid_dim = (n + block_dim - 1) / block_dim;
if (grid_dim > GRID_DIM)
grid_dim = GRID_DIM;

// NOTE(review): the launch itself is not followed by an explicit error query; consider
// checking cudaGetLastError() after it.
blender_kernel<Blender><<<grid_dim, block_dim>>>(d_t, d_d, d_blended, n, blender);

// Download the results, then release all device buffers.
CUDA_CHECK(cudaMemcpy(result.data(), d_blended, nbytes, cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaFree(d_t));
CUDA_CHECK(cudaFree(d_d));
CUDA_CHECK(cudaFree(d_blended));
return result;
}

// Number of RNG seeds each test draws from the master generator.
constexpr int NUM_RNG_SEEDS_PER_TEST = 5;
// Number of distinct input sizes each test iterates over.
constexpr int NUM_N_VALUES_PER_TEST = 5;
// Seed of the master generator from which the per-run seeds are derived.
constexpr uint32_t MASTER_SEED = 42;

// Returns the input sizes used by the blender tests: NUM_N_VALUES_PER_TEST consecutive
// powers of two starting at 2^4, i.e. [16, 32, 64, 128, 256].
static std::vector<int> blender_test_sizes() {
    std::vector<int> sizes(NUM_N_VALUES_PER_TEST);
    int power_of_two = 1 << 4;
    for (int& entry : sizes) {
        entry = power_of_two;
        power_of_two *= 2;
    }
    return sizes;
}

// One named parameter set for FourParameterBlender, in the order
// (beta1, beta2, beta3, eta, name).
struct BlenderCase {
    float beta1;
    float beta2;
    float beta3;
    float eta;
    const char* name;
};

// Returns the four FourParameterBlender parameter sets exercised by the tests.
static std::vector<BlenderCase> blender_cases() {
    std::vector<BlenderCase> cases;
    cases.reserve(4);
    cases.push_back(BlenderCase{1.0F, 0.5F, 0.2F, 2.0F, "case1"});
    cases.push_back(BlenderCase{-2.0F, 1.0F, -1.0F, 1.5F, "case2"});
    cases.push_back(BlenderCase{0.0F, 0.0F, 1.0F, 1.0F, "case3"});
    cases.push_back(BlenderCase{0.5F, -0.5F, 0.8F, 0.5F, "case4"});
    return cases;
}

// Smoke test for FourParameterBlender.
//
// For every combination of input size, parameter set and RNG seed, random (t, d) pairs in
// [0, 10) are blended on the GPU via gpu_blend. The output must have one value per input and
// every value must agree with the same blend() call evaluated on the host.
// NOTE(review): assumes CUDA_CALLABLE makes blend() callable from host code (the Python
// bindings take its address, which suggests so) - confirm.
TEST(GpuBlenderTest, Blender_GPU_Smoke_FourParameter) {
    // Host and device expf/sigmoid may differ by a few ulps, so compare with a tolerance
    // relative to the magnitude of the expected value (absolute for values below 1).
    constexpr float REL_TOL = 1e-3F;

    // Derive the per-run seeds deterministically from the master seed.
    std::mt19937 master_gen(MASTER_SEED);
    std::uniform_int_distribution<uint32_t> seed_dist(0, std::numeric_limits<uint32_t>::max());
    std::vector<uint32_t> seeds(NUM_RNG_SEEDS_PER_TEST);
    for (auto& s : seeds)
        s = seed_dist(master_gen);

    for (const int N : blender_test_sizes()) {
        const auto count = static_cast<size_t>(N);

        for (const auto& blend_case : blender_cases()) {
            for (const uint32_t test_seed : seeds) {
                SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed
                                                << ", blend_type=" << blend_case.name);

                std::mt19937 rng(test_seed);
                std::uniform_real_distribution<float> tdist(0.0F, 10.0F);
                std::uniform_real_distribution<float> ddist(0.0F, 10.0F);

                std::vector<float> t_vec(count), d_vec(count);
                for (size_t i = 0; i < count; ++i) {
                    t_vec[i] = tdist(rng);
                    d_vec[i] = ddist(rng);
                }

                const FourParameterBlender blender{blend_case.beta1, blend_case.beta2,
                                                   blend_case.beta3, blend_case.eta};

                const std::vector<float> actual = gpu_blend(t_vec, d_vec, blender);

                // Stop here on a size mismatch: the element-wise check indexes actual.
                ASSERT_EQ(actual.size(), count);

                for (size_t i = 0; i < count; ++i) {
                    const float expected = blender.blend(t_vec[i], d_vec[i]);
                    const float tolerance = REL_TOL * std::max(1.0F, std::abs(expected));
                    EXPECT_NEAR(actual[i], expected, tolerance) << "at index " << i;
                }
            }
        }
    }
}

// Smoke test for ThreeParameterBlender
// One named parameter set for ThreeParameterBlender, in the order
// (beta1, beta2, eta, name).
struct ThreeParamBlenderCase {
    float beta1;
    float beta2;
    float eta;
    const char* name;
};

// Returns the four ThreeParameterBlender parameter sets exercised by the tests.
static std::vector<ThreeParamBlenderCase> threeparam_blender_cases() {
    std::vector<ThreeParamBlenderCase> cases;
    cases.reserve(4);
    cases.push_back(ThreeParamBlenderCase{1.0F, 0.5F, 2.0F, "three_case1"});
    cases.push_back(ThreeParamBlenderCase{-2.0F, 1.0F, 1.5F, "three_case2"});
    cases.push_back(ThreeParamBlenderCase{0.0F, 0.0F, 1.0F, "three_case3"});
    cases.push_back(ThreeParamBlenderCase{0.5F, -0.5F, 0.5F, "three_case4"});
    return cases;
}

// Smoke test for ThreeParameterBlender.
//
// For every combination of input size, parameter set and RNG seed, random (t, d) pairs in
// [0, 10) are blended on the GPU via gpu_blend. The output must have one value per input and
// every value must agree with the same blend() call evaluated on the host.
// NOTE(review): assumes CUDA_CALLABLE makes blend() callable from host code (the Python
// bindings take its address, which suggests so) - confirm.
TEST(GpuBlenderTest, Blender_GPU_Smoke_ThreeParameter) {
    // Host and device expf may differ by a few ulps, so compare with a tolerance relative
    // to the magnitude of the expected value (absolute for values below 1).
    constexpr float REL_TOL = 1e-3F;

    // Derive the per-run seeds deterministically from the master seed.
    std::mt19937 master_gen(MASTER_SEED);
    std::uniform_int_distribution<uint32_t> seed_dist(0, std::numeric_limits<uint32_t>::max());
    std::vector<uint32_t> seeds(NUM_RNG_SEEDS_PER_TEST);
    for (auto& s : seeds)
        s = seed_dist(master_gen);

    for (const int N : blender_test_sizes()) {
        const auto count = static_cast<size_t>(N);

        for (const auto& blend_case : threeparam_blender_cases()) {
            for (const uint32_t test_seed : seeds) {
                SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed
                                                << ", blend_type=" << blend_case.name);

                std::mt19937 rng(test_seed);
                std::uniform_real_distribution<float> tdist(0.0F, 10.0F);
                std::uniform_real_distribution<float> ddist(0.0F, 10.0F);

                std::vector<float> t_vec(count), d_vec(count);
                for (size_t i = 0; i < count; ++i) {
                    t_vec[i] = tdist(rng);
                    d_vec[i] = ddist(rng);
                }

                const ThreeParameterBlender blender{blend_case.beta1, blend_case.beta2,
                                                    blend_case.eta};

                const std::vector<float> actual = gpu_blend(t_vec, d_vec, blender);

                // Stop here on a size mismatch: the element-wise check indexes actual.
                ASSERT_EQ(actual.size(), count);

                for (size_t i = 0; i < count; ++i) {
                    const float expected = blender.blend(t_vec[i], d_vec[i]);
                    const float tolerance = REL_TOL * std::max(1.0F, std::abs(expected));
                    EXPECT_NEAR(actual[i], expected, tolerance) << "at index " << i;
                }
            }
        }
    }
}
Loading