-
Notifications
You must be signed in to change notification settings - Fork 0
MET-34 Blender #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MET-34 Blender #18
Changes from 5 commits
29506d2
bd16d63
75ddddb
e7ed647
abf7826
1e1af4c
1b8ca71
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| #include <nanobind/operators.h> | ||
| #include <nanobind/stl/vector.h> | ||
|
|
||
| #include "core/blender.cuh" | ||
| #include "core/confidence.cuh" | ||
| #include "core/geometry.cuh" | ||
| #include "core/utils.cuh" | ||
|
|
@@ -27,14 +28,49 @@ NB_MODULE(_genmetaballs_bindings, m) { | |
| nb::module_ confidence = m.def_submodule("confidence"); | ||
| nb::class_<ZeroParameterConfidence>(confidence, "ZeroParameterConfidence") | ||
| .def(nb::init<>()) | ||
| .def("get_confidence", &ZeroParameterConfidence::get_confidence); | ||
| .def("get_confidence", &ZeroParameterConfidence::get_confidence, nb::arg("sumexpd"), | ||
| "Get the confidence value for a given sumexpd") | ||
| .def("__repr__", | ||
| [](const ZeroParameterConfidence& c) { return nb::str("ZeroParameterConfidence()"); }); | ||
|
|
||
| nb::class_<TwoParameterConfidence>(confidence, "TwoParameterConfidence") | ||
| .def(nb::init<float, float>()) | ||
| .def("get_confidence", &TwoParameterConfidence::get_confidence); | ||
| .def_rw("beta4", &TwoParameterConfidence::beta4) | ||
| .def_rw("beta5", &TwoParameterConfidence::beta5) | ||
| .def("get_confidence", &TwoParameterConfidence::get_confidence, nb::arg("sumexpd"), | ||
| "Get the confidence value for a given sumexpd") | ||
| .def("__repr__", [](const TwoParameterConfidence& c) { | ||
| return nb::str("TwoParameterConfidence(beta4={}, beta5={})").format(c.beta4, c.beta5); | ||
| }); | ||
|
|
||
| // utils submodule | ||
| nb::module_ utils = m.def_submodule("utils"); | ||
| utils.def("sigmoid", sigmoid, nb::arg("x"), "Compute the sigmoid function: 1 / (1 + exp(-x))"); | ||
|
|
||
| // blender submodule | ||
| nb::module_ blender = m.def_submodule("blender"); | ||
| nb::class_<FourParameterBlender>(blender, "FourParameterBlender") | ||
| .def(nb::init<float, float, float, float>()) | ||
| .def_rw("beta1", &FourParameterBlender::beta1) | ||
| .def_rw("beta2", &FourParameterBlender::beta2) | ||
| .def_rw("beta3", &FourParameterBlender::beta3) | ||
| .def_rw("eta", &FourParameterBlender::eta) | ||
| .def("blend", &FourParameterBlender::blend, nb::arg("t"), nb::arg("d"), | ||
| "Blend two values with (t,d)") | ||
| .def("__repr__", [](const FourParameterBlender& b) { | ||
| return nb::str("FourParameterBlender(beta1={}, beta2={}, beta3={}, eta={})") | ||
| .format(b.beta1, b.beta2, b.beta3, b.eta); | ||
| }); | ||
|
|
||
| nb::class_<ThreeParameterBlender>(blender, "ThreeParameterBlender") | ||
| .def(nb::init<float, float, float>()) | ||
| .def_rw("beta1", &ThreeParameterBlender::beta1) | ||
| .def_rw("beta2", &ThreeParameterBlender::beta2) | ||
| .def_rw("eta", &ThreeParameterBlender::eta) | ||
|
||
| .def("blend", &ThreeParameterBlender::blend, nb::arg("t"), nb::arg("d"), | ||
| "Blend two values with (t,d)") | ||
| .def("__repr__", [](const ThreeParameterBlender& b) { | ||
| return nb::str("ThreeParameterBlender(beta1={}, beta2={}, eta={})") | ||
| .format(b.beta1, b.beta2, b.eta); | ||
| }); | ||
| } // NB_MODULE(_genmetaballs_bindings) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,27 @@ | ||
| #pragma once | ||
|
|
||
| #include "fmb.h" | ||
| #include "geometry.h" | ||
| #include <cmath> | ||
| #include <cuda_runtime.h> | ||
|
|
||
| #include "utils.cuh" | ||
|
|
||
| struct FourParameterBlender { | ||
| float beta1; | ||
| float beta2; | ||
| float beta3; | ||
| float eta; | ||
|
|
||
| CUDA_CALLABLE __forceinline__ float blend(float t, float d) const { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It'd be nice to have the definitions of the methods in a |
||
| return expf((beta1 * d * sigmoid((beta3 / eta) * t)) - ((beta2 / eta) * t)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL |
||
| } | ||
| }; | ||
|
|
||
| struct ThreeParameterBlender { | ||
| float beta1; | ||
| float beta2; | ||
| float eta; | ||
|
|
||
| CUDA_CALLABLE __forceinline__ // TODO inline? | ||
| float | ||
| blend(float t, float d, const FMB& fmb, const Ray& ray) const; | ||
| CUDA_CALLABLE __forceinline__ float blend(float t, float d) const { | ||
| return expf((beta1 * d) - ((beta2 / eta) * t)); | ||
| } | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,7 @@ | ||
| from genmetaballs._genmetaballs_bindings.blender import ( | ||
| FourParameterBlender, | ||
| ThreeParameterBlender, | ||
| ) | ||
| from genmetaballs._genmetaballs_bindings.confidence import ( | ||
| TwoParameterConfidence, | ||
| ZeroParameterConfidence, | ||
|
|
@@ -8,4 +12,6 @@ | |
| "ZeroParameterConfidence", | ||
| "TwoParameterConfidence", | ||
| "sigmoid", | ||
| "FourParameterBlender", | ||
| "ThreeParameterBlender", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of directly exposing the confidence methods, let's have a |
||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| #include <algorithm> | ||
| #include <cmath> | ||
| #include <cstdint> | ||
| #include <cuda_runtime.h> | ||
| #include <gtest/gtest.h> | ||
| #include <limits> | ||
| #include <random> | ||
| #include <vector> | ||
|
|
||
| #include "core/blender.cuh" | ||
| #include "core/utils.cuh" | ||
|
|
||
| // Element-wise blend: blended[i] = blender.blend(t[i], d[i]) for every i in [0, n). | ||
| // Each thread starts at its global index and then advances by the total number of launched | ||
| // threads, so all n elements are written even when the launch has fewer threads than n. | ||
| // Blender is any type with a device-callable blend(float, float); it is passed by value. | ||
| template <typename Blender> | ||
| __global__ void blender_kernel(const float* t, const float* d, float* blended, uint32_t n, | ||
| Blender blender) { | ||
| // 64-bit index and stride so neither the start offset nor the increment can wrap. | ||
| const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x; | ||
| const size_t first = threadIdx.x + (static_cast<size_t>(blockIdx.x) * blockDim.x); | ||
| for (size_t i = first; i < n; i += stride) { | ||
| blended[i] = blender.blend(t[i], d[i]); | ||
| } | ||
| } | ||
|
|
||
| constexpr uint32_t GRID_DIM = 256; | ||
| constexpr uint32_t BLOCK_DIM = 1024; | ||
|
|
||
| template <typename Blender> | ||
| std::vector<float> gpu_blend(const std::vector<float>& t_vec, const std::vector<float>& d_vec, | ||
| Blender blender) { | ||
| auto n = static_cast<uint32_t>(t_vec.size()); | ||
| auto nbytes = n * sizeof(float); | ||
| float *d_t = nullptr, *d_d = nullptr, *d_blended = nullptr; | ||
| std::vector<float> result(n); | ||
|
|
||
| CUDA_CHECK(cudaMalloc(&d_t, nbytes)); | ||
| CUDA_CHECK(cudaMalloc(&d_d, nbytes)); | ||
| CUDA_CHECK(cudaMalloc(&d_blended, nbytes)); | ||
| CUDA_CHECK(cudaMemcpy(d_t, t_vec.data(), nbytes, cudaMemcpyHostToDevice)); | ||
| CUDA_CHECK(cudaMemcpy(d_d, d_vec.data(), nbytes, cudaMemcpyHostToDevice)); | ||
|
Comment on lines
+31
to
+37
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For readability: you might want to look into some of thrust's utilities (e.g. |
||
|
|
||
| auto block_dim = BLOCK_DIM; | ||
| auto grid_dim = (n + block_dim - 1) / block_dim; | ||
| if (grid_dim > GRID_DIM) | ||
| grid_dim = GRID_DIM; | ||
|
|
||
| blender_kernel<Blender><<<grid_dim, block_dim>>>(d_t, d_d, d_blended, n, blender); | ||
|
|
||
| CUDA_CHECK(cudaMemcpy(result.data(), d_blended, nbytes, cudaMemcpyDeviceToHost)); | ||
| CUDA_CHECK(cudaFree(d_t)); | ||
| CUDA_CHECK(cudaFree(d_d)); | ||
| CUDA_CHECK(cudaFree(d_blended)); | ||
| return result; | ||
| } | ||
|
|
||
| constexpr int NUM_RNG_SEEDS_PER_TEST = 5; | ||
| constexpr int NUM_N_VALUES_PER_TEST = 5; | ||
| constexpr uint32_t MASTER_SEED = 42; | ||
|
|
||
| // Input lengths exercised by the blender tests: NUM_N_VALUES_PER_TEST successive powers of | ||
| // two, starting at 16. | ||
| static std::vector<int> blender_test_sizes() { | ||
| std::vector<int> lengths; | ||
| int exponent = 4; | ||
| while (exponent < 4 + NUM_N_VALUES_PER_TEST) { | ||
| lengths.push_back(1 << exponent); | ||
| ++exponent; | ||
| } | ||
| return lengths; | ||
| } | ||
|
|
||
| // One named parameter set for the four-parameter blender tests. | ||
| // Field order matters: instances are created by positional aggregate initialization. | ||
| struct BlenderCase { | ||
| float beta1; | ||
| float beta2; | ||
| float beta3; | ||
| float eta; | ||
| const char* name; // label printed in test traces | ||
| }; | ||
|
|
||
| // Parameter sets (beta1, beta2, beta3, eta, label) used to build FourParameterBlender in the tests. | ||
| static std::vector<BlenderCase> blender_cases() { | ||
| std::vector<BlenderCase> cases; | ||
| cases.push_back(BlenderCase{1.0F, 0.5F, 0.2F, 2.0F, "case1"}); | ||
| cases.push_back(BlenderCase{-2.0F, 1.0F, -1.0F, 1.5F, "case2"}); | ||
| cases.push_back(BlenderCase{0.0F, 0.0F, 1.0F, 1.0F, "case3"}); | ||
| cases.push_back(BlenderCase{0.5F, -0.5F, 0.8F, 0.5F, "case4"}); | ||
| return cases; | ||
| } | ||
|
|
||
| // GPU test for FourParameterBlender: for each input size, parameter set and RNG seed, blends | ||
| // random (t, d) pairs on the device and compares every output with blend() evaluated on the host. | ||
| TEST(GpuBlenderTest, Blender_GPU_Smoke_FourParameter) { | ||
| // Derive the per-run seeds from one fixed master seed so the test is reproducible. | ||
| std::mt19937 master_gen(MASTER_SEED); | ||
| std::uniform_int_distribution<uint32_t> seed_dist(0, std::numeric_limits<uint32_t>::max()); | ||
| std::vector<uint32_t> seeds(NUM_RNG_SEEDS_PER_TEST); | ||
| for (auto& s : seeds) | ||
| s = seed_dist(master_gen); | ||
|
|
||
| for (const int N : blender_test_sizes()) { | ||
| for (const auto& blend_case : blender_cases()) { | ||
| for (uint32_t test_seed : seeds) { | ||
| SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed | ||
| << ", blend_type=" << blend_case.name); | ||
|
|
||
| std::mt19937 rng(test_seed); | ||
| std::uniform_real_distribution<float> tdist(0.0F, 10.0F); | ||
| std::uniform_real_distribution<float> ddist(0.0F, 10.0F); | ||
|
|
||
| std::vector<float> t_vec(N), d_vec(N); | ||
| for (int i = 0; i < N; ++i) { | ||
| t_vec[i] = tdist(rng); | ||
| d_vec[i] = ddist(rng); | ||
| } | ||
|
|
||
| FourParameterBlender blender{blend_case.beta1, blend_case.beta2, blend_case.beta3, | ||
| blend_case.eta}; | ||
|
|
||
| std::vector<float> actual = gpu_blend(t_vec, d_vec, blender); | ||
|
|
||
| // The size check alone cannot catch wrong values; it guards the indexed comparison below. | ||
| ASSERT_EQ(actual.size(), static_cast<size_t>(N)); | ||
| for (int i = 0; i < N; ++i) { | ||
| const float expected = blender.blend(t_vec[i], d_vec[i]); | ||
| // Host and device math may round differently, so allow a small relative error. | ||
| const float tolerance = | ||
| std::max(1e-4F * std::abs(expected), std::numeric_limits<float>::min()); | ||
| ASSERT_NEAR(actual[i], expected, tolerance) << "i=" << i; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Smoke test for ThreeParameterBlender | ||
| // One named parameter set for the three-parameter blender tests. | ||
| // Field order matters: instances are created by positional aggregate initialization. | ||
| struct ThreeParamBlenderCase { | ||
| float beta1; | ||
| float beta2; | ||
| float eta; | ||
| const char* name; // label printed in test traces | ||
| }; | ||
|
|
||
| // Parameter sets (beta1, beta2, eta, label) used to build ThreeParameterBlender in the tests. | ||
| static std::vector<ThreeParamBlenderCase> threeparam_blender_cases() { | ||
| std::vector<ThreeParamBlenderCase> cases; | ||
| cases.push_back(ThreeParamBlenderCase{1.0F, 0.5F, 2.0F, "three_case1"}); | ||
| cases.push_back(ThreeParamBlenderCase{-2.0F, 1.0F, 1.5F, "three_case2"}); | ||
| cases.push_back(ThreeParamBlenderCase{0.0F, 0.0F, 1.0F, "three_case3"}); | ||
| cases.push_back(ThreeParamBlenderCase{0.5F, -0.5F, 0.5F, "three_case4"}); | ||
| return cases; | ||
| } | ||
|
|
||
| // GPU test for ThreeParameterBlender: for each input size, parameter set and RNG seed, blends | ||
| // random (t, d) pairs on the device and compares every output with blend() evaluated on the host. | ||
| TEST(GpuBlenderTest, Blender_GPU_Smoke_ThreeParameter) { | ||
| // Derive the per-run seeds from one fixed master seed so the test is reproducible. | ||
| std::mt19937 master_gen(MASTER_SEED); | ||
| std::uniform_int_distribution<uint32_t> seed_dist(0, std::numeric_limits<uint32_t>::max()); | ||
| std::vector<uint32_t> seeds(NUM_RNG_SEEDS_PER_TEST); | ||
| for (auto& s : seeds) | ||
| s = seed_dist(master_gen); | ||
|
|
||
| for (const int N : blender_test_sizes()) { | ||
| for (const auto& blend_case : threeparam_blender_cases()) { | ||
| for (uint32_t test_seed : seeds) { | ||
| SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed | ||
| << ", blend_type=" << blend_case.name); | ||
|
|
||
| std::mt19937 rng(test_seed); | ||
| std::uniform_real_distribution<float> tdist(0.0F, 10.0F); | ||
| std::uniform_real_distribution<float> ddist(0.0F, 10.0F); | ||
|
|
||
| std::vector<float> t_vec(N), d_vec(N); | ||
| for (int i = 0; i < N; ++i) { | ||
| t_vec[i] = tdist(rng); | ||
| d_vec[i] = ddist(rng); | ||
| } | ||
|
|
||
| ThreeParameterBlender blender{blend_case.beta1, blend_case.beta2, blend_case.eta}; | ||
|
|
||
| std::vector<float> actual = gpu_blend(t_vec, d_vec, blender); | ||
|
|
||
| // The size check alone cannot catch wrong values; it guards the indexed comparison below. | ||
| ASSERT_EQ(actual.size(), static_cast<size_t>(N)); | ||
| for (int i = 0; i < N; ++i) { | ||
| const float expected = blender.blend(t_vec[i], d_vec[i]); | ||
| // Host and device math may round differently, so allow a small relative error. | ||
| const float tolerance = | ||
| std::max(1e-4F * std::abs(expected), std::numeric_limits<float>::min()); | ||
| ASSERT_NEAR(actual[i], expected, tolerance) << "i=" << i; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to use `def_ro` for immutable types.