-
Notifications
You must be signed in to change notification settings - Fork 0
MET-34 Blender #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MET-34 Blender #18
Changes from all commits
29506d2
bd16d63
75ddddb
e7ed647
abf7826
1e1af4c
1b8ca71
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,27 @@ | ||
| #pragma once | ||
|
|
||
| #include "fmb.h" | ||
| #include "geometry.h" | ||
| #include <cmath> | ||
| #include <cuda_runtime.h> | ||
|
|
||
| #include "utils.cuh" | ||
|
|
||
| // Blender described by four scalar parameters: beta1, beta2, beta3 and eta. | ||
| struct FourParameterBlender { | ||
| float beta1; | ||
| float beta2; | ||
| float beta3; | ||
| float eta; | ||
|
|
||
| // Evaluates expf(beta1 * d * sigmoid((beta3 / eta) * t) - (beta2 / eta) * t). | ||
| CUDA_CALLABLE __forceinline__ float blend(float t, float d) const { | ||
| const float gate = sigmoid((beta3 / eta) * t); | ||
| const float d_term = beta1 * d * gate; | ||
| const float t_term = (beta2 / eta) * t; | ||
| return expf(d_term - t_term); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL |
||
| } | ||
| }; | ||
|
|
||
| // Blender described by three scalar parameters: beta1, beta2 and eta. | ||
| struct ThreeParameterBlender { | ||
| float beta1; | ||
| float beta2; | ||
| float eta; | ||
|
|
||
| // Declaration only: no body for this four-argument overload is visible here. | ||
| // NOTE(review): a __forceinline__ member normally needs its definition visible to | ||
| // every caller - confirm where it is defined, or whether this declaration is stale. | ||
| CUDA_CALLABLE __forceinline__ // TODO inline? | ||
| float | ||
| blend(float t, float d, const FMB& fmb, const Ray& ray) const; | ||
| // Evaluates expf(beta1 * d - (beta2 / eta) * t). | ||
| CUDA_CALLABLE __forceinline__ float blend(float t, float d) const { | ||
| return expf((beta1 * d) - ((beta2 / eta) * t)); | ||
| } | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,8 @@ | ||
| from genmetaballs._genmetaballs_bindings import geometry | ||
| from genmetaballs._genmetaballs_bindings.blender import ( | ||
| FourParameterBlender, | ||
| ThreeParameterBlender, | ||
| ) | ||
| from genmetaballs._genmetaballs_bindings.confidence import ( | ||
| TwoParameterConfidence, | ||
| ZeroParameterConfidence, | ||
|
|
@@ -27,4 +31,6 @@ def array2d_float(data, device) -> CPUFloatArray2D | GPUFloatArray2D: | |
| "TwoParameterConfidence", | ||
| "geometry", | ||
| "sigmoid", | ||
| "FourParameterBlender", | ||
| "ThreeParameterBlender", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of directly exposing the confidence methods, let's have a |
||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| #include <algorithm> | ||
| #include <cmath> | ||
| #include <cstdint> | ||
| #include <cuda_runtime.h> | ||
| #include <gtest/gtest.h> | ||
| #include <limits> | ||
| #include <random> | ||
| #include <vector> | ||
|
|
||
| #include "core/blender.cuh" | ||
| #include "core/utils.cuh" | ||
|
|
||
| // Element-wise blend: blended[i] = blender.blend(t[i], d[i]) for every i in [0, n). | ||
| // Expects a 1D launch. Threads walk the index range with a stride of the whole grid, | ||
| // so a grid smaller than ceil(n / blockDim.x) (e.g. a capped grid) still covers all n | ||
| // elements. t, d and blended must each hold at least n floats readable by the device. | ||
| template <typename Blender> | ||
| __global__ void blender_kernel(const float* t, const float* d, float* blended, uint32_t n, | ||
| Blender blender) { | ||
| // 64-bit index and stride so that i += stride cannot wrap when n is near UINT32_MAX. | ||
| const uint64_t stride = static_cast<uint64_t>(gridDim.x) * blockDim.x; | ||
| const uint64_t first = threadIdx.x + (static_cast<uint64_t>(blockIdx.x) * blockDim.x); | ||
| for (uint64_t i = first; i < n; i += stride) { | ||
| blended[i] = blender.blend(t[i], d[i]); | ||
| } | ||
| } | ||
|
|
||
| // Maximum number of blocks gpu_blend uses for a single kernel launch. | ||
| constexpr uint32_t GRID_DIM = 256; | ||
| // Threads per block used by gpu_blend when launching the kernel. | ||
| constexpr uint32_t BLOCK_DIM = 1024; | ||
|
|
||
| // Runs Blender::blend on the GPU for every (t_vec[i], d_vec[i]) pair and returns the results. | ||
| // Inputs are copied to freshly allocated device buffers, blender_kernel is launched, and the | ||
| // output is copied back to the host before the buffers are freed. | ||
| // A length mismatch between the inputs is recorded as a non-fatal test failure and only the | ||
| // common prefix is processed. Empty input yields an empty vector without touching the device. | ||
| template <typename Blender> | ||
| std::vector<float> gpu_blend(const std::vector<float>& t_vec, const std::vector<float>& d_vec, | ||
| Blender blender) { | ||
| EXPECT_EQ(t_vec.size(), d_vec.size()); | ||
| // Never read past the end of the shorter input. | ||
| auto n = static_cast<uint32_t>(std::min(t_vec.size(), d_vec.size())); | ||
| std::vector<float> result(n); | ||
| if (n == 0) | ||
| return result; // a launch with zero blocks is an invalid configuration | ||
|
|
||
| auto nbytes = n * sizeof(float); | ||
| float *d_t = nullptr, *d_d = nullptr, *d_blended = nullptr; | ||
|
|
||
| CUDA_CHECK(cudaMalloc(&d_t, nbytes)); | ||
| CUDA_CHECK(cudaMalloc(&d_d, nbytes)); | ||
| CUDA_CHECK(cudaMalloc(&d_blended, nbytes)); | ||
| CUDA_CHECK(cudaMemcpy(d_t, t_vec.data(), nbytes, cudaMemcpyHostToDevice)); | ||
| CUDA_CHECK(cudaMemcpy(d_d, d_vec.data(), nbytes, cudaMemcpyHostToDevice)); | ||
|
Comment on lines
+31
to
+37
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For readability: you might want to look into some of thrust's utilities (e.g. |
||
|
|
||
| // Each launch uses at most GRID_DIM blocks. Larger inputs are processed in consecutive | ||
| // chunks so that no element is skipped, whether or not the kernel loops over a grid stride. | ||
| constexpr uint32_t max_chunk = GRID_DIM * BLOCK_DIM; | ||
| for (uint32_t offset = 0; offset < n;) { | ||
| const uint32_t chunk = std::min(n - offset, max_chunk); | ||
| const uint32_t grid_dim = (chunk + BLOCK_DIM - 1) / BLOCK_DIM; | ||
| blender_kernel<Blender><<<grid_dim, BLOCK_DIM>>>(d_t + offset, d_d + offset, | ||
| d_blended + offset, chunk, blender); | ||
| // Launch-configuration errors are only reported through cudaGetLastError(). | ||
| CUDA_CHECK(cudaGetLastError()); | ||
| offset += chunk; | ||
| } | ||
|
|
||
| // Blocking copy: also surfaces errors raised while the kernel was executing. | ||
| CUDA_CHECK(cudaMemcpy(result.data(), d_blended, nbytes, cudaMemcpyDeviceToHost)); | ||
| CUDA_CHECK(cudaFree(d_t)); | ||
| CUDA_CHECK(cudaFree(d_d)); | ||
| CUDA_CHECK(cudaFree(d_blended)); | ||
| return result; | ||
| } | ||
|
|
||
| // Number of RNG seeds each test draws from the master generator. | ||
| constexpr int NUM_RNG_SEEDS_PER_TEST = 5; | ||
| // Number of input sizes produced by blender_test_sizes(). | ||
| constexpr int NUM_N_VALUES_PER_TEST = 5; | ||
| // Seed of the master generator from which the per-run seeds are drawn. | ||
| constexpr uint32_t MASTER_SEED = 42; | ||
|
|
||
| // Input lengths used by the blender tests: NUM_N_VALUES_PER_TEST consecutive powers of | ||
| // two, starting at 2^4 = 16. | ||
| static std::vector<int> blender_test_sizes() { | ||
| std::vector<int> lengths(NUM_N_VALUES_PER_TEST); | ||
| int next = 1 << 4; | ||
| for (int& length : lengths) { | ||
| length = next; | ||
| next <<= 1; // double for the following entry | ||
| } | ||
| return lengths; | ||
| } | ||
|
|
||
| // One parameter set for FourParameterBlender plus a label shown in test traces. | ||
| // Member order matters: instances are created with positional aggregate initialisation. | ||
| struct BlenderCase { | ||
| float beta1; | ||
| float beta2; | ||
| float beta3; | ||
| float eta; | ||
| const char* name; | ||
| }; | ||
|
|
||
| // Parameter sets exercised by the four-parameter smoke test, as | ||
| // {beta1, beta2, beta3, eta, name}. | ||
| static std::vector<BlenderCase> blender_cases() { | ||
| std::vector<BlenderCase> cases; | ||
| cases.push_back({1.0F, 0.5F, 0.2F, 2.0F, "case1"}); | ||
| cases.push_back({-2.0F, 1.0F, -1.0F, 1.5F, "case2"}); | ||
| cases.push_back({0.0F, 0.0F, 1.0F, 1.0F, "case3"}); | ||
| cases.push_back({0.5F, -0.5F, 0.8F, 0.5F, "case4"}); | ||
| return cases; | ||
| } | ||
|
|
||
| // Smoke test for FourParameterBlender: for every input size, parameter set and seed, | ||
| // run gpu_blend on random inputs and check only that the output has the expected length. | ||
| TEST(GpuBlenderTest, Blender_GPU_Smoke_FourParameter) { | ||
| // Derive the per-run seeds deterministically from MASTER_SEED. | ||
| std::mt19937 seed_source(MASTER_SEED); | ||
| std::uniform_int_distribution<uint32_t> pick_seed(0, std::numeric_limits<uint32_t>::max()); | ||
| std::vector<uint32_t> seeds; | ||
| for (int k = 0; k < NUM_RNG_SEEDS_PER_TEST; ++k) | ||
| seeds.push_back(pick_seed(seed_source)); | ||
|
|
||
| const std::vector<BlenderCase> cases = blender_cases(); | ||
| for (const int N : blender_test_sizes()) { | ||
| for (const BlenderCase& blend_case : cases) { | ||
| for (const uint32_t test_seed : seeds) { | ||
| SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed | ||
| << ", blend_type=" << blend_case.name); | ||
|
|
||
| // Inputs are drawn from [0, 10), alternating t and d from the same generator. | ||
| std::mt19937 rng(test_seed); | ||
| std::uniform_real_distribution<float> tdist(0.0F, 10.0F); | ||
| std::uniform_real_distribution<float> ddist(0.0F, 10.0F); | ||
| std::vector<float> t_vec, d_vec; | ||
| for (int i = 0; i < N; ++i) { | ||
| t_vec.push_back(tdist(rng)); | ||
| d_vec.push_back(ddist(rng)); | ||
| } | ||
|
|
||
| const FourParameterBlender blender{blend_case.beta1, blend_case.beta2, | ||
| blend_case.beta3, blend_case.eta}; | ||
| const std::vector<float> actual = gpu_blend(t_vec, d_vec, blender); | ||
|
|
||
| ASSERT_EQ(actual.size(), static_cast<size_t>(N)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Smoke test for ThreeParameterBlender | ||
| // One parameter set for ThreeParameterBlender plus a label shown in test traces. | ||
| // Member order matters: instances are created with positional aggregate initialisation. | ||
| struct ThreeParamBlenderCase { | ||
| float beta1; | ||
| float beta2; | ||
| float eta; | ||
| const char* name; | ||
| }; | ||
|
|
||
| // Parameter sets exercised by the three-parameter smoke test, as | ||
| // {beta1, beta2, eta, name}. | ||
| static std::vector<ThreeParamBlenderCase> threeparam_blender_cases() { | ||
| std::vector<ThreeParamBlenderCase> cases; | ||
| cases.push_back({1.0F, 0.5F, 2.0F, "three_case1"}); | ||
| cases.push_back({-2.0F, 1.0F, 1.5F, "three_case2"}); | ||
| cases.push_back({0.0F, 0.0F, 1.0F, "three_case3"}); | ||
| cases.push_back({0.5F, -0.5F, 0.5F, "three_case4"}); | ||
| return cases; | ||
| } | ||
|
|
||
| // Smoke test for ThreeParameterBlender: for every input size, parameter set and seed, | ||
| // run gpu_blend on random inputs and check only that the output has the expected length. | ||
| TEST(GpuBlenderTest, Blender_GPU_Smoke_ThreeParameter) { | ||
| // Derive the per-run seeds deterministically from MASTER_SEED. | ||
| std::mt19937 seed_source(MASTER_SEED); | ||
| std::uniform_int_distribution<uint32_t> pick_seed(0, std::numeric_limits<uint32_t>::max()); | ||
| std::vector<uint32_t> seeds; | ||
| for (int k = 0; k < NUM_RNG_SEEDS_PER_TEST; ++k) | ||
| seeds.push_back(pick_seed(seed_source)); | ||
|
|
||
| const std::vector<ThreeParamBlenderCase> cases = threeparam_blender_cases(); | ||
| for (const int N : blender_test_sizes()) { | ||
| for (const ThreeParamBlenderCase& blend_case : cases) { | ||
| for (const uint32_t test_seed : seeds) { | ||
| SCOPED_TRACE(testing::Message() << "N=" << N << ", seed=" << test_seed | ||
| << ", blend_type=" << blend_case.name); | ||
|
|
||
| // Inputs are drawn from [0, 10), alternating t and d from the same generator. | ||
| std::mt19937 rng(test_seed); | ||
| std::uniform_real_distribution<float> tdist(0.0F, 10.0F); | ||
| std::uniform_real_distribution<float> ddist(0.0F, 10.0F); | ||
| std::vector<float> t_vec, d_vec; | ||
| for (int i = 0; i < N; ++i) { | ||
| t_vec.push_back(tdist(rng)); | ||
| d_vec.push_back(ddist(rng)); | ||
| } | ||
|
|
||
| const ThreeParameterBlender blender{blend_case.beta1, blend_case.beta2, | ||
| blend_case.eta}; | ||
| const std::vector<float> actual = gpu_blend(t_vec, d_vec, blender); | ||
|
|
||
| ASSERT_EQ(actual.size(), static_cast<size_t>(N)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be nice to have the definitions of the methods in a
`blender.cu` file :)