From 795a1b9c0810567f59f265abe940381037aff800 Mon Sep 17 00:00:00 2001 From: Matin Ghavami Date: Tue, 11 Nov 2025 16:19:29 -0500 Subject: [PATCH 1/5] add forward skeleton --- genmetaballs/src/cuda/core/forward.cu | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 genmetaballs/src/cuda/core/forward.cu diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu new file mode 100644 index 0000000..c5289a8 --- /dev/null +++ b/genmetaballs/src/cuda/core/forward.cu @@ -0,0 +1,62 @@ +#include + +#include + +constexpr NUM_BLOCKS dim3(10); //XXX madeup +constexpr THREADS_PER_BLOCK dim3(10); + +namespace FMB { + +__device__ __host__ +std::vector> +get_pixel_coords_and_rays(const dim3 thread_idx, const dim3 block_idx) +{ + std::vector> res; + + uint32_t i_beg = 0; // XXX TODO + uint32_t i_end = 0; // XXX TODO + + for(int i = i_beg; i < i_end; i+=blockDim.x) { + //... + } + + return res; +} + +template +__global__ +render_kernel( + const typename Getter::Getter &fmb_getter, + const Intrinsics &intr, + const Pose &extr, + Image *img +) { + // TODO how to find the relevant chunk of computation from threadIdx, + // blockIdx, etc + auto pixel_coords_and_rays = get_pixel_coords_and_rays(threadIdx, blockIdx, ...); + + for(const auto &[pixel_coords, ray]: pixel_coords_and_rays) { + float w0 = 0.0f, tf = 0.0f, confidence = 0.0f; + for(const auto &fmb: fmb_getter.get_metaballs(ray)) { + t = Intersector::intersect(fmb, ray); + w = Blender::blend(t, fmb, ray); + confidence = Confidence::update(confidence, t, w); + tf += t; + w0 += w; + } + img.confidence.at(pixel_coords) = confidence; + img.depth.at(pixel_coords) = tf / w0; + } +} + +template +void +render_fmbs(const FMBs &fmbs, const Intrinsics &intr, const Pose &extr) +{ + // initialize the fmb_getter + typename Getter::Getter fmb_getter(fmbs, intr, extr); + auto kernel = render_kernel; + kernel<<>>(fmb_getter, fmbs, intr, extr); +} + +}; From 66e10b255e36aba90ff0cd5041fa2ec68054d6a7 Mon Sep 17 00:00:00 2001 From: Matin Ghavami Date: Sun, 16 Nov 2025 18:03:26 -0500 Subject: [PATCH 2/5] wip scaffolding --- genmetaballs/src/cuda/core/blender.cuh | 14 ++++++++ genmetaballs/src/cuda/core/camera.cuh | 14 ++++++++ genmetaballs/src/cuda/core/confidence.cuh | 12 +++++++ genmetaballs/src/cuda/core/fmb.cuh | 23 +++++++++++++ genmetaballs/src/cuda/core/forward.cu | 27 +++++++++------ genmetaballs/src/cuda/core/geometry.cuh | 33 ++++++++++++++++++ genmetaballs/src/cuda/core/getter.cuh | 0 genmetaballs/src/cuda/core/image.cuh | 12 +++++++ genmetaballs/src/cuda/core/intersector.cuh | 14 ++++++++ genmetaballs/src/cuda/core/utils.cu | 2 +- genmetaballs/src/cuda/core/utils.cuh | 40 ++++++++++++++++++++++ genmetaballs/src/cuda/core/utils.h | 10 ------ 12 files changed, 179 insertions(+), 22 deletions(-) create mode 100644 genmetaballs/src/cuda/core/blender.cuh create mode 100644 genmetaballs/src/cuda/core/camera.cuh create mode 100644 genmetaballs/src/cuda/core/confidence.cuh create mode 100644 genmetaballs/src/cuda/core/fmb.cuh create mode 100644 genmetaballs/src/cuda/core/geometry.cuh create mode 100644 genmetaballs/src/cuda/core/getter.cuh create mode 100644 genmetaballs/src/cuda/core/image.cuh create mode 100644 genmetaballs/src/cuda/core/intersector.cuh create mode 100644 genmetaballs/src/cuda/core/utils.cuh delete mode 100644 genmetaballs/src/cuda/core/utils.h diff --git a/genmetaballs/src/cuda/core/blender.cuh b/genmetaballs/src/cuda/core/blender.cuh new file mode 100644 index 0000000..2f56efd --- /dev/null +++ b/genmetaballs/src/cuda/core/blender.cuh @@ -0,0 +1,14 @@ +#pragma once + +#include "fmb.h" +#include "geometry.h" + + +struct ThreeParameterBlender { + float beta1; + float beta2; + float eta; + + __host__ __device__ __forceinline__ // TODO inline? + float blend(float t, float d, const FMB &fmb, const Ray &ray) const; +}; diff --git a/genmetaballs/src/cuda/core/camera.cuh b/genmetaballs/src/cuda/core/camera.cuh new file mode 100644 index 0000000..4b2d2b2 --- /dev/null +++ b/genmetaballs/src/cuda/core/camera.cuh @@ -0,0 +1,14 @@ +#pragma once + +#include + +struct Intrinsics { + uint32_t height; + uint32_t width; + float fx; + float fy; + float cx; + float cy; + float near; + float far; +}; diff --git a/genmetaballs/src/cuda/core/confidence.cuh b/genmetaballs/src/cuda/core/confidence.cuh new file mode 100644 index 0000000..a56d278 --- /dev/null +++ b/genmetaballs/src/cuda/core/confidence.cuh @@ -0,0 +1,12 @@ +#pragma once + +#include + + +struct TwoParameterConfidence { + float beta4; + float beta5; + + __host__ __device__ __forceinline__ + float get_confidence(float sumexpd) { return 0; } // TODO +}; diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh new file mode 100644 index 0000000..a6e6aa1 --- /dev/null +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -0,0 +1,23 @@ +#pragma once + +#include "geometry.cuh" + +struct FMB { + Pose pose; // mean + orientation + float3 extent; +}; + +template +class FMBs { +private: + containter_template fmbs_; + containter_template log_weights_; + +public: + FMBs(uint32_t size): + fmbs_(size), + log_weights_(size), + { + // TODO: set all log_weights_ to 0 + } +}; diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu index c5289a8..79606f6 100644 --- a/genmetaballs/src/cuda/core/forward.cu +++ b/genmetaballs/src/cuda/core/forward.cu @@ -23,29 +23,34 @@ get_pixel_coords_and_rays(const dim3 thread_idx, const dim3 block_idx) return res; } + template __global__ render_kernel( - const typename Getter::Getter &fmb_getter, - const Intrinsics &intr, - const Pose &extr, + const Getter fmb_getter, + const Blender blender, + Confidence const *confidence, + Intrinsics const *intr, + Pose const *extr, Image *img ) { // TODO how to find the relevant chunk of computation from threadIdx, // blockIdx, etc - auto pixel_coords_and_rays = get_pixel_coords_and_rays(threadIdx, blockIdx, ...); + auto pixel_coords_and_rays = get_pixel_coords_and_rays( + threadIdx, blockIdx, blockDim, gridDim, intr, extr + ); for(const auto &[pixel_coords, ray]: pixel_coords_and_rays) { - float w0 = 0.0f, tf = 0.0f, confidence = 0.0f; - for(const auto &fmb: fmb_getter.get_metaballs(ray)) { - t = Intersector::intersect(fmb, ray); - w = Blender::blend(t, fmb, ray); - confidence = Confidence::update(confidence, t, w); + float w0 = 0.0f, tf = 0.0f, sumexpd = 0.0f; + for(const auto &fmb: fmb_getter->get_metaballs(ray)) { + const auto &[t, d] = Intersector::intersect(fmb, ray); + w = blender->blend(t, d, fmb, ray); + sumexpd += exp(d); tf += t; w0 += w; } - img.confidence.at(pixel_coords) = confidence; - img.depth.at(pixel_coords) = tf / w0; + img->confidence.at(pixel_coords) = confidence->get_confidence(sumexpd); + img->depth.at(pixel_coords) = tf / w0; } } diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh new file mode 100644 index 0000000..b5914e1 --- /dev/null +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -0,0 +1,33 @@ +#pragma once + +#include + + +typedef Vec3D = float3; + + +class Rotation { + + private: + // ... + float rotmat_[9]; + + public: + Vec3D apply(const Vec3D vec) const; + Rotation compose(const Rotation &rot) const; + Rotation inv() const; +}; + +struct Pose { + Rotation rot; + Vec3D tran; + + Vec3D apply(const Vec3D vec) const; + Pose compose(const Pose &pose) const; + Pose inv() const; +}; + +struct Ray { + Vec3D start; + Vec3D direction; +}; diff --git a/genmetaballs/src/cuda/core/getter.cuh b/genmetaballs/src/cuda/core/getter.cuh new file mode 100644 index 0000000..e69de29 diff --git a/genmetaballs/src/cuda/core/image.cuh b/genmetaballs/src/cuda/core/image.cuh new file mode 100644 index 0000000..f2eb1b0 --- /dev/null +++ b/genmetaballs/src/cuda/core/image.cuh @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include "utils.cuh" +#include "geometry.cuh" + +template +struct Image { + Array2D confidence; + Array2D depth; +}; diff --git a/genmetaballs/src/cuda/core/intersector.cuh b/genmetaballs/src/cuda/core/intersector.cuh new file mode 100644 index 0000000..5dea2dc --- /dev/null +++ b/genmetaballs/src/cuda/core/intersector.cuh @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include "fmb.h" +#include "geometry.h" + +// implement equation (6) in the paper +class LinearIntersector { + + static __device__ __host__ + std::pair + intersect(const FMB &fmb, const Ray &ray) const; +}; diff --git a/genmetaballs/src/cuda/core/utils.cu b/genmetaballs/src/cuda/core/utils.cu index 5643c32..7c91259 100644 --- a/genmetaballs/src/cuda/core/utils.cu +++ b/genmetaballs/src/cuda/core/utils.cu @@ -2,7 +2,7 @@ #include #include -#include "utils.h" +#include "utils.cuh" void cuda_check(cudaError_t code, const char *file, int line) { if (code != cudaSuccess) { diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh new file mode 100644 index 0000000..5fea960 --- /dev/null +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -0,0 +1,40 @@ +#pragma once + +#include + +#define CUDA_CHECK(x) \ + do { \ + cuda_check((x), __FILE__, __LINE__); \ + } while (0) + + +void cuda_check(cudaError_t code, const char *file, int line); + +// XXX container_t should be a thrust container type +template +class Array2D { +private: + //XXX TODO: make sure this works + container_t data_; + +public: + + __host__ __device__ __forceinline__ + T &at(const uint32_t i, const uint32_t j) + { + return data_[i * width + j]; + } + + __host__ __device__ __forceinline__ + const T &at(const uint32_t i, const uint32_t j) const + { + return data_[i * width + j]; + } + + __host__ __device__ + constexpr uint32_t size() const + { + return width * height; + } +}; + diff --git a/genmetaballs/src/cuda/core/utils.h b/genmetaballs/src/cuda/core/utils.h deleted file mode 100644 index 2a22197..0000000 --- a/genmetaballs/src/cuda/core/utils.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include - -#define CUDA_CHECK(x) \ - do { \ - cuda_check((x), __FILE__, __LINE__); \ - } while (0) - -void cuda_check(cudaError_t code, const char *file, int line); From 8822bfe10b0f799cfdaef49b7fabe9c997020bdf Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 16 Nov 2025 23:33:47 +0000 Subject: [PATCH 3/5] add correct binding which compiles --- CMakeLists.txt | 4 +++- genmetaballs/src/cuda/bindings.cu | 14 ++++++++++++++ genmetaballs/src/cuda/core/add.cuh | 2 +- genmetaballs/src/cuda/core/geometry.cu | 6 ++++++ genmetaballs/src/cuda/core/geometry.cuh | 4 +++- genmetaballs/src/cuda/core/utils.cuh | 14 +++++++++----- tests/test_geometry.py | 13 +++++++++++++ 7 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 genmetaballs/src/cuda/core/geometry.cu create mode 100644 tests/test_geometry.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 3520ea8..6813b2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,10 @@ set(CMAKE_CXX_STANDARD 20) add_library(genmetaballs_core genmetaballs/src/cuda/core/utils.cu - genmetaballs/src/cuda/core/utils.h + genmetaballs/src/cuda/core/utils.cuh genmetaballs/src/cuda/core/add.cuh + genmetaballs/src/cuda/core/geometry.cuh + genmetaballs/src/cuda/core/geometry.cu ) # Set include directories for the core library diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index f32e6a6..a694bbb 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -4,6 +4,7 @@ #include #include "core/add.cuh" +#include "core/geometry.cuh" constexpr uint32_t GRID_DIM = 4096; constexpr uint32_t BLOCK_DIM = 1024; @@ -17,4 +18,17 @@ NB_MODULE(_genmetaballs_bindings, m) { "Add two lists elementwise on the GPU", nb::arg("a"), nb::arg("b") ); + + nb::class_(m, "Vec3D") + .def(nb::init<>()) + .def(nb::init()) + .def_rw("x", &Vec3D::x) + .def_rw("y", &Vec3D::y) + .def_rw("z", &Vec3D::z) + .def("__add__", &operator+) + .def("__repr__", [](const Vec3D &v) { + return "Vec3D(" + std::to_string(v.x) + ", " + + std::to_string(v.y) + ", " + std::to_string(v.z) + ")"; + }); + } diff --git a/genmetaballs/src/cuda/core/add.cuh b/genmetaballs/src/cuda/core/add.cuh index 20fc703..bc8a09b 100644 --- a/genmetaballs/src/cuda/core/add.cuh +++ b/genmetaballs/src/cuda/core/add.cuh @@ -2,7 +2,7 @@ #include #include -#include "utils.h" +#include "utils.cuh" __global__ void add_kernel( float const *a, diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu new file mode 100644 index 0000000..53b0161 --- /dev/null +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -0,0 +1,6 @@ +#include "geometry.cuh" + +Vec3D operator+(const Vec3D a, const Vec3D b) +{ + return {1.0f, 2.0f, 3.0f}; +} diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index b5914e1..0e25185 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -3,7 +3,9 @@ #include -typedef Vec3D = float3; +using Vec3D = float3; + +Vec3D operator+(const Vec3D a, const Vec3D b); class Rotation { diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 5fea960..04fc60f 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -1,5 +1,6 @@ #pragma once +#include #include #define CUDA_CHECK(x) \ @@ -20,21 +21,24 @@ private: public: __host__ __device__ __forceinline__ - T &at(const uint32_t i, const uint32_t j) + container_t &at(const uint32_t i, const uint32_t j) { - return data_[i * width + j]; + return data_; + //return data_[i * width + j]; } __host__ __device__ __forceinline__ - const T &at(const uint32_t i, const uint32_t j) const + const container_t &at(const uint32_t i, const uint32_t j) const { - return data_[i * width + j]; + return data_; + //return data_[i * width + j]; } __host__ __device__ constexpr uint32_t size() const { - return width * height; + return 0; + //return width * height; } }; diff --git a/tests/test_geometry.py b/tests/test_geometry.py new file mode 100644 index 0000000..75820cd --- /dev/null +++ b/tests/test_geometry.py @@ -0,0 +1,13 @@ +import numpy as np +import pytest + +from genmetaballs import _genmetaballs_bindings as _gmbb + + +@pytest.fixture +def rng() -> np.random.Generator: + return np.random.default_rng(0) + + +def test_vec3d(rng: np.random.Generator) -> None: + _gmbb.Vec3D(0, 0, 0) From ee36a7ee983fd267c3296eb21304c245d7cda3dc Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 16 Nov 2025 23:40:58 +0000 Subject: [PATCH 4/5] vector addition works --- genmetaballs/src/cuda/core/geometry.cu | 2 +- tests/test_geometry.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu index 53b0161..147a8cd 100644 --- a/genmetaballs/src/cuda/core/geometry.cu +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -2,5 +2,5 @@ Vec3D operator+(const Vec3D a, const Vec3D b) { - return {1.0f, 2.0f, 3.0f}; + return {a.x + b.x, a.y + b.y, a.z + b.z}; } diff --git a/tests/test_geometry.py b/tests/test_geometry.py index 75820cd..00a64c3 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -9,5 +9,14 @@ def rng() -> np.random.Generator: return np.random.default_rng(0) -def test_vec3d(rng: np.random.Generator) -> None: +def test_vec3d_smoke() -> None: _gmbb.Vec3D(0, 0, 0) + +def test_vec3d_smoke(rng: np.random.Generator) -> None: + _a, _b = rng.uniform(size=3), rng.uniform(size=3) + a, b = _gmbb.Vec3D(*_a), _gmbb.Vec3D(*_b) + c = a + b + _c = _a + _b + assert all( + [np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])] + ) From 1ce26bd5580f1bdee80d18ef724c34654051a9fc Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 16 Nov 2025 23:54:19 +0000 Subject: [PATCH 5/5] vec3d subtraction --- genmetaballs/src/cuda/bindings.cu | 15 +++--- genmetaballs/src/cuda/core/blender.cuh | 4 +- genmetaballs/src/cuda/core/confidence.cuh | 6 +-- genmetaballs/src/cuda/core/fmb.cuh | 7 +-- genmetaballs/src/cuda/core/forward.cu | 56 +++++++++------------- genmetaballs/src/cuda/core/geometry.cu | 7 ++- genmetaballs/src/cuda/core/geometry.cuh | 19 ++++---- genmetaballs/src/cuda/core/image.cuh | 4 +- genmetaballs/src/cuda/core/intersector.cuh | 5 +- genmetaballs/src/cuda/core/utils.cuh | 34 +++++-------- tests/test_geometry.py | 15 ++++-- 11 files changed, 77 insertions(+), 95 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index 0fafe2c..940bc02 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -11,12 +11,8 @@ constexpr uint32_t BLOCK_DIM = 1024; namespace nb = nanobind; NB_MODULE(_genmetaballs_bindings, m) { - m.def( - "gpu_add", - &gpu_add, - "Add two lists elementwise on the GPU", - nb::arg("a"), nb::arg("b") - ); + m.def("gpu_add", &gpu_add, "Add two lists elementwise on the GPU", + nb::arg("a"), nb::arg("b")); nb::class_(m, "Vec3D") .def(nb::init<>()) @@ -25,8 +21,9 @@ NB_MODULE(_genmetaballs_bindings, m) { .def_rw("y", &Vec3D::y) .def_rw("z", &Vec3D::z) .def("__add__", &operator+) - .def("__repr__", [](const Vec3D &v) { - return "Vec3D(" + std::to_string(v.x) + ", " + - std::to_string(v.y) + ", " + std::to_string(v.z) + ")"; + .def("__sub__", &operator-) + .def("__repr__", [](const Vec3D& v) { + return "Vec3D(" + std::to_string(v.x) + ", " + std::to_string(v.y) + ", " + + std::to_string(v.z) + ")"; }); } diff --git a/genmetaballs/src/cuda/core/blender.cuh b/genmetaballs/src/cuda/core/blender.cuh index 2f56efd..8c4e21f 100644 --- a/genmetaballs/src/cuda/core/blender.cuh +++ b/genmetaballs/src/cuda/core/blender.cuh @@ -3,12 +3,12 @@ #include "fmb.h" #include "geometry.h" - struct ThreeParameterBlender { float beta1; float beta2; float eta; __host__ __device__ __forceinline__ // TODO inline? - float blend(float t, float d, const FMB &fmb, const Ray &ray) const; + float + blend(float t, float d, const FMB& fmb, const Ray& ray) const; }; diff --git a/genmetaballs/src/cuda/core/confidence.cuh b/genmetaballs/src/cuda/core/confidence.cuh index a56d278..b79c3ad 100644 --- a/genmetaballs/src/cuda/core/confidence.cuh +++ b/genmetaballs/src/cuda/core/confidence.cuh @@ -2,11 +2,11 @@ #include - struct TwoParameterConfidence { float beta4; float beta5; - __host__ __device__ __forceinline__ - float get_confidence(float sumexpd) { return 0; } // TODO + __host__ __device__ __forceinline__ float get_confidence(float sumexpd) { + return 0; + } // TODO }; diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index a6e6aa1..14b09f0 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -7,17 +7,14 @@ struct FMB { float3 extent; }; -template +template class FMBs { private: containter_template fmbs_; containter_template log_weights_; public: - FMBs(uint32_t size): - fmbs_(size), - log_weights_(size), - { + FMBs(uint32_t size) : fmbs_(size), log_weights_(size), { // TODO: set all log_weights_ to 0 } }; diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu index 79606f6..00b8b44 100644 --- a/genmetaballs/src/cuda/core/forward.cu +++ b/genmetaballs/src/cuda/core/forward.cu @@ -1,67 +1,55 @@ #include - #include -constexpr NUM_BLOCKS dim3(10); //XXX madeup +constexpr NUM_BLOCKS dim3(10); // XXX madeup constexpr THREADS_PER_BLOCK dim3(10); namespace FMB { -__device__ __host__ -std::vector> -get_pixel_coords_and_rays(const dim3 thread_idx, const dim3 block_idx) -{ +__device__ __host__ std::vector> +get_pixel_coords_and_rays( + const dim3 thread_idx, const dim3 block_idx) { std::vector> res; uint32_t i_beg = 0; // XXX TODO uint32_t i_end = 0; // XXX TODO - for(int i = i_beg; i < i_end; i+=blockDim.x) { + for (int i = i_beg; i < i_end; i += blockDim.x) { //... } return res; } - -template -__global__ -render_kernel( - const Getter fmb_getter, - const Blender blender, - Confidence const *confidence, - Intrinsics const *intr, - Pose const *extr, - Image *img -) { +template +__global__ render_kernel(const Getter fmb_getter, const Blender blender, + Confidence const* confidence, Intrinsics const* intr, Pose const* extr, + Image* img) { // TODO how to find the relevant chunk of computation from threadIdx, // blockIdx, etc - auto pixel_coords_and_rays = get_pixel_coords_and_rays( - threadIdx, blockIdx, blockDim, gridDim, intr, extr - ); + auto pixel_coords_and_rays = + get_pixel_coords_and_rays(threadIdx, blockIdx, blockDim, gridDim, intr, extr); - for(const auto &[pixel_coords, ray]: pixel_coords_and_rays) { + for (const auto& [pixel_coords, ray] : pixel_coords_and_rays) { float w0 = 0.0f, tf = 0.0f, sumexpd = 0.0f; - for(const auto &fmb: fmb_getter->get_metaballs(ray)) { - const auto &[t, d] = Intersector::intersect(fmb, ray); - w = blender->blend(t, d, fmb, ray); - sumexpd += exp(d); - tf += t; - w0 += w; + for (const auto& fmb : fmb_getter->get_metaballs(ray)) { + const auto& [t, d] = Intersector::intersect(fmb, ray); + w = blender->blend(t, d, fmb, ray); + sumexpd += exp(d); + tf += t; + w0 += w; } img->confidence.at(pixel_coords) = confidence->get_confidence(sumexpd); img->depth.at(pixel_coords) = tf / w0; } } -template -void -render_fmbs(const FMBs &fmbs, const Intrinsics &intr, const Pose &extr) -{ - // initialize the fmb_getter +template +void render_fmbs(const FMBs& fmbs, const Intrinsics& intr, const Pose& extr) { + // initialize the fmb_getter typename Getter::Getter fmb_getter(fmbs, intr, extr); auto kernel = render_kernel; kernel<<>>(fmb_getter, fmbs, intr, extr); } -}; +}; // namespace FMB diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu index 147a8cd..9e5c886 100644 --- a/genmetaballs/src/cuda/core/geometry.cu +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -1,6 +1,9 @@ #include "geometry.cuh" -Vec3D operator+(const Vec3D a, const Vec3D b) -{ +Vec3D operator+(const Vec3D a, const Vec3D b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; } + +Vec3D operator-(const Vec3D a, const Vec3D b) { + return {a.x - b.x, a.y - b.y, a.z - b.z}; +} diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index 0e25185..e1f6334 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -2,22 +2,21 @@ #include - using Vec3D = float3; Vec3D operator+(const Vec3D a, const Vec3D b); - +Vec3D operator-(const Vec3D a, const Vec3D b); class Rotation { - private: - // ... - float rotmat_[9]; +private: + // ... + float rotmat_[9]; - public: - Vec3D apply(const Vec3D vec) const; - Rotation compose(const Rotation &rot) const; - Rotation inv() const; +public: + Vec3D apply(const Vec3D vec) const; + Rotation compose(const Rotation& rot) const; + Rotation inv() const; }; struct Pose { @@ -25,7 +24,7 @@ struct Pose { Vec3D tran; Vec3D apply(const Vec3D vec) const; - Pose compose(const Pose &pose) const; + Pose compose(const Pose& pose) const; Pose inv() const; }; diff --git a/genmetaballs/src/cuda/core/image.cuh b/genmetaballs/src/cuda/core/image.cuh index f2eb1b0..d8ac7f5 100644 --- a/genmetaballs/src/cuda/core/image.cuh +++ b/genmetaballs/src/cuda/core/image.cuh @@ -2,10 +2,10 @@ #include -#include "utils.cuh" #include "geometry.cuh" +#include "utils.cuh" -template +template struct Image { Array2D confidence; Array2D depth; diff --git a/genmetaballs/src/cuda/core/intersector.cuh b/genmetaballs/src/cuda/core/intersector.cuh index 5dea2dc..acfb3a1 100644 --- a/genmetaballs/src/cuda/core/intersector.cuh +++ b/genmetaballs/src/cuda/core/intersector.cuh @@ -8,7 +8,6 @@ // implement equation (6) in the paper class LinearIntersector { - static __device__ __host__ - std::pair - intersect(const FMB &fmb, const Ray &ray) const; + static __device__ __host__ std::pair intersect(const FMB& fmb, + const Ray& ray) const; }; diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 04fc60f..cf114ae 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -3,42 +3,34 @@ #include #include -#define CUDA_CHECK(x) \ - do { \ - cuda_check((x), __FILE__, __LINE__); \ +#define CUDA_CHECK(x) \ + do { \ + cuda_check((x), __FILE__, __LINE__); \ } while (0) - -void cuda_check(cudaError_t code, const char *file, int line); +void cuda_check(cudaError_t code, const char* file, int line); // XXX container_t should be a thrust container type -template +template class Array2D { private: - //XXX TODO: make sure this works + // XXX TODO: make sure this works container_t data_; public: - - __host__ __device__ __forceinline__ - container_t &at(const uint32_t i, const uint32_t j) - { + __host__ __device__ __forceinline__ container_t& at(const uint32_t i, const uint32_t j) { return data_; - //return data_[i * width + j]; + // return data_[i * width + j]; } - __host__ __device__ __forceinline__ - const container_t &at(const uint32_t i, const uint32_t j) const - { + __host__ __device__ __forceinline__ const container_t& at(const uint32_t i, + const uint32_t j) const { return data_; - //return data_[i * width + j]; + // return data_[i * width + j]; } - __host__ __device__ - constexpr uint32_t size() const - { + __host__ __device__ constexpr uint32_t size() const { return 0; - //return width * height; + // return width * height; } }; - diff --git a/tests/test_geometry.py b/tests/test_geometry.py index 00a64c3..18bce0e 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -12,11 +12,18 @@ def rng() -> np.random.Generator: def test_vec3d_smoke() -> None: _gmbb.Vec3D(0, 0, 0) -def test_vec3d_smoke(rng: np.random.Generator) -> None: + +def test_vec3d_add(rng: np.random.Generator) -> None: _a, _b = rng.uniform(size=3), rng.uniform(size=3) a, b = _gmbb.Vec3D(*_a), _gmbb.Vec3D(*_b) c = a + b _c = _a + _b - assert all( - [np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])] - ) + assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])]) + + +def test_vec3d_sub(rng: np.random.Generator) -> None: + _a, _b = rng.uniform(size=3), rng.uniform(size=3) + a, b = _gmbb.Vec3D(*_a), _gmbb.Vec3D(*_b) + c = a - b + _c = _a - _b + assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])])