From 8fa12d6911c492e39bc0608e0741a03e46aeb329 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Thu, 20 Nov 2025 17:44:13 -0500 Subject: [PATCH 1/3] Update Array2D accessing pattern & use CUDA_CALLABLE macro --- genmetaballs/src/cuda/core/utils.cuh | 27 ++++++++++++++------------- tests/cpp_tests/test_utils.cu | 22 +++++++++++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 20f5a66..c9d907b 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -3,6 +3,7 @@ #include #include #include +#include #include #define CUDA_CALLABLE __host__ __device__ @@ -28,28 +29,28 @@ private: public: // constructor - __host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols) - : data_view_(data, rows, cols) {} - - // accessor methods - __host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) { - return data_view_(row, col); - } - __host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const { - return data_view_(row, col); + CUDA_CALLABLE constexpr Array2D(T* data_ptr, uint32_t rows, uint32_t cols) + : data_view_(data_ptr, rows, cols) {} + + // getting a 1D view of a specific row + // this supports array2d[row][col] access pattern and range-based for loops + // e.g., for (auto val : array2d[row]) { ... } + CUDA_CALLABLE constexpr auto operator[](uint32_t row) const { + return cuda::std::span(data_view_.data_handle() + row * num_cols(), num_cols()); } + // size methods - __host__ __device__ constexpr auto num_rows() const noexcept { + CUDA_CALLABLE constexpr auto num_rows() const noexcept { return data_view_.extent(0); } - __host__ __device__ constexpr auto num_cols() const noexcept { + CUDA_CALLABLE constexpr auto num_cols() const noexcept { return data_view_.extent(1); } - __host__ __device__ constexpr auto rank() const noexcept { + CUDA_CALLABLE constexpr auto rank() const noexcept { return data_view_.rank(); } - __host__ __device__ constexpr auto size() const noexcept { + CUDA_CALLABLE constexpr auto size() const noexcept { return data_view_.size(); } }; // class Array2D diff --git a/tests/cpp_tests/test_utils.cu b/tests/cpp_tests/test_utils.cu index b4cc098..3d07a10 100644 --- a/tests/cpp_tests/test_utils.cu +++ b/tests/cpp_tests/test_utils.cu @@ -92,15 +92,21 @@ TEST(GpuSigmoidTest, SigmoidGPUWithinBounds) { } } +namespace test_utils_gpu { // CUDA kernel to fill Array2D with sequential values __global__ void fill_array2d_kernel(Array2D array2d) { uint32_t i = threadIdx.x; uint32_t j = threadIdx.y; if (i < array2d.num_rows() && j < array2d.num_cols()) { - array2d(i, j) = i * array2d.num_cols() + j; + if (i == array2d.num_rows() - 1) { + array2d[i][j] = -1.0f; // last row set to -1 + } else { + array2d[i][j] = i * array2d.num_cols() + j; + } } } +} // namespace test_utils_gpu template class Array2DTestFixture : public ::testing::Test {}; @@ -118,16 +124,19 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); if constexpr (std::is_same_v>) { - for (auto i = 0; i < rows; i++) { + for (auto i = 0; i < rows - 1; i++) { for (auto j = 0; j < cols; j++) { - array2d(i, j) = i * cols + j; + array2d[i][j] = i * cols + j; } } + for (auto& val : array2d[rows - 1]) { + val = -1.0f; // setting last row to -1 with range-based for loop + } } else { // Launch kernel to fill Array2D on device // Note: we could've simply use thrust::sequence to fill the device vector, // but this is a simple example to demonstrate how to pass an Array2D to a kernel. - fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d); + test_utils_gpu::fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d); CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaDeviceSynchronize()); } @@ -141,7 +150,10 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { // for std::vector, this simply duplicate the vector. // for thrust::device_vector, it will copy the data to the host. thrust::host_vector host_data = data; - for (auto idx = 0; idx < rows * cols; idx++) { + for (auto idx = 0; idx < (rows - 1) * cols; idx++) { EXPECT_FLOAT_EQ(host_data[idx], idx); } + for (auto idx = (rows - 1) * cols; idx < rows * cols; idx++) { + EXPECT_FLOAT_EQ(host_data[idx], -1.0f); + } } From cd5b439cda6abb4ac531aebbea14f5aaa2e4ddf8 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Sat, 22 Nov 2025 23:28:37 -0500 Subject: [PATCH 2/3] Rename rank() -> ndim() and add additional unit tests suggested by Arijit --- genmetaballs/src/cuda/core/utils.cuh | 2 +- tests/cpp_tests/test_utils.cu | 43 +++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index c9d907b..3d6a69c 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -47,7 +47,7 @@ public: return data_view_.extent(1); } - CUDA_CALLABLE constexpr auto rank() const noexcept { + CUDA_CALLABLE constexpr auto ndim() const noexcept { return data_view_.rank(); } CUDA_CALLABLE constexpr auto size() const noexcept { diff --git a/tests/cpp_tests/test_utils.cu b/tests/cpp_tests/test_utils.cu index 3d07a10..3f47404 100644 --- a/tests/cpp_tests/test_utils.cu +++ b/tests/cpp_tests/test_utils.cu @@ -144,7 +144,7 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { EXPECT_EQ(array2d.size(), rows * cols); EXPECT_EQ(array2d.num_rows(), rows); EXPECT_EQ(array2d.num_cols(), cols); - EXPECT_EQ(array2d.rank(), 2); // 2D array + EXPECT_EQ(array2d.ndim(), 2); // 2D array // create host vector to verify the data // for std::vector, this simply duplicate the vector. @@ -157,3 +157,44 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { EXPECT_FLOAT_EQ(host_data[idx], -1.0f); } } + +// Test that modifications through view affect underlying data +TYPED_TEST(Array2DTestFixture, ViewModifiesUnderlyingData) { + if constexpr (std::is_same_v>) { + uint32_t rows = 3; + uint32_t cols = 4; + auto data = TypeParam(rows * cols, 0.0f); + auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + + // Modify through view + array2d[1][2] = 42.5f; + // Verify underlying data changed + EXPECT_FLOAT_EQ(data[1 * cols + 2], 42.5f); + + // Modify underlying data directly + data[0 * cols + 1] = 99.9f; + // Verify view reflects change + EXPECT_FLOAT_EQ(array2d[0][1], 99.9f); + } +} + +// Test multiple views of the same data +TYPED_TEST(Array2DTestFixture, MultipleViewsOfSameData) { + if constexpr (std::is_same_v>) { + uint32_t rows = 2; + uint32_t cols = 3; + auto data = TypeParam(rows * cols, 0.0f); + auto view1 = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + auto view2 = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + + // Modify through view1 + view1[0][0] = 100.0f; + // Verify view2 sees the change + EXPECT_FLOAT_EQ(view2[0][0], 100.0f); + + // Modify through view2 + view2[1][2] = 200.0f; + // Verify view1 sees the change + EXPECT_FLOAT_EQ(view1[1][2], 200.0f); + } +} From 96ce9ba73cde2eee79db08fc643c3367c6f58448 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Sun, 23 Nov 2025 00:21:38 -0500 Subject: [PATCH 3/3] Move raw_pointer_cast to constructor of Array2D --- genmetaballs/src/cuda/core/utils.cuh | 14 ++++++++++++-- tests/cpp_tests/test_utils.cu | 8 ++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 3d6a69c..43e5707 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -5,6 +5,8 @@ #include #include #include +#include +#include #define CUDA_CALLABLE __host__ __device__ @@ -29,8 +31,9 @@ private: public: // constructor - CUDA_CALLABLE constexpr Array2D(T* data_ptr, uint32_t rows, uint32_t cols) - : data_view_(data_ptr, rows, cols) {} + template + CUDA_CALLABLE constexpr Array2D(Pointer data_ptr, uint32_t rows, uint32_t cols) + : data_view_(thrust::raw_pointer_cast(data_ptr), rows, cols) {} // getting a 1D view of a specific row // this supports array2d[row][col] access pattern and range-based for loops @@ -54,3 +57,10 @@ public: return data_view_.size(); } }; // class Array2D + +// Type deduction guide +// if initialized with (Pointer, int, int), deduce T by looking at what raw_pointer_cast returns +// so we can write Array2D(array_ptr, rows, cols) instead of Array2D(array_ptr, rows, cols) +template +Array2D(Pointer, uint32_t, uint32_t) + -> Array2D::element_type>; diff --git a/tests/cpp_tests/test_utils.cu b/tests/cpp_tests/test_utils.cu index 3f47404..a4ebfd3 100644 --- a/tests/cpp_tests/test_utils.cu +++ b/tests/cpp_tests/test_utils.cu @@ -121,7 +121,7 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { auto data = TypeParam(rows * cols); // create 2D view into the underlying data on host or device - auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + auto array2d = Array2D(data.data(), rows, cols); if constexpr (std::is_same_v>) { for (auto i = 0; i < rows - 1; i++) { @@ -164,7 +164,7 @@ TYPED_TEST(Array2DTestFixture, ViewModifiesUnderlyingData) { uint32_t rows = 3; uint32_t cols = 4; auto data = TypeParam(rows * cols, 0.0f); - auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + auto array2d = Array2D(data.data(), rows, cols); // Modify through view array2d[1][2] = 42.5f; @@ -184,8 +184,8 @@ TYPED_TEST(Array2DTestFixture, MultipleViewsOfSameData) { uint32_t rows = 2; uint32_t cols = 3; auto data = TypeParam(rows * cols, 0.0f); - auto view1 = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); - auto view2 = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + auto view1 = Array2D(data.data(), rows, cols); + auto view2 = Array2D(data.data(), rows, cols); // Modify through view1 view1[0][0] = 100.0f;