diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 20f5a66..43e5707 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -3,7 +3,10 @@ #include #include #include +#include #include +#include +#include #define CUDA_CALLABLE __host__ __device__ @@ -28,28 +31,36 @@ private: public: // constructor - __host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols) - : data_view_(data, rows, cols) {} + template + CUDA_CALLABLE constexpr Array2D(Pointer data_ptr, uint32_t rows, uint32_t cols) + : data_view_(thrust::raw_pointer_cast(data_ptr), rows, cols) {} - // accessor methods - __host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) { - return data_view_(row, col); - } - __host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const { - return data_view_(row, col); + // getting a 1D view of a specific row + // this supports array2d[row][col] access pattern and range-based for loops + // e.g., for (auto val : array2d[row]) { ... } + CUDA_CALLABLE constexpr auto operator[](uint32_t row) const { + return cuda::std::span(data_view_.data_handle() + row * num_cols(), num_cols()); } + // size methods - __host__ __device__ constexpr auto num_rows() const noexcept { + CUDA_CALLABLE constexpr auto num_rows() const noexcept { return data_view_.extent(0); } - __host__ __device__ constexpr auto num_cols() const noexcept { + CUDA_CALLABLE constexpr auto num_cols() const noexcept { return data_view_.extent(1); } - __host__ __device__ constexpr auto rank() const noexcept { + CUDA_CALLABLE constexpr auto ndim() const noexcept { return data_view_.rank(); } - __host__ __device__ constexpr auto size() const noexcept { + CUDA_CALLABLE constexpr auto size() const noexcept { return data_view_.size(); } }; // class Array2D + +// Type deduction guide +// if initialized with (Pointer, int, int), deduce T by looking at what raw_pointer_cast returns +// so we can write Array2D(array_ptr, rows, cols) instead of Array2D(array_ptr, rows, cols) +template +Array2D(Pointer, uint32_t, uint32_t) + -> Array2D::element_type>; diff --git a/tests/cpp_tests/test_utils.cu b/tests/cpp_tests/test_utils.cu index b4cc098..a4ebfd3 100644 --- a/tests/cpp_tests/test_utils.cu +++ b/tests/cpp_tests/test_utils.cu @@ -92,15 +92,21 @@ TEST(GpuSigmoidTest, SigmoidGPUWithinBounds) { } } +namespace test_utils_gpu { // CUDA kernel to fill Array2D with sequential values __global__ void fill_array2d_kernel(Array2D array2d) { uint32_t i = threadIdx.x; uint32_t j = threadIdx.y; if (i < array2d.num_rows() && j < array2d.num_cols()) { - array2d(i, j) = i * array2d.num_cols() + j; + if (i == array2d.num_rows() - 1) { + array2d[i][j] = -1.0f; // last row set to -1 + } else { + array2d[i][j] = i * array2d.num_cols() + j; + } } } +} // namespace test_utils_gpu template class Array2DTestFixture : public ::testing::Test {}; @@ -115,19 +121,22 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { auto data = TypeParam(rows * cols); // create 2D view into the underlying data on host or device - auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); + auto array2d = Array2D(data.data(), rows, cols); if constexpr (std::is_same_v>) { - for (auto i = 0; i < rows; i++) { + for (auto i = 0; i < rows - 1; i++) { for (auto j = 0; j < cols; j++) { - array2d(i, j) = i * cols + j; + array2d[i][j] = i * cols + j; } } + for (auto& val : array2d[rows - 1]) { + val = -1.0f; // setting last row to -1 with range-based for loop + } } else { // Launch kernel to fill Array2D on device // Note: we could've simply use thrust::sequence to fill the device vector, // but this is a simple example to demonstrate how to pass an Array2D to a kernel. - fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d); + test_utils_gpu::fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d); CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaDeviceSynchronize()); } @@ -135,13 +144,57 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { EXPECT_EQ(array2d.size(), rows * cols); EXPECT_EQ(array2d.num_rows(), rows); EXPECT_EQ(array2d.num_cols(), cols); - EXPECT_EQ(array2d.rank(), 2); // 2D array + EXPECT_EQ(array2d.ndim(), 2); // 2D array // create host vector to verify the data // for std::vector, this simply duplicate the vector. // for thrust::device_vector, it will copy the data to the host. thrust::host_vector host_data = data; - for (auto idx = 0; idx < rows * cols; idx++) { + for (auto idx = 0; idx < (rows - 1) * cols; idx++) { EXPECT_FLOAT_EQ(host_data[idx], idx); } + for (auto idx = (rows - 1) * cols; idx < rows * cols; idx++) { + EXPECT_FLOAT_EQ(host_data[idx], -1.0f); + } +} + +// Test that modifications through view affect underlying data +TYPED_TEST(Array2DTestFixture, ViewModifiesUnderlyingData) { + if constexpr (std::is_same_v>) { + uint32_t rows = 3; + uint32_t cols = 4; + auto data = TypeParam(rows * cols, 0.0f); + auto array2d = Array2D(data.data(), rows, cols); + + // Modify through view + array2d[1][2] = 42.5f; + // Verify underlying data changed + EXPECT_FLOAT_EQ(data[1 * cols + 2], 42.5f); + + // Modify underlying data directly + data[0 * cols + 1] = 99.9f; + // Verify view reflects change + EXPECT_FLOAT_EQ(array2d[0][1], 99.9f); + } +} + +// Test multiple views of the same data +TYPED_TEST(Array2DTestFixture, MultipleViewsOfSameData) { + if constexpr (std::is_same_v>) { + uint32_t rows = 2; + uint32_t cols = 3; + auto data = TypeParam(rows * cols, 0.0f); + auto view1 = Array2D(data.data(), rows, cols); + auto view2 = Array2D(data.data(), rows, cols); + + // Modify through view1 + view1[0][0] = 100.0f; + // Verify view2 sees the change + EXPECT_FLOAT_EQ(view2[0][0], 100.0f); + + // Modify through view2 + view2[1][2] = 200.0f; + // Verify view1 sees the change + EXPECT_FLOAT_EQ(view1[1][2], 200.0f); + } }