Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions genmetaballs/src/cuda/core/utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
#include <cmath>
#include <cstdint>
#include <cuda/std/mdspan>
#include <cuda/std/span>
#include <cuda_runtime.h>
#include <memory>
#include <thrust/memory.h>

#define CUDA_CALLABLE __host__ __device__

Expand All @@ -28,28 +31,36 @@ private:

public:
// constructor
__host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols)
: data_view_(data, rows, cols) {}
template <typename Pointer>
CUDA_CALLABLE constexpr Array2D(Pointer data_ptr, uint32_t rows, uint32_t cols)
: data_view_(thrust::raw_pointer_cast(data_ptr), rows, cols) {}

// accessor methods
__host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) {
return data_view_(row, col);
}
__host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const {
return data_view_(row, col);
// getting a 1D view of a specific row
// this supports array2d[row][col] access pattern and range-based for loops
// e.g., for (auto val : array2d[row]) { ... }
CUDA_CALLABLE constexpr auto operator[](uint32_t row) const {
return cuda::std::span<T>(data_view_.data_handle() + row * num_cols(), num_cols());
}

// size methods
__host__ __device__ constexpr auto num_rows() const noexcept {
CUDA_CALLABLE constexpr auto num_rows() const noexcept {
return data_view_.extent(0);
}
__host__ __device__ constexpr auto num_cols() const noexcept {
CUDA_CALLABLE constexpr auto num_cols() const noexcept {
return data_view_.extent(1);
}

__host__ __device__ constexpr auto rank() const noexcept {
CUDA_CALLABLE constexpr auto ndim() const noexcept {
return data_view_.rank();
}
__host__ __device__ constexpr auto size() const noexcept {
CUDA_CALLABLE constexpr auto size() const noexcept {
return data_view_.size();
}
}; // class Array2D

// Type deduction guide
// if initialized with (Pointer, int, int), deduce T by looking at what raw_pointer_cast returns
// so we can write Array2D(array_ptr, rows, cols) instead of Array2D<Type>(array_ptr, rows, cols)
template <typename Pointer>
Array2D(Pointer, uint32_t, uint32_t)
-> Array2D<typename std::pointer_traits<Pointer>::element_type>;
67 changes: 60 additions & 7 deletions tests/cpp_tests/test_utils.cu
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you accept my ndim suggestion, then you would need to edit line 147 from rank to ndim for test to pass. It works on my end locally.

Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,21 @@ TEST(GpuSigmoidTest, SigmoidGPUWithinBounds) {
}
}

namespace test_utils_gpu {
// CUDA kernel to fill Array2D with sequential values
__global__ void fill_array2d_kernel(Array2D<float> array2d) {
uint32_t i = threadIdx.x;
uint32_t j = threadIdx.y;

if (i < array2d.num_rows() && j < array2d.num_cols()) {
array2d(i, j) = i * array2d.num_cols() + j;
if (i == array2d.num_rows() - 1) {
array2d[i][j] = -1.0f; // last row set to -1
} else {
array2d[i][j] = i * array2d.num_cols() + j;
}
}
}
} // namespace test_utils_gpu

template <typename Container>
class Array2DTestFixture : public ::testing::Test {};
Expand All @@ -115,33 +121,80 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) {

auto data = TypeParam(rows * cols);
// create 2D view into the underlying data on host or device
auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols);
auto array2d = Array2D(data.data(), rows, cols);

if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
for (auto i = 0; i < rows; i++) {
for (auto i = 0; i < rows - 1; i++) {
for (auto j = 0; j < cols; j++) {
array2d(i, j) = i * cols + j;
array2d[i][j] = i * cols + j;
}
}
for (auto& val : array2d[rows - 1]) {
val = -1.0f; // setting last row to -1 with range-based for loop
}
} else {
// Launch kernel to fill Array2D on device
// Note: we could've simply use thrust::sequence to fill the device vector,
// but this is a simple example to demonstrate how to pass an Array2D to a kernel.
fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
test_utils_gpu::fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
}

EXPECT_EQ(array2d.size(), rows * cols);
EXPECT_EQ(array2d.num_rows(), rows);
EXPECT_EQ(array2d.num_cols(), cols);
EXPECT_EQ(array2d.rank(), 2); // 2D array
EXPECT_EQ(array2d.ndim(), 2); // 2D array

// create host vector to verify the data
// for std::vector, this simply duplicate the vector.
// for thrust::device_vector, it will copy the data to the host.
thrust::host_vector<float> host_data = data;
for (auto idx = 0; idx < rows * cols; idx++) {
for (auto idx = 0; idx < (rows - 1) * cols; idx++) {
EXPECT_FLOAT_EQ(host_data[idx], idx);
}
for (auto idx = (rows - 1) * cols; idx < rows * cols; idx++) {
EXPECT_FLOAT_EQ(host_data[idx], -1.0f);
}
}

// Test that modifications through view affect underlying data
TYPED_TEST(Array2DTestFixture, ViewModifiesUnderlyingData) {
if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
uint32_t rows = 3;
uint32_t cols = 4;
auto data = TypeParam(rows * cols, 0.0f);
auto array2d = Array2D(data.data(), rows, cols);

// Modify through view
array2d[1][2] = 42.5f;
// Verify underlying data changed
EXPECT_FLOAT_EQ(data[1 * cols + 2], 42.5f);

// Modify underlying data directly
data[0 * cols + 1] = 99.9f;
// Verify view reflects change
EXPECT_FLOAT_EQ(array2d[0][1], 99.9f);
}
}

// Test multiple views of the same data
TYPED_TEST(Array2DTestFixture, MultipleViewsOfSameData) {
if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
uint32_t rows = 2;
uint32_t cols = 3;
auto data = TypeParam(rows * cols, 0.0f);
auto view1 = Array2D(data.data(), rows, cols);
auto view2 = Array2D(data.data(), rows, cols);

// Modify through view1
view1[0][0] = 100.0f;
// Verify view2 sees the change
EXPECT_FLOAT_EQ(view2[0][0], 100.0f);

// Modify through view2
view2[1][2] = 200.0f;
// Verify view1 sees the change
EXPECT_FLOAT_EQ(view1[1][2], 200.0f);
}
}