Skip to content

Commit 4bee915

Browse files
authored
Update Array2D accessing pattern & use CUDA_CALLABLE macro (#17)
(Closes MET-47) ## Summary of Changes This PR addresses some of the suggestions that @mugamma brought up in #14. In particular, it defines the `operator[]` on `Array2D` to return a 1D view of a row, so we can use patterns like `array2d[i][j]` instead of `array2d(i, j)` to access the element. Another nice thing about returning the 1D span is that we can use a range-based for loop to go over the elements in a row as well, e.g. ```cpp for (auto& val : array2d[row]) { /* do something with val */ } ``` You can find some example usages in the [included test file](https://github.com/probcomp/GenMetaBalls/pull/17/files#diff-92c53773082b537451d1c0e757c8ac6b5c6f85fd4bcd6e01bf82cade202575fb). Another minor change in this PR is the refactoring of `Array2D` methods to use the new `CUDA_CALLABLE` macro that Arijit introduced recently. ## Test Plan To run the included unit tests: ```bash pixi run test ```
1 parent 039ba52 commit 4bee915

File tree

2 files changed

+83
-19
lines changed

2 files changed

+83
-19
lines changed

genmetaballs/src/cuda/core/utils.cuh

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
#include <cmath>
44
#include <cstdint>
55
#include <cuda/std/mdspan>
6+
#include <cuda/std/span>
67
#include <cuda_runtime.h>
8+
#include <memory>
9+
#include <thrust/memory.h>
710

811
#define CUDA_CALLABLE __host__ __device__
912

@@ -28,28 +31,36 @@ private:
2831

2932
public:
3033
// constructor
31-
__host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols)
32-
: data_view_(data, rows, cols) {}
34+
template <typename Pointer>
35+
CUDA_CALLABLE constexpr Array2D(Pointer data_ptr, uint32_t rows, uint32_t cols)
36+
: data_view_(thrust::raw_pointer_cast(data_ptr), rows, cols) {}
3337

34-
// accessor methods
35-
__host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) {
36-
return data_view_(row, col);
37-
}
38-
__host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const {
39-
return data_view_(row, col);
38+
// getting a 1D view of a specific row
39+
// this supports array2d[row][col] access pattern and range-based for loops
40+
// e.g., for (auto val : array2d[row]) { ... }
41+
CUDA_CALLABLE constexpr auto operator[](uint32_t row) const {
42+
return cuda::std::span<T>(data_view_.data_handle() + row * num_cols(), num_cols());
4043
}
44+
4145
// size methods
42-
__host__ __device__ constexpr auto num_rows() const noexcept {
46+
CUDA_CALLABLE constexpr auto num_rows() const noexcept {
4347
return data_view_.extent(0);
4448
}
45-
__host__ __device__ constexpr auto num_cols() const noexcept {
49+
CUDA_CALLABLE constexpr auto num_cols() const noexcept {
4650
return data_view_.extent(1);
4751
}
4852

49-
__host__ __device__ constexpr auto rank() const noexcept {
53+
CUDA_CALLABLE constexpr auto ndim() const noexcept {
5054
return data_view_.rank();
5155
}
52-
__host__ __device__ constexpr auto size() const noexcept {
56+
CUDA_CALLABLE constexpr auto size() const noexcept {
5357
return data_view_.size();
5458
}
5559
}; // class Array2D
60+
61+
// Type deduction guide
62+
// if initialized with (Pointer, int, int), deduce T by looking at what raw_pointer_cast returns
63+
// so we can write Array2D(array_ptr, rows, cols) instead of Array2D<Type>(array_ptr, rows, cols)
64+
template <typename Pointer>
65+
Array2D(Pointer, uint32_t, uint32_t)
66+
-> Array2D<typename std::pointer_traits<Pointer>::element_type>;

tests/cpp_tests/test_utils.cu

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,21 @@ TEST(GpuSigmoidTest, SigmoidGPUWithinBounds) {
9292
}
9393
}
9494

95+
namespace test_utils_gpu {
9596
// CUDA kernel to fill Array2D with sequential values
9697
__global__ void fill_array2d_kernel(Array2D<float> array2d) {
9798
uint32_t i = threadIdx.x;
9899
uint32_t j = threadIdx.y;
99100

100101
if (i < array2d.num_rows() && j < array2d.num_cols()) {
101-
array2d(i, j) = i * array2d.num_cols() + j;
102+
if (i == array2d.num_rows() - 1) {
103+
array2d[i][j] = -1.0f; // last row set to -1
104+
} else {
105+
array2d[i][j] = i * array2d.num_cols() + j;
106+
}
102107
}
103108
}
109+
} // namespace test_utils_gpu
104110

105111
template <typename Container>
106112
class Array2DTestFixture : public ::testing::Test {};
@@ -115,33 +121,80 @@ TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) {
115121

116122
auto data = TypeParam(rows * cols);
117123
// create 2D view into the underlying data on host or device
118-
auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols);
124+
auto array2d = Array2D(data.data(), rows, cols);
119125

120126
if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
121-
for (auto i = 0; i < rows; i++) {
127+
for (auto i = 0; i < rows - 1; i++) {
122128
for (auto j = 0; j < cols; j++) {
123-
array2d(i, j) = i * cols + j;
129+
array2d[i][j] = i * cols + j;
124130
}
125131
}
132+
for (auto& val : array2d[rows - 1]) {
133+
val = -1.0f; // setting last row to -1 with range-based for loop
134+
}
126135
} else {
127136
// Launch kernel to fill Array2D on device
128137
// Note: we could've simply use thrust::sequence to fill the device vector,
129138
// but this is a simple example to demonstrate how to pass an Array2D to a kernel.
130-
fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
139+
test_utils_gpu::fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
131140
CUDA_CHECK(cudaGetLastError());
132141
CUDA_CHECK(cudaDeviceSynchronize());
133142
}
134143

135144
EXPECT_EQ(array2d.size(), rows * cols);
136145
EXPECT_EQ(array2d.num_rows(), rows);
137146
EXPECT_EQ(array2d.num_cols(), cols);
138-
EXPECT_EQ(array2d.rank(), 2); // 2D array
147+
EXPECT_EQ(array2d.ndim(), 2); // 2D array
139148

140149
// create host vector to verify the data
141150
// for std::vector, this simply duplicates the vector.
142151
// for thrust::device_vector, it will copy the data to the host.
143152
thrust::host_vector<float> host_data = data;
144-
for (auto idx = 0; idx < rows * cols; idx++) {
153+
for (auto idx = 0; idx < (rows - 1) * cols; idx++) {
145154
EXPECT_FLOAT_EQ(host_data[idx], idx);
146155
}
156+
for (auto idx = (rows - 1) * cols; idx < rows * cols; idx++) {
157+
EXPECT_FLOAT_EQ(host_data[idx], -1.0f);
158+
}
159+
}
160+
161+
// Test that modifications through view affect underlying data
162+
TYPED_TEST(Array2DTestFixture, ViewModifiesUnderlyingData) {
163+
if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
164+
uint32_t rows = 3;
165+
uint32_t cols = 4;
166+
auto data = TypeParam(rows * cols, 0.0f);
167+
auto array2d = Array2D(data.data(), rows, cols);
168+
169+
// Modify through view
170+
array2d[1][2] = 42.5f;
171+
// Verify underlying data changed
172+
EXPECT_FLOAT_EQ(data[1 * cols + 2], 42.5f);
173+
174+
// Modify underlying data directly
175+
data[0 * cols + 1] = 99.9f;
176+
// Verify view reflects change
177+
EXPECT_FLOAT_EQ(array2d[0][1], 99.9f);
178+
}
179+
}
180+
181+
// Test multiple views of the same data
182+
TYPED_TEST(Array2DTestFixture, MultipleViewsOfSameData) {
183+
if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
184+
uint32_t rows = 2;
185+
uint32_t cols = 3;
186+
auto data = TypeParam(rows * cols, 0.0f);
187+
auto view1 = Array2D(data.data(), rows, cols);
188+
auto view2 = Array2D(data.data(), rows, cols);
189+
190+
// Modify through view1
191+
view1[0][0] = 100.0f;
192+
// Verify view2 sees the change
193+
EXPECT_FLOAT_EQ(view2[0][0], 100.0f);
194+
195+
// Modify through view2
196+
view2[1][2] = 200.0f;
197+
// Verify view1 sees the change
198+
EXPECT_FLOAT_EQ(view1[1][2], 200.0f);
199+
}
147200
}

0 commit comments

Comments
 (0)