-
Notifications
You must be signed in to change notification settings - Fork 0
Implement Array2D for non-owning 2D views of Host/Device memory
#14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8f2daf9
9729de1
429c317
bc4372b
d080465
bbe8659
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,5 +115,6 @@ SpacesInSquareBrackets: false | |
| StatementMacros: | ||
| - Q_UNUSED | ||
| - QT_REQUIRE_VERSION | ||
| InsertNewlineAtEOF: true | ||
| ... | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
| #include <cuda/std/mdspan> | ||
| #include <cuda_runtime.h> | ||
|
|
||
| #define CUDA_CHECK(x) \ | ||
|
|
@@ -10,27 +11,38 @@ | |
|
|
||
| void cuda_check(cudaError_t code, const char* file, int line); | ||
|
|
||
| // XXX container_t should be a thrust container type | ||
| template <typename container_t> | ||
| // Non-owning 2D view into a contiguous array in either host or device memory | ||
| template <typename T> | ||
| class Array2D { | ||
| private: | ||
| // XXX TODO: make sure this works | ||
| container_t data_; | ||
| cuda::std::mdspan< | ||
| T, cuda::std::extents<uint32_t, cuda::std::dynamic_extent, cuda::std::dynamic_extent>> | ||
| data_view_; | ||
|
|
||
| public: | ||
| __host__ __device__ __forceinline__ container_t& at(const uint32_t i, const uint32_t j) { | ||
| return data_; | ||
| // return data_[i * width + j]; | ||
| } | ||
| // constructor | ||
| __host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols) | ||
| : data_view_(data, rows, cols) {} | ||
|
|
||
| __host__ __device__ __forceinline__ const container_t& at(const uint32_t i, | ||
| const uint32_t j) const { | ||
| return data_; | ||
| // return data_[i * width + j]; | ||
| // accessor methods | ||
| __host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) { | ||
| return data_view_(row, col); | ||
| } | ||
| __host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const { | ||
| return data_view_(row, col); | ||
| } | ||
|
Comment on lines
+28
to
+33
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason for using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point! Sadly, prior to C++23, we aren't allowed to define |
||
| // size methods | ||
| __host__ __device__ constexpr auto num_rows() const noexcept { | ||
| return data_view_.extent(0); | ||
| } | ||
| __host__ __device__ constexpr auto num_cols() const noexcept { | ||
| return data_view_.extent(1); | ||
| } | ||
|
|
||
| __host__ __device__ constexpr uint32_t size() const { | ||
| return 0; | ||
| // return width * height; | ||
| __host__ __device__ constexpr auto rank() const noexcept { | ||
| return data_view_.rank(); | ||
| } | ||
| __host__ __device__ constexpr auto size() const noexcept { | ||
| return data_view_.size(); | ||
| } | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| #include <cstdint> | ||
| #include <cuda_runtime.h> | ||
| #include <gtest/gtest.h> | ||
| #include <thrust/device_vector.h> | ||
| #include <thrust/host_vector.h> | ||
| #include <type_traits> | ||
| #include <vector> | ||
|
|
||
| #include "core/utils.cuh" | ||
|
|
||
| // CUDA kernel to fill Array2D with sequential values | ||
| __global__ void fill_array2d_kernel(Array2D<float> array2d) { | ||
| uint32_t i = threadIdx.x; | ||
| uint32_t j = threadIdx.y; | ||
|
|
||
| if (i < array2d.num_rows() && j < array2d.num_cols()) { | ||
| array2d(i, j) = i * array2d.num_cols() + j; | ||
| } | ||
| } | ||
|
|
||
| template <typename Container> | ||
| class Array2DTestFixture : public ::testing::Test {}; | ||
|
|
||
| using ContainerTypes = ::testing::Types<std::vector<float>, thrust::device_vector<float>>; | ||
|
|
||
| TYPED_TEST_SUITE(Array2DTestFixture, ContainerTypes); | ||
|
|
||
| TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) { | ||
| uint32_t rows = 4; | ||
| uint32_t cols = 6; | ||
|
|
||
| auto data = TypeParam(rows * cols); | ||
| // create 2D view into the underlying data on host or device | ||
| auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the purpose of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is needed because Alternatively, if we're expecting to use thrust a lot, I'm just thinking that we can also define overloaded |
||
|
|
||
| if constexpr (std::is_same_v<TypeParam, std::vector<float>>) { | ||
| for (auto i = 0; i < rows; i++) { | ||
| for (auto j = 0; j < cols; j++) { | ||
| array2d(i, j) = i * cols + j; | ||
| } | ||
| } | ||
| } else { | ||
| // Launch kernel to fill Array2D on device | ||
| // Note: we could've simply used thrust::sequence to fill the device vector, | ||
| // but this is a simple example to demonstrate how to pass an Array2D to a kernel. | ||
| fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d); | ||
| CUDA_CHECK(cudaGetLastError()); | ||
| CUDA_CHECK(cudaDeviceSynchronize()); | ||
| } | ||
|
|
||
| EXPECT_EQ(array2d.size(), rows * cols); | ||
| EXPECT_EQ(array2d.num_rows(), rows); | ||
| EXPECT_EQ(array2d.num_cols(), cols); | ||
| EXPECT_EQ(array2d.rank(), 2); // 2D array | ||
|
|
||
| // create host vector to verify the data | ||
| // for std::vector, this simply duplicates the vector. | ||
| // for thrust::device_vector, it will copy the data to the host. | ||
| thrust::host_vector<float> host_data = data; | ||
| for (auto idx = 0; idx < rows * cols; idx++) { | ||
| EXPECT_FLOAT_EQ(host_data[idx], idx); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The change is needed because the CUDA header files also define
`operator+` in the global scope, so the compiler no longer has enough context to uniquely identify the right `operator+` here.