1 change: 1 addition & 0 deletions .clang-format
@@ -115,5 +115,6 @@ SpacesInSquareBrackets: false
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
+InsertNewlineAtEOF: true
...

6 changes: 5 additions & 1 deletion .clang-tidy
@@ -37,7 +37,8 @@ Checks: >
  -modernize-avoid-c-arrays,
  -readability-magic-numbers,
  -cert-dcl37-c,
-  -cert-dcl51-cpp
+  -cert-dcl51-cpp,
+  -cert-dcl58-cpp,

WarningsAsErrors: ''
HeaderFilterRegex: '.*'
@@ -51,4 +52,7 @@ CheckOptions:
    value: 'std::vector;.*Iterator'
  - key: modernize-use-nodiscard.Macros
    value: 'CUDA_CHECK'
+  - key: readability-function-cognitive-complexity.IgnoreMacros
+    # GTest macros can artificially increase the cognitive complexity, so we ignore them.
+    value: true
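To see why that option is worth setting, here is a minimal illustration (a hypothetical test, not part of this PR): every GTest EXPECT_* macro expands to internal if/else branching, so even a straight-line test body can trip readability-function-cognitive-complexity unless macro-expanded code is ignored.

#include <gtest/gtest.h>

// No visible branching here, yet each EXPECT_* below expands to an
// if/else internally, which the cognitive-complexity check would
// otherwise count against the enclosing function.
TEST(ComplexityIllustration, StraightLineTest) {
    EXPECT_EQ(1 + 1, 2);
    EXPECT_NE(2 + 2, 5);
    EXPECT_TRUE(3 > 2);
}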

8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -69,19 +69,19 @@ find_package(GTest CONFIG REQUIRED)

# C++/CUDA test executable
file(GLOB CPP_CUDA_TEST_SOURCES tests/cpp_tests/*.cu tests/cpp_tests/*.cpp)
-add_executable(test_add ${CPP_CUDA_TEST_SOURCES})
+add_executable(cpp_tests ${CPP_CUDA_TEST_SOURCES})

-target_link_libraries(test_add
+target_link_libraries(cpp_tests
    PRIVATE
        genmetaballs_core
        GTest::gtest
        GTest::gtest_main
)

# Enable CUDA for the test executable
-set_target_properties(test_add PROPERTIES
+set_target_properties(cpp_tests PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
)

include(GoogleTest)
-gtest_discover_tests(test_add)
+gtest_discover_tests(cpp_tests)
7 changes: 4 additions & 3 deletions genmetaballs/src/cuda/bindings.cu
@@ -1,5 +1,6 @@
#include <cstdint>
#include <nanobind/nanobind.h>
+#include <nanobind/operators.h>
#include <nanobind/stl/vector.h>

#include "core/add.cuh"
@@ -20,10 +21,10 @@ NB_MODULE(_genmetaballs_bindings, m) {
        .def_rw("x", &Vec3D::x)
        .def_rw("y", &Vec3D::y)
        .def_rw("z", &Vec3D::z)
-        .def("__add__", &operator+)
-        .def("__sub__", &operator-)
+        .def(nb::self + nb::self)
+        .def(nb::self - nb::self)

Contributor Author: The change is needed because the CUDA header files also define operator+ at global scope, so the compiler no longer has enough context to uniquely resolve operator+ here; see the sketch after this diff.

        .def("__repr__", [](const Vec3D& v) {
            nb::str s = nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z);
            return s;
        });
}
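A minimal sketch of the nb::self idiom used above (the Vec2 type and module name are hypothetical, not part of this PR): the expression templates from <nanobind/operators.h> let nanobind deduce the binding from the C++ operator itself, so no global operator+ ever has to be named.

#include <nanobind/nanobind.h>
#include <nanobind/operators.h>

namespace nb = nanobind;

// Hypothetical type used only for illustration
struct Vec2 {
    float x, y;
    Vec2(float x_, float y_) : x(x_), y(y_) {}
    Vec2 operator+(const Vec2& o) const { return {x + o.x, y + o.y}; }
    Vec2 operator-(const Vec2& o) const { return {x - o.x, y - o.y}; }
};

NB_MODULE(vec2_demo, m) {
    nb::class_<Vec2>(m, "Vec2")
        .def(nb::init<float, float>())
        .def(nb::self + nb::self)   // bound as Vec2.__add__
        .def(nb::self - nb::self);  // bound as Vec2.__sub__
}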
42 changes: 27 additions & 15 deletions genmetaballs/src/cuda/core/utils.cuh
@@ -1,6 +1,7 @@
#pragma once

#include <cstdint>
+#include <cuda/std/mdspan>
#include <cuda_runtime.h>

#define CUDA_CHECK(x) \
@@ -10,27 +11,38 @@

void cuda_check(cudaError_t code, const char* file, int line);

-// XXX container_t should be a thrust container type
-template <typename container_t>
+// Non-owning 2D view into a contiguous array in either host or device memory
+template <typename T>
class Array2D {
private:
-    // XXX TODO: make sure this works
-    container_t data_;
+    cuda::std::mdspan<
+        T, cuda::std::extents<uint32_t, cuda::std::dynamic_extent, cuda::std::dynamic_extent>>
+        data_view_;

public:
-    __host__ __device__ __forceinline__ container_t& at(const uint32_t i, const uint32_t j) {
-        return data_;
-        // return data_[i * width + j];
-    }
-
-    __host__ __device__ __forceinline__ const container_t& at(const uint32_t i,
-                                                              const uint32_t j) const {
-        return data_;
-        // return data_[i * width + j];
-    }
+    // constructor
+    __host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols)
+        : data_view_(data, rows, cols) {}
+
+    // accessor methods
+    __host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) {
+        return data_view_(row, col);
+    }
+    __host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const {
+        return data_view_(row, col);
+    }

Comment on lines +28 to +33:
Contributor: Is there a reason for using operator() instead of the more natural operator[]?
Contributor Author: Good point! Sadly, prior to C++23, we aren't allowed to define operator[] with more than one argument. cuda::std::mdspan made a similar design choice: it defines operator() under earlier C++ standards and switches to operator[] in C++23 and above. See the sketch after this diff.

+    // size methods
+    __host__ __device__ constexpr auto num_rows() const noexcept {
+        return data_view_.extent(0);
+    }
+    __host__ __device__ constexpr auto num_cols() const noexcept {
+        return data_view_.extent(1);
+    }

-    __host__ __device__ constexpr uint32_t size() const {
-        return 0;
-        // return width * height;
-    }
+    __host__ __device__ constexpr auto rank() const noexcept {
+        return data_view_.rank();
+    }
+    __host__ __device__ constexpr auto size() const noexcept {
+        return data_view_.size();
+    }
};
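A minimal sketch of the subscript-operator point from the review thread above (the Grid type is hypothetical, not from this codebase): before C++23, operator[] is limited to a single argument, which is why both Array2D and cuda::std::mdspan fall back to operator() for multi-index access.

#include <cstdint>

// Hypothetical 2D view, trimmed to illustrate the language rule only
struct Grid {
    float* data;
    uint32_t cols;

    // Works under every standard: multi-index access via operator()
    float& operator()(uint32_t row, uint32_t col) { return data[row * cols + col]; }

#if __cplusplus >= 202302L
    // C++23 (P2128) allows a multi-argument operator[];
    // mdspan makes the same switch under C++23 and above.
    float& operator[](uint32_t row, uint32_t col) { return data[row * cols + col]; }
#endif
};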
63 changes: 63 additions & 0 deletions tests/cpp_tests/test_utils.cu
@@ -0,0 +1,63 @@
#include <cstdint>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <type_traits>
#include <vector>

#include "core/utils.cuh"

// CUDA kernel to fill Array2D with sequential values
__global__ void fill_array2d_kernel(Array2D<float> array2d) {
    uint32_t i = threadIdx.x;
    uint32_t j = threadIdx.y;

    if (i < array2d.num_rows() && j < array2d.num_cols()) {
        array2d(i, j) = i * array2d.num_cols() + j;
    }
}

template <typename Container>
class Array2DTestFixture : public ::testing::Test {};

using ContainerTypes = ::testing::Types<std::vector<float>, thrust::device_vector<float>>;

TYPED_TEST_SUITE(Array2DTestFixture, ContainerTypes);

TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) {
    uint32_t rows = 4;
    uint32_t cols = 6;

    auto data = TypeParam(rows * cols);
    // create 2D view into the underlying data on host or device
    auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols);
Contributor: What's the purpose of thrust::raw_pointer_cast here?

Contributor Author: This is needed because the data() method of thrust::host_vector and thrust::device_vector returns a custom pointer type, so this cast converts it to a raw pointer that mdspan can understand. Alternatively, if we're expecting to use thrust a lot, we could define overloaded Array2D constructors specialized to thrust pointer types so we don't have to invoke this conversion explicitly everywhere; see the sketch after this file.


    if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
        for (auto i = 0; i < rows; i++) {
            for (auto j = 0; j < cols; j++) {
                array2d(i, j) = i * cols + j;
            }
        }
    } else {
        // Launch kernel to fill Array2D on device
        // Note: we could've simply used thrust::sequence to fill the device vector,
        // but this is a simple example to demonstrate how to pass an Array2D to a kernel.
        fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
        CUDA_CHECK(cudaGetLastError());
        CUDA_CHECK(cudaDeviceSynchronize());
    }

    EXPECT_EQ(array2d.size(), rows * cols);
    EXPECT_EQ(array2d.num_rows(), rows);
    EXPECT_EQ(array2d.num_cols(), cols);
    EXPECT_EQ(array2d.rank(), 2);  // 2D array

    // create host vector to verify the data:
    // for std::vector, this simply duplicates the vector;
    // for thrust::device_vector, it copies the data to the host.
    thrust::host_vector<float> host_data = data;
    for (auto idx = 0; idx < rows * cols; idx++) {
        EXPECT_FLOAT_EQ(host_data[idx], idx);
    }
}
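The constructor overload floated in the raw_pointer_cast thread above could look roughly like this. A sketch only: the Array2DSketch name and the overload itself are hypothetical, not part of this PR, and the class is trimmed to the two constructors.

#include <cstdint>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>

template <typename T>
class Array2DSketch {
public:
    // Existing raw-pointer constructor
    Array2DSketch(T* data, uint32_t rows, uint32_t cols)
        : data_(data), rows_(rows), cols_(cols) {}

    // Proposed convenience overload: accept thrust::device_ptr directly and
    // forward to the raw-pointer constructor, hiding the raw_pointer_cast
    Array2DSketch(thrust::device_ptr<T> data, uint32_t rows, uint32_t cols)
        : Array2DSketch(thrust::raw_pointer_cast(data), rows, cols) {}

private:
    T* data_;
    uint32_t rows_;
    uint32_t cols_;
};

// Usage: the cast disappears at the call site, since
// thrust::device_vector<T>::data() returns a thrust::device_ptr<T>.
// thrust::device_vector<float> v(4 * 6);
// Array2DSketch<float> a(v.data(), 4, 6);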