diff --git a/.clang-format b/.clang-format
index 48bf157..e0d7241 100644
--- a/.clang-format
+++ b/.clang-format
@@ -115,5 +115,6 @@
 SpacesInSquareBrackets: false
 StatementMacros:
   - Q_UNUSED
   - QT_REQUIRE_VERSION
+InsertNewlineAtEOF: true
 ...
diff --git a/.clang-tidy b/.clang-tidy
index 8bf217c..db4c130 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -37,7 +37,8 @@ Checks: >
   -modernize-avoid-c-arrays,
   -readability-magic-numbers,
   -cert-dcl37-c,
-  -cert-dcl51-cpp
+  -cert-dcl51-cpp,
+  -cert-dcl58-cpp,
 
 WarningsAsErrors: ''
 HeaderFilterRegex: '.*'
@@ -51,4 +52,7 @@ CheckOptions:
     value: 'std::vector;.*Iterator'
   - key: modernize-use-nodiscard.Macros
     value: 'CUDA_CHECK'
+  - key: readability-function-cognitive-complexity.IgnoreMacros
+    # GTest macros can artificially increase the cognitive complexity, so we ignore them.
+    value: true
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 285c6dd..054ac28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,9 +69,9 @@ find_package(GTest CONFIG REQUIRED)
 
 # C++/CUDA test executable
 file(GLOB CPP_CUDA_TEST_SOURCES tests/cpp_tests/*.cu tests/cpp_tests/*.cpp)
-add_executable(test_add ${CPP_CUDA_TEST_SOURCES})
+add_executable(cpp_tests ${CPP_CUDA_TEST_SOURCES})
 
-target_link_libraries(test_add
+target_link_libraries(cpp_tests
     PRIVATE
     genmetaballs_core
     GTest::gtest
@@ -79,9 +79,9 @@ target_link_libraries(test_add
 )
 
 # Enable CUDA for the test executable
-set_target_properties(test_add PROPERTIES
+set_target_properties(cpp_tests PROPERTIES
     CUDA_SEPARABLE_COMPILATION ON
 )
 
 include(GoogleTest)
-gtest_discover_tests(test_add)
+gtest_discover_tests(cpp_tests)
diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu
index 9e29186..15cbcc0 100644
--- a/genmetaballs/src/cuda/bindings.cu
+++ b/genmetaballs/src/cuda/bindings.cu
@@ -1,5 +1,6 @@
 #include <cuda_runtime.h>
 #include <nanobind/nanobind.h>
+#include <nanobind/operators.h>
 #include <nanobind/stl/string.h>
 
 #include "core/add.cuh"
@@ -20,10 +21,10 @@ NB_MODULE(_genmetaballs_bindings, m) {
         .def_rw("x", &Vec3D::x)
         .def_rw("y", &Vec3D::y)
         .def_rw("z", &Vec3D::z)
-        .def("__add__", &operator+)
-        .def("__sub__", &operator-)
+        .def(nb::self + nb::self)
+        .def(nb::self - nb::self)
         .def("__repr__", [](const Vec3D& v) {
             nb::str s = nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z);
             return s;
         });
-}
\ No newline at end of file
+}
diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh
index cf114ae..108cd4d 100644
--- a/genmetaballs/src/cuda/core/utils.cuh
+++ b/genmetaballs/src/cuda/core/utils.cuh
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <cstdint>
+#include <cuda/std/mdspan>
 #include <cuda_runtime.h>
 
 #define CUDA_CHECK(x) \
@@ -10,27 +11,38 @@
 
 void cuda_check(cudaError_t code, const char* file, int line);
 
-// XXX container_t should be a thrust container type
-template <typename container_t>
+// Non-owning 2D view into a contiguous array in either host or device memory
+template <typename T>
 class Array2D {
 private:
-    // XXX TODO: make sure this works
-    container_t data_;
+    cuda::std::mdspan<
+        T, cuda::std::extents<uint32_t, cuda::std::dynamic_extent, cuda::std::dynamic_extent>>
+        data_view_;
 
 public:
-    __host__ __device__ __forceinline__ container_t& at(const uint32_t i, const uint32_t j) {
-        return data_;
-        // return data_[i * width + j];
-    }
+    // constructor
+    __host__ __device__ constexpr Array2D(T* data, uint32_t rows, uint32_t cols)
+        : data_view_(data, rows, cols) {}
 
-    __host__ __device__ __forceinline__ const container_t& at(const uint32_t i,
-                                                              const uint32_t j) const {
-        return data_;
-        // return data_[i * width + j];
+    // accessor methods
+    __host__ __device__ constexpr T& operator()(uint32_t row, uint32_t col) {
+        return data_view_(row, col);
+    }
+    __host__ __device__ constexpr T operator()(uint32_t row, uint32_t col) const {
+        return data_view_(row, col);
+    }
+    // size methods
+    __host__ __device__ constexpr auto num_rows() const noexcept {
+        return data_view_.extent(0);
+    }
+    __host__ __device__ constexpr auto num_cols() const noexcept {
+        return data_view_.extent(1);
     }
 
-    __host__ __device__ constexpr uint32_t size() const {
-        return 0;
-        // return width * height;
+    __host__ __device__ constexpr auto rank() const noexcept {
+        return data_view_.rank();
+    }
+    __host__ __device__ constexpr auto size() const noexcept {
+        return data_view_.size();
     }
 };
diff --git a/tests/cpp_tests/test_utils.cu b/tests/cpp_tests/test_utils.cu
new file mode 100644
index 0000000..9fb97e8
--- /dev/null
+++ b/tests/cpp_tests/test_utils.cu
@@ -0,0 +1,63 @@
+#include <cstdint>
+#include <cuda_runtime.h>
+#include <gtest/gtest.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <type_traits>
+#include <vector>
+
+#include "core/utils.cuh"
+
+// CUDA kernel to fill Array2D with sequential values
+__global__ void fill_array2d_kernel(Array2D<float> array2d) {
+    uint32_t i = threadIdx.x;
+    uint32_t j = threadIdx.y;
+
+    if (i < array2d.num_rows() && j < array2d.num_cols()) {
+        array2d(i, j) = i * array2d.num_cols() + j;
+    }
+}
+
+template <typename T>
+class Array2DTestFixture : public ::testing::Test {};
+
+using ContainerTypes = ::testing::Types<std::vector<float>, thrust::device_vector<float>>;
+
+TYPED_TEST_SUITE(Array2DTestFixture, ContainerTypes);
+
+TYPED_TEST(Array2DTestFixture, CreateAndAccessArray2D) {
+    uint32_t rows = 4;
+    uint32_t cols = 6;
+
+    auto data = TypeParam(rows * cols);
+    // create 2D view into the underlying data on host or device
+    auto array2d = Array2D(thrust::raw_pointer_cast(data.data()), rows, cols);
+
+    if constexpr (std::is_same_v<TypeParam, std::vector<float>>) {
+        for (auto i = 0; i < rows; i++) {
+            for (auto j = 0; j < cols; j++) {
+                array2d(i, j) = i * cols + j;
+            }
+        }
+    } else {
+        // Launch kernel to fill Array2D on device
+        // Note: we could've simply use thrust::sequence to fill the device vector,
+        // but this is a simple example to demonstrate how to pass an Array2D to a kernel.
+        fill_array2d_kernel<<<1, dim3(rows, cols)>>>(array2d);
+        CUDA_CHECK(cudaGetLastError());
+        CUDA_CHECK(cudaDeviceSynchronize());
+    }
+
+    EXPECT_EQ(array2d.size(), rows * cols);
+    EXPECT_EQ(array2d.num_rows(), rows);
+    EXPECT_EQ(array2d.num_cols(), cols);
+    EXPECT_EQ(array2d.rank(), 2);  // 2D array
+
+    // create host vector to verify the data
+    // for std::vector, this simply duplicate the vector.
+    // for thrust::device_vector, it will copy the data to the host.
+    thrust::host_vector<float> host_data = data;
+    for (auto idx = 0; idx < rows * cols; idx++) {
+        EXPECT_FLOAT_EQ(host_data[idx], idx);
+    }
+}