From a06396a2e39e168cadac7a0f068a3c1a82ba77e3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 17 Mar 2026 03:11:47 -0500 Subject: [PATCH 01/14] Remove device_memory_resource inheritance from all resources and adaptors Remove the device_memory_resource virtual base class inheritance from all production memory resources, adaptors, and stream_ordered_memory_resource. Resources now derive publicly from cuda::mr::shared_resource (for stateful/adaptor types) or stand alone with direct CCCL concept methods (for stateless types). The legacy do_allocate/do_deallocate/do_is_equal virtual overrides and pointer-based per-device-resource APIs are removed. stream_ordered_memory_resource provides allocate/deallocate/allocate_sync/ deallocate_sync directly instead of through the DMR virtual dispatch. All 103 C++ tests and 1165 Python tests pass. --- cpp/include/rmm/cuda_stream.hpp | 9 +- cpp/include/rmm/detail/cccl_adaptors.hpp | 77 +++++- .../rmm/mr/aligned_resource_adaptor.hpp | 43 +-- cpp/include/rmm/mr/arena_memory_resource.hpp | 43 +-- .../rmm/mr/binning_memory_resource.hpp | 45 +-- .../rmm/mr/callback_memory_resource.hpp | 37 +-- .../mr/cuda_async_managed_memory_resource.hpp | 48 +--- .../rmm/mr/cuda_async_memory_resource.hpp | 45 +-- .../mr/cuda_async_view_memory_resource.hpp | 32 +-- cpp/include/rmm/mr/cuda_memory_resource.hpp | 37 ++- .../fixed_size_memory_resource_impl.hpp | 2 +- .../mr/detail/pool_memory_resource_impl.hpp | 2 +- .../detail/stream_ordered_memory_resource.hpp | 40 ++- .../mr/failure_callback_resource_adaptor.hpp | 55 +--- .../rmm/mr/fixed_size_memory_resource.hpp | 43 +-- .../rmm/mr/limiting_resource_adaptor.hpp | 57 +--- .../rmm/mr/logging_resource_adaptor.hpp | 44 +-- .../rmm/mr/managed_memory_resource.hpp | 37 ++- cpp/include/rmm/mr/per_device_resource.hpp | 261 ++---------------- .../rmm/mr/pinned_host_memory_resource.hpp | 37 ++- cpp/include/rmm/mr/pool_memory_resource.hpp | 43 +-- .../rmm/mr/prefetch_resource_adaptor.hpp | 43 +-- .../rmm/mr/sam_headroom_memory_resource.hpp | 43 +-- .../rmm/mr/statistics_resource_adaptor.hpp | 43 +-- cpp/include/rmm/mr/system_memory_resource.hpp | 36 ++- .../rmm/mr/thread_safe_resource_adaptor.hpp | 43 +-- .../rmm/mr/tracking_resource_adaptor.hpp | 43 +-- cpp/src/cuda_stream.cpp | 2 + cpp/src/mr/aligned_resource_adaptor.cpp | 23 -- cpp/src/mr/arena_memory_resource.cpp | 22 -- cpp/src/mr/binning_memory_resource.cpp | 23 -- cpp/src/mr/callback_memory_resource.cpp | 23 -- .../mr/cuda_async_managed_memory_resource.cpp | 26 -- cpp/src/mr/cuda_async_memory_resource.cpp | 25 -- cpp/src/mr/fixed_size_memory_resource.cpp | 23 -- cpp/src/mr/limiting_resource_adaptor.cpp | 33 --- cpp/src/mr/logging_resource_adaptor.cpp | 23 -- cpp/src/mr/pool_memory_resource.cpp | 23 -- cpp/src/mr/prefetch_resource_adaptor.cpp | 23 -- cpp/src/mr/sam_headroom_memory_resource.cpp | 25 -- cpp/src/mr/statistics_resource_adaptor.cpp | 23 -- cpp/src/mr/thread_safe_resource_adaptor.cpp | 23 -- cpp/src/mr/tracking_resource_adaptor.cpp | 23 -- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/device_buffer_tests.cu | 36 +-- cpp/tests/mr/adaptor_tests.cpp | 25 +- cpp/tests/mr/aligned_mr_tests.cpp | 37 ++- cpp/tests/mr/arena_mr_tests.cpp | 24 +- cpp/tests/mr/cuda_async_managed_mr_tests.cpp | 4 +- cpp/tests/mr/cuda_async_mr_tests.cpp | 2 +- .../mr/device_memory_resource_view_tests.cpp | 258 ----------------- cpp/tests/mr/failure_callback_mr_tests.cpp | 15 +- cpp/tests/mr/host_mr_ref_tests.cpp | 27 +- cpp/tests/mr/limiting_mr_tests.cpp | 15 +- cpp/tests/mr/mr_ref_default_tests.cpp | 47 +--- cpp/tests/mr/mr_ref_test.hpp | 10 - cpp/tests/mr/pinned_host_pool_mr_tests.cpp | 10 +- cpp/tests/mr/polymorphic_allocator_tests.cpp | 15 +- cpp/tests/mr/pool_mr_tests.cpp | 31 +-- .../mr/prefetch_resource_adaptor_tests.cpp | 4 +- cpp/tests/mr/statistics_mr_tests.cpp | 5 +- .../mr/stream_allocator_adaptor_tests.cpp | 6 +- cpp/tests/mr/system_mr_tests.cu | 2 +- cpp/tests/mr/tracking_mr_tests.cpp | 5 +- cpp/tests/prefetch_tests.cpp | 8 +- 65 files changed, 354 insertions(+), 1887 deletions(-) delete mode 100644 cpp/tests/mr/device_memory_resource_view_tests.cpp diff --git a/cpp/include/rmm/cuda_stream.hpp b/cpp/include/rmm/cuda_stream.hpp index da620f5c5..cddcc45bd 100644 --- a/cpp/include/rmm/cuda_stream.hpp +++ b/cpp/include/rmm/cuda_stream.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -98,6 +98,13 @@ class cuda_stream { */ operator cuda_stream_view() const; + /** + * @brief Implicit conversion to cuda::stream_ref + * + * @return A stream_ref of the owned stream + */ + operator cuda::stream_ref() const; + /** * @brief Synchronize the owned CUDA stream. * diff --git a/cpp/include/rmm/detail/cccl_adaptors.hpp b/cpp/include/rmm/detail/cccl_adaptors.hpp index d8f6999e0..92e75df48 100644 --- a/cpp/include/rmm/detail/cccl_adaptors.hpp +++ b/cpp/include/rmm/detail/cccl_adaptors.hpp @@ -32,6 +32,34 @@ inline constexpr bool is_specialization_of_v = false; template