
Commit 6d1e1ee

Remove device_memory_resource and DMR bridge infrastructure
- Delete device_memory_resource.hpp and device_memory_resource_view.hpp.
- Strip DMR bridge code from cccl_adaptors.hpp, keeping shared_resource_cast wrappers.
- Inline do_allocate/do_deallocate into allocate/deallocate in stream_ordered_memory_resource.
- Convert benchmarks from shared_ptr<DMR> to any_device_resource.
- Rewrite test mocks to satisfy CCCL concepts directly, with copyable forwarding wrappers to work around basic_any type-erasure limitations with GMock types.
- Replace reinterpret_cast stream constructions with cuda_stream_view{}.
1 parent d0f847f commit 6d1e1ee

24 files changed: +315 -811 lines
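The test-mock rewrite described in the commit message above is easiest to see as code. Below is a minimal sketch, not the commit's actual test code: GMock mock objects are non-copyable, while value-based type erasure such as basic_any copies the resource it wraps, so each test wraps its mock in a small copyable forwarder. The names mock_resource and forwarding_resource are hypothetical, and the stream-first allocate/deallocate signatures are assumed to mirror the stream_ordered_memory_resource diff further down.

#include <cuda/stream_ref>

#include <cstddef>

// Stands in for a non-copyable GMock mock (a class holding MOCK_METHODs in the
// real tests); trivial bodies keep this sketch self-contained and compilable.
struct mock_resource {
  mock_resource()                                = default;
  mock_resource(mock_resource const&)            = delete;
  mock_resource& operator=(mock_resource const&) = delete;

  void* allocate(cuda::stream_ref, std::size_t, std::size_t) { return nullptr; }
  void deallocate(cuda::stream_ref, void*, std::size_t, std::size_t) {}
};

// Copyable wrapper that forwards every call to a mock owned by the test body,
// so a copying type-erased resource can still drive the mock's expectations.
class forwarding_resource {
 public:
  explicit forwarding_resource(mock_resource& mock) : mock_{&mock} {}

  void* allocate(cuda::stream_ref stream, std::size_t bytes, std::size_t alignment)
  {
    return mock_->allocate(stream, bytes, alignment);
  }
  void deallocate(cuda::stream_ref stream, void* ptr, std::size_t bytes, std::size_t alignment)
  {
    mock_->deallocate(stream, ptr, bytes, alignment);
  }

  // Equality comparison is part of the CCCL resource requirements; two wrappers
  // compare equal when they forward to the same mock.
  bool operator==(forwarding_resource const& other) const { return mock_ == other.mock_; }
  bool operator!=(forwarding_resource const& other) const { return !(*this == other); }

 private:
  mock_resource* mock_;
};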

cpp/include/rmm/detail/cccl_adaptors.hpp

Lines changed: 28 additions & 196 deletions
Large diffs are not rendered by default.

cpp/include/rmm/mr/arena_memory_resource.hpp

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ namespace mr {
  /**
   * @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support.
   *
-  * Allocation (do_allocate()) and deallocation (do_deallocate()) are thread-safe. Also,
+  * Allocation and deallocation are thread-safe. Also,
   * this class is compatible with CUDA per-thread default stream.
   *
   * GPU memory is divided into a global arena, per-thread arenas for default streams, and per-stream

cpp/include/rmm/mr/callback_memory_resource.hpp

Lines changed: 6 additions & 10 deletions
@@ -28,12 +28,10 @@ namespace mr {
  *
  * * Returns a pointer to an allocation of at least `bytes` usable immediately on
  *   `stream`. The stream-ordered behavior requirements are identical to
- *   `device_memory_resource::allocate`.
+ *   `allocate`.
  *
- * * This signature is compatible with `do_allocate` but adds the extra function
- *   parameter `arg`. The `arg` is provided to the constructor of the
- *   `callback_memory_resource` and will be forwarded along to every invocation
- *   of the callback function.
+ * * The `arg` is provided to the constructor of the `callback_memory_resource`
+ *   and will be forwarded along to every invocation of the callback function.
  */
 using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, void*)>;

@@ -46,12 +44,10 @@ using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, v
  * * Deallocates memory pointed to by `ptr`. `bytes` specifies the size of the allocation
  *   in bytes, and must equal the value of `bytes` that was passed to the allocate callback
  *   function. The stream-ordered behavior requirements are identical to
- *   `device_memory_resource::deallocate`.
+ *   `deallocate`.
  *
- * * This signature is compatible with `do_deallocate` but adds the extra function
- *   parameter `arg`. The `arg` is provided to the constructor of the
- *   `callback_memory_resource` and will be forwarded along to every invocation
- *   of the callback function.
+ * * The `arg` is provided to the constructor of the `callback_memory_resource`
+ *   and will be forwarded along to every invocation of the callback function.
  */
 using deallocate_callback_t = std::function<void(void*, std::size_t, cuda_stream_view, void*)>;
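For readers unfamiliar with these callbacks, here is a minimal usage sketch. Only the callback signatures above come from this file; the constructor argument order (allocate callback, deallocate callback, allocate arg, deallocate arg) is an assumption about callback_memory_resource, and the callbacks simply forward to cudaMallocAsync/cudaFreeAsync while using the void* arg to track outstanding bytes.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/callback_memory_resource.hpp>

#include <cuda_runtime_api.h>

#include <atomic>
#include <cstddef>

int main()
{
  std::atomic<std::size_t> outstanding_bytes{0};

  // Matches allocate_callback_t: the trailing void* is the constructor-supplied arg.
  rmm::mr::allocate_callback_t alloc_cb =
    [](std::size_t bytes, rmm::cuda_stream_view stream, void* arg) -> void* {
      static_cast<std::atomic<std::size_t>*>(arg)->fetch_add(bytes);
      void* ptr{nullptr};
      cudaMallocAsync(&ptr, bytes, stream.value());
      return ptr;
    };

  // Matches deallocate_callback_t: receives the same arg on every invocation.
  rmm::mr::deallocate_callback_t dealloc_cb =
    [](void* ptr, std::size_t bytes, rmm::cuda_stream_view stream, void* arg) {
      static_cast<std::atomic<std::size_t>*>(arg)->fetch_sub(bytes);
      cudaFreeAsync(ptr, stream.value());
    };

  // Constructor argument order is assumed here; it is not shown in this diff.
  rmm::mr::callback_memory_resource mr{alloc_cb, dealloc_cb, &outstanding_bytes, &outstanding_bytes};

  // Allocations made through mr now invoke the callbacks above with
  // &outstanding_bytes forwarded as arg.
  return 0;
}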

cpp/include/rmm/mr/cuda_async_view_memory_resource.hpp

Lines changed: 0 additions & 2 deletions
@@ -68,8 +68,6 @@ class cuda_async_view_memory_resource final {
   cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) =
     default;  ///< @default_move_assignment{cuda_async_view_memory_resource}

-  // -- CCCL memory resource interface (hides device_memory_resource versions) --
-
   /**
    * @brief Allocates memory of size at least \p bytes.
    *
cpp/include/rmm/mr/cuda_memory_resource.hpp

Lines changed: 0 additions & 2 deletions
@@ -34,8 +34,6 @@ class cuda_memory_resource final {
   cuda_memory_resource& operator=(cuda_memory_resource&&) =
     default;  ///< @default_move_assignment{cuda_memory_resource}

-  // -- CCCL memory resource interface (hides device_memory_resource versions) --
-
   /**
    * @brief Allocates memory of size at least \p bytes.
    *
cpp/include/rmm/mr/detail/arena.hpp

Lines changed: 0 additions & 6 deletions
@@ -480,8 +480,6 @@ inline auto max_free_size(std::set<superblock> const& superblocks)
  *
  * The global arena is a shared memory pool from which other arenas allocate superblocks.
  *
- * @tparam Upstream Memory resource to use for allocating the arena. Implements
- *   rmm::mr::device_memory_resource interface.
  */
 class global_arena final {
  public:
@@ -778,8 +776,6 @@ class global_arena final {
  * An arena is a per-thread or per-non-default-stream memory pool. It allocates
  * superblocks from the global arena, and returns them when the superblocks become empty.
  *
- * @tparam Upstream Memory resource to use for allocating the global arena. Implements
- *   rmm::mr::device_memory_resource interface.
  */
 class arena {
  public:
@@ -957,8 +953,6 @@ class arena {
  *
  * This is useful when a thread is about to terminate, and it contains a per-thread arena.
  *
- * @tparam Upstream Memory resource to use for allocating the global arena. Implements
- *   rmm::mr::device_memory_resource interface.
  */
 class arena_cleaner {
  public:

cpp/include/rmm/mr/detail/device_memory_resource_view.hpp

Lines changed: 0 additions & 180 deletions
This file was deleted.

cpp/include/rmm/mr/detail/stream_ordered_memory_resource.hpp

Lines changed: 18 additions & 18 deletions
@@ -76,39 +76,39 @@ class stream_ordered_memory_resource : public crtp<PoolResource> {
   stream_ordered_memory_resource& operator=(stream_ordered_memory_resource&&) = delete;

   /**
-   * @brief Allocates memory of size at least `size` bytes.
+   * @brief Allocates memory of size at least `bytes` bytes.
    *
    * The returned pointer has at least 256B alignment.
    *
    * @throws `std::bad_alloc` if the requested allocation could not be fulfilled
    *
    * @param stream The stream in which to order this allocation
-   * @param size The size in bytes of the allocation
+   * @param bytes The size in bytes of the allocation
    * @param alignment Unused; alignment is always at least `CUDA_ALLOCATION_ALIGNMENT`
    * @return void* Pointer to the newly allocated memory
    */
-  void* allocate(cuda::stream_ref stream, std::size_t size, std::size_t /*alignment*/)
+  void* allocate(cuda::stream_ref stream, std::size_t bytes, std::size_t /*alignment*/)
   {
     auto const strm = cuda_stream_view{stream};

-    RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(strm), size);
+    RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(strm), bytes);

-    if (size == 0) { return nullptr; }
+    if (bytes == 0) { return nullptr; }

     lock_guard lock(mtx_);

     auto stream_event = get_event(strm);

-    size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT);
-    RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(),
+    bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
+    RMM_EXPECTS(bytes <= this->underlying().get_maximum_allocation_size(),
                 std::string("Maximum allocation size exceeded (failed to allocate ") +
-                  rmm::detail::format_bytes(size) + ")",
+                  rmm::detail::format_bytes(bytes) + ")",
                 rmm::out_of_memory);
-    auto const block = this->underlying().get_block(size, stream_event);
+    auto const block = this->underlying().get_block(bytes, stream_event);

     RMM_LOG_TRACE("[A][stream %s][%zuB][%p]",
                   rmm::detail::format_stream(stream_event.stream),
-                  size,
+                  bytes,
                   block.pointer());

     log_summary_trace();
@@ -121,29 +121,29 @@ class stream_ordered_memory_resource : public crtp<PoolResource> {
    *
    * @param stream The stream in which to order this deallocation
    * @param ptr Pointer to be deallocated
-   * @param size The size in bytes of the allocation to deallocate
+   * @param bytes The size in bytes of the allocation to deallocate
    * @param alignment Unused
    */
   void deallocate(cuda::stream_ref stream,
                   void* ptr,
-                  std::size_t size,
+                  std::size_t bytes,
                   std::size_t /*alignment*/) noexcept
   {
     auto const strm = cuda_stream_view{stream};

-    RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(strm), size, ptr);
+    RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(strm), bytes, ptr);

-    if (size == 0 || ptr == nullptr) { return; }
+    if (bytes == 0 || ptr == nullptr) { return; }

     lock_guard lock(mtx_);
     auto stream_event = get_event(strm);

-    size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT);
-    auto const block = this->underlying().free_block(ptr, size);
+    bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
+    auto const block = this->underlying().free_block(ptr, bytes);

     // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case
-    // we may be able to delay recording the event in some situations. But using events rather than
-    // streams allows stealing from deleted streams.
+    // we may be able to delay recording the event in some situations. But using events rather
+    // than streams allows stealing from deleted streams.
     RMM_ASSERT_CUDA_SUCCESS(cudaEventRecord(stream_event.event, strm.value()));

     stream_free_blocks_[stream_event].insert(block);
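As a reminder of the stream-first argument order introduced above, here is a short usage sketch. Only the allocate/deallocate signatures come from the diff; the pool type, its include path, and its constructor shape are assumptions (pool_memory_resource derives from this CRTP base in RMM today).

#include <rmm/aligned.hpp>
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/cuda_memory_resource.hpp>
#include <rmm/mr/pool_memory_resource.hpp>  // assumed path in this source layout

#include <cstddef>

int main()
{
  rmm::mr::cuda_memory_resource upstream{};

  // Assumed constructor shape: upstream pointer plus an initial pool size.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream,
                                                                    std::size_t{1} << 26};

  rmm::cuda_stream stream{};

  // Stream first, then size, then the (currently unused) alignment.
  void* ptr = pool.allocate(stream.value(), std::size_t{4096}, rmm::CUDA_ALLOCATION_ALIGNMENT);
  pool.deallocate(stream.value(), ptr, std::size_t{4096}, rmm::CUDA_ALLOCATION_ALIGNMENT);

  return 0;
}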
