Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 28 additions & 196 deletions cpp/include/rmm/detail/cccl_adaptors.hpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cpp/include/rmm/mr/arena_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace mr {
/**
* @brief A suballocator that emphasizes fragmentation avoidance and scalable concurrency support.
*
* Allocation (do_allocate()) and deallocation (do_deallocate()) are thread-safe. Also,
* Allocation and deallocation are thread-safe. Also,
* this class is compatible with CUDA per-thread default stream.
*
* GPU memory is divided into a global arena, per-thread arenas for default streams, and per-stream
Expand Down
16 changes: 6 additions & 10 deletions cpp/include/rmm/mr/callback_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,10 @@ namespace mr {
*
* * Returns a pointer to an allocation of at least `bytes` usable immediately on
* `stream`. The stream-ordered behavior requirements are identical to
* `device_memory_resource::allocate`.
* `allocate`.
*
* * This signature is compatible with `do_allocate` but adds the extra function
* parameter `arg`. The `arg` is provided to the constructor of the
* `callback_memory_resource` and will be forwarded along to every invocation
* of the callback function.
* * The `arg` is provided to the constructor of the `callback_memory_resource`
* and will be forwarded along to every invocation of the callback function.
*/
using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, void*)>;

Expand All @@ -46,12 +44,10 @@ using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, v
* * Deallocates memory pointed to by `ptr`. `bytes` specifies the size of the allocation
* in bytes, and must equal the value of `bytes` that was passed to the allocate callback
* function. The stream-ordered behavior requirements are identical to
* `device_memory_resource::deallocate`.
* `deallocate`.
*
* * This signature is compatible with `do_deallocate` but adds the extra function
* parameter `arg`. The `arg` is provided to the constructor of the
* `callback_memory_resource` and will be forwarded along to every invocation
* of the callback function.
* * The `arg` is provided to the constructor of the `callback_memory_resource`
* and will be forwarded along to every invocation of the callback function.
*/
using deallocate_callback_t = std::function<void(void*, std::size_t, cuda_stream_view, void*)>;

Expand Down
2 changes: 0 additions & 2 deletions cpp/include/rmm/mr/cuda_async_view_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ class cuda_async_view_memory_resource final {
cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) =
default; ///< @default_move_assignment{cuda_async_view_memory_resource}

// -- CCCL memory resource interface (hides device_memory_resource versions) --

/**
* @brief Allocates memory of size at least \p bytes.
*
Expand Down
2 changes: 0 additions & 2 deletions cpp/include/rmm/mr/cuda_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ class cuda_memory_resource final {
cuda_memory_resource& operator=(cuda_memory_resource&&) =
default; ///< @default_move_assignment{cuda_memory_resource}

// -- CCCL memory resource interface (hides device_memory_resource versions) --

/**
* @brief Allocates memory of size at least \p bytes.
*
Expand Down
6 changes: 0 additions & 6 deletions cpp/include/rmm/mr/detail/arena.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,6 @@ inline auto max_free_size(std::set<superblock> const& superblocks)
*
* The global arena is a shared memory pool from which other arenas allocate superblocks.
*
* @tparam Upstream Memory resource to use for allocating the arena. Implements
* rmm::mr::device_memory_resource interface.
*/
class global_arena final {
public:
Expand Down Expand Up @@ -778,8 +776,6 @@ class global_arena final {
* An arena is a per-thread or per-non-default-stream memory pool. It allocates
* superblocks from the global arena, and returns them when the superblocks become empty.
*
* @tparam Upstream Memory resource to use for allocating the global arena. Implements
* rmm::mr::device_memory_resource interface.
*/
class arena {
public:
Expand Down Expand Up @@ -957,8 +953,6 @@ class arena {
*
* This is useful when a thread is about to terminate, and it contains a per-thread arena.
*
* @tparam Upstream Memory resource to use for allocating the global arena. Implements
* rmm::mr::device_memory_resource interface.
*/
class arena_cleaner {
public:
Expand Down
180 changes: 0 additions & 180 deletions cpp/include/rmm/mr/detail/device_memory_resource_view.hpp

This file was deleted.

36 changes: 18 additions & 18 deletions cpp/include/rmm/mr/detail/stream_ordered_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,39 +76,39 @@ class stream_ordered_memory_resource : public crtp<PoolResource> {
stream_ordered_memory_resource& operator=(stream_ordered_memory_resource&&) = delete;

/**
* @brief Allocates memory of size at least `size` bytes.
* @brief Allocates memory of size at least `bytes` bytes.
*
* The returned pointer has at least 256B alignment.
*
* @throws `std::bad_alloc` if the requested allocation could not be fulfilled
*
* @param stream The stream in which to order this allocation
* @param size The size in bytes of the allocation
* @param bytes The size in bytes of the allocation
* @param alignment Unused; alignment is always at least `CUDA_ALLOCATION_ALIGNMENT`
* @return void* Pointer to the newly allocated memory
*/
void* allocate(cuda::stream_ref stream, std::size_t size, std::size_t /*alignment*/)
void* allocate(cuda::stream_ref stream, std::size_t bytes, std::size_t /*alignment*/)
{
auto const strm = cuda_stream_view{stream};

RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(strm), size);
RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(strm), bytes);

if (size == 0) { return nullptr; }
if (bytes == 0) { return nullptr; }

lock_guard lock(mtx_);

auto stream_event = get_event(strm);

size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT);
RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(),
bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
RMM_EXPECTS(bytes <= this->underlying().get_maximum_allocation_size(),
std::string("Maximum allocation size exceeded (failed to allocate ") +
rmm::detail::format_bytes(size) + ")",
rmm::detail::format_bytes(bytes) + ")",
rmm::out_of_memory);
auto const block = this->underlying().get_block(size, stream_event);
auto const block = this->underlying().get_block(bytes, stream_event);

RMM_LOG_TRACE("[A][stream %s][%zuB][%p]",
rmm::detail::format_stream(stream_event.stream),
size,
bytes,
block.pointer());

log_summary_trace();
Expand All @@ -121,29 +121,29 @@ class stream_ordered_memory_resource : public crtp<PoolResource> {
*
* @param stream The stream in which to order this deallocation
* @param ptr Pointer to be deallocated
* @param size The size in bytes of the allocation to deallocate
* @param bytes The size in bytes of the allocation to deallocate
* @param alignment Unused
*/
void deallocate(cuda::stream_ref stream,
void* ptr,
std::size_t size,
std::size_t bytes,
std::size_t /*alignment*/) noexcept
{
auto const strm = cuda_stream_view{stream};

RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(strm), size, ptr);
RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(strm), bytes, ptr);

if (size == 0 || ptr == nullptr) { return; }
if (bytes == 0 || ptr == nullptr) { return; }

lock_guard lock(mtx_);
auto stream_event = get_event(strm);

size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT);
auto const block = this->underlying().free_block(ptr, size);
bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
auto const block = this->underlying().free_block(ptr, bytes);

// TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case
// we may be able to delay recording the event in some situations. But using events rather than
// streams allows stealing from deleted streams.
// we may be able to delay recording the event in some situations. But using events rather
// than streams allows stealing from deleted streams.
RMM_ASSERT_CUDA_SUCCESS(cudaEventRecord(stream_event.event, strm.value()));

stream_free_blocks_[stream_event].insert(block);
Expand Down
Loading
Loading