|
| 1 | +/* |
| 2 | + mallocMC: Memory Allocator for Many Core Architectures. |
| 3 | +
|
| 4 | + Copyright 2014-2024 Institute of Radiation Physics, |
| 5 | + Helmholtz-Zentrum Dresden - Rossendorf |
| 6 | +
|
| 7 | + Author(s): Carlchristian Eckert - c.eckert ( at ) hzdr.de |
| 8 | + Julian Lenz - j.lenz ( at ) hzdr.de |
| 9 | +
|
| 10 | + Permission is hereby granted, free of charge, to any person obtaining a copy |
| 11 | + of this software and associated documentation files (the "Software"), to deal |
| 12 | + in the Software without restriction, including without limitation the rights |
| 13 | + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 14 | + copies of the Software, and to permit persons to whom the Software is |
| 15 | + furnished to do so, subject to the following conditions: |
| 16 | +
|
| 17 | + The above copyright notice and this permission notice shall be included in |
| 18 | + all copies or substantial portions of the Software. |
| 19 | +
|
| 20 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 21 | + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 22 | + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 23 | + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 24 | + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 25 | + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 26 | + THE SOFTWARE. |
| 27 | +*/ |
| 28 | + |
| 29 | +#pragma once |
| 30 | + |
| 31 | +#include <alpaka/alpaka.hpp> |
| 32 | + |
| 33 | +#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED |
| 34 | +# include <gallatin/allocators/gallatin.cuh> |
| 35 | +#else |
| 36 | + |
| 37 | +// Construct a fake, so we get a nice error message when we try to use it |
| 38 | +// and it's not in the way when we don't. |
namespace gallatin::allocators
{
    /**
     * @brief Placeholder for the real Gallatin allocator when it is not included.
     *
     * Accepts any list of size_t template arguments and offers a
     * generate_on_device() that swallows whatever it is given and hands back
     * nullptr, so code naming the type still compiles.
     */
    template<size_t...>
    struct Gallatin
    {
        /// Ignores all arguments (taken by value) and yields nullptr.
        template<typename... T_Args>
        static auto generate_on_device(T_Args...) -> decltype(nullptr)
        {
            return nullptr;
        }
    };
} // namespace gallatin::allocators
| 50 | + |
| 51 | +#endif |
| 52 | + |
| 53 | +namespace mallocMC |
| 54 | +{ |
| 55 | + namespace CreationPolicies |
| 56 | + { |
| 57 | + /** |
| 58 | + * @brief Prototype integration of Gallatin (https://dl.acm.org/doi/10.1145/3627535.3638499) |
| 59 | + * |
| 60 | + * This CreationPolicy integrates the CUDA code for the Gallatin prototype into mallocMC |
| 61 | + * as a thin wrapper. Its intended for proof-of-principle tests and benchmarks only and |
| 62 | + * obviously only works with on CUDA devices. |
| 63 | + * |
| 64 | + * It also only works with the reservePoolPolicies::Noop beccause it does what CudaSetLimits |
| 65 | + * does internally on its own. |
| 66 | + * |
| 67 | + * If we should ever see the need for it, we'd re-implement it in alpaka for a fully-fletched |
| 68 | + * and well-maintained version of this. |
| 69 | + * Experience has been mixed so far: While we could reproduce good performance in some cases, |
| 70 | + * fragmentation was found to be unusably high (to the point of single-digit utilisaton of |
| 71 | + * available memory) in PIConGPU. That's why there's currently no plan to lift the prototype |
| 72 | + * status in the near future. |
| 73 | + */ |
| 74 | + template< |
| 75 | + typename T_AlignmentPolicy, |
| 76 | + size_t bytes_per_segment = 16ULL * 1024 * 1024, |
| 77 | + size_t smallest_slice = 16, |
| 78 | + size_t largest_slice = 4096> |
| 79 | + class GallatinCudaImpl |
| 80 | + { |
| 81 | + using Gallatin = gallatin::allocators::Gallatin<bytes_per_segment, smallest_slice, largest_slice>; |
| 82 | + |
| 83 | + public: |
| 84 | + template<typename T_AlignmentPolicyLocal> |
| 85 | + using AlignmentAwarePolicy |
| 86 | + = GallatinCudaImpl<T_AlignmentPolicyLocal, bytes_per_segment, smallest_slice, largest_slice>; |
| 87 | + Gallatin* heap{nullptr}; |
| 88 | + |
| 89 | + static constexpr auto providesAvailableSlots = false; |
| 90 | + |
| 91 | + template<typename AlpakaAcc> |
| 92 | + ALPAKA_FN_ACC auto create(AlpakaAcc const& acc, uint32_t bytes) const -> void* |
| 93 | + { |
| 94 | + return heap->malloc(static_cast<size_t>(bytes)); |
| 95 | + } |
| 96 | + |
| 97 | + template<typename AlpakaAcc> |
| 98 | + ALPAKA_FN_ACC void destroy(AlpakaAcc const& /*acc*/, void* mem) const |
| 99 | + { |
| 100 | + heap->free(mem); |
| 101 | + } |
| 102 | + |
| 103 | + ALPAKA_FN_ACC auto isOOM(void* p, size_t s) const -> bool |
| 104 | + { |
| 105 | + return s != 0 && (p == nullptr); |
| 106 | + } |
| 107 | + |
| 108 | + template<typename AlpakaAcc, typename AlpakaDevice, typename AlpakaQueue, typename T_DeviceAllocator> |
| 109 | + static void initHeap( |
| 110 | + AlpakaDevice& dev, |
| 111 | + AlpakaQueue& queue, |
| 112 | + T_DeviceAllocator* devAllocator, |
| 113 | + void*, |
| 114 | + size_t memsize) |
| 115 | + { |
| 116 | + static_assert( |
| 117 | + std::is_same_v<alpaka::AccToTag<AlpakaAcc>, alpaka::TagGpuCudaRt>, |
| 118 | + "The GallatinCuda creation policy is only available on CUDA architectures. Please choose a " |
| 119 | + "different one."); |
| 120 | + |
| 121 | + // This is an extremely hot fix: |
| 122 | + // PIConGPU initialises its allocator with 0 bytes to be able to distribute the pointer. |
| 123 | + // Only afterwards it can find out its actual memory requirements and uses destructiveResize to set |
| 124 | + // the correct heap size. Gallatin runs into issues with this approach. |
| 125 | + // Instead, we simply don't believe the request if it's 0. |
| 126 | + if(memsize == 0) |
| 127 | + return; |
| 128 | + |
| 129 | + auto devHost = alpaka::getDevByIdx(alpaka::PlatformCpu{}, 0); |
| 130 | + using Dim = typename alpaka::trait::DimType<AlpakaAcc>::type; |
| 131 | + using Idx = typename alpaka::trait::IdxType<AlpakaAcc>::type; |
| 132 | + using VecType = alpaka::Vec<Dim, Idx>; |
| 133 | + |
| 134 | + auto tmp = Gallatin::generate_on_device(memsize, 42, true); |
| 135 | + auto workDivSingleThread |
| 136 | + = alpaka::WorkDivMembers<Dim, Idx>{VecType::ones(), VecType::ones(), VecType::ones()}; |
| 137 | + alpaka::exec<AlpakaAcc>( |
| 138 | + queue, |
| 139 | + workDivSingleThread, |
| 140 | + [tmp, devAllocator] ALPAKA_FN_ACC(AlpakaAcc const&) { devAllocator->heap = tmp; }); |
| 141 | + } |
| 142 | + |
| 143 | + static auto classname() -> std::string |
| 144 | + { |
| 145 | + return "GallatinCuda"; |
| 146 | + } |
| 147 | + }; |
| 148 | + |
| 149 | + template< |
| 150 | + size_t bytes_per_segment = 16ULL * 1024 * 1024, |
| 151 | + size_t smallest_slice = 16, |
| 152 | + size_t largest_slice = 4096> |
| 153 | + struct GallatinCuda |
| 154 | + { |
| 155 | + template<typename T_AlignmentPolicy> |
| 156 | + using AlignmentAwarePolicy |
| 157 | + = GallatinCudaImpl<T_AlignmentPolicy, bytes_per_segment, smallest_slice, largest_slice>; |
| 158 | + }; |
| 159 | + |
| 160 | + } // namespace CreationPolicies |
| 161 | +} // namespace mallocMC |
0 commit comments