18 | 18 |
19 | 19 | // Handler for plain, pointer-based CUDA allocations |
20 | 20 | struct BufferMem { |
21 | | - using native_type = CUdeviceptr; |
22 | 21 |
23 | | - // If this allocation is a sub-buffer (i.e., a view on an existing |
24 | | - // allocation), this is the pointer to the parent handler structure |
25 | | - ur_mem_handle_t Parent; |
26 | | - // CUDA handler for the pointer |
27 | | - native_type Ptr; |
| 22 | + struct BufferMap { |
| 23 | + /// Size of the active mapped region. |
| 24 | + size_t MapSize; |
| 25 | + /// Offset of the active mapped region. |
| 26 | + size_t MapOffset; |
| 27 | + /// Original flags for the mapped region |
| 28 | + ur_map_flags_t MapFlags; |
| 29 | + /// Allocated host memory used exclusively for this map. |
| 30 | + std::unique_ptr<unsigned char[]> MapMem; |
28 | 31 |
29 | | - /// Pointer associated with this device on the host |
30 | | - void *HostPtr; |
31 | | - /// Size of the allocation in bytes |
32 | | - size_t Size; |
33 | | - /// Size of the active mapped region. |
34 | | - size_t MapSize; |
35 | | - /// Offset of the active mapped region. |
36 | | - size_t MapOffset; |
37 | | - /// Pointer to the active mapped region, if any |
38 | | - void *MapPtr; |
39 | | - /// Original flags for the mapped region |
40 | | - ur_map_flags_t MapFlags; |
| 32 | + BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags) |
| 33 | + : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags), |
| 34 | + MapMem(nullptr) {} |
| 35 | + |
| 36 | + BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags, |
| 37 | + std::unique_ptr<unsigned char[]> &MapMem) |
| 38 | + : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags), |
| 39 | + MapMem(std::move(MapMem)) {} |
| 40 | + |
| 41 | + size_t getMapSize() const noexcept { return MapSize; } |
| 42 | + |
| 43 | + size_t getMapOffset() const noexcept { return MapOffset; } |
| 44 | + |
| 45 | + ur_map_flags_t getMapFlags() const noexcept { return MapFlags; } |
| 46 | + }; |
41 | 47 |
42 | 48 | /** AllocMode |
43 | 49 | * classic: Just a normal buffer allocated on the device via cuda malloc |
44 | 50 | * use_host_ptr: Use an address on the host for the device |
45 | | - * copy_in: The data for the device comes from the host but the host |
46 | | - pointer is not available later for re-use |
47 | | - * alloc_host_ptr: Uses pinned-memory allocation |
48 | | - */ |
| 51 | + * copy_in: The data for the device comes from the host but the host
| 52 | +   pointer is not available later for re-use
| 53 | + * alloc_host_ptr: Uses pinned-memory allocation
| 54 | + */
49 | 55 | enum class AllocMode { |
50 | 56 | Classic, |
51 | 57 | UseHostPtr, |
52 | 58 | CopyIn, |
53 | 59 | AllocHostPtr, |
54 | | - } MemAllocMode; |
| 60 | + }; |
| 61 | + |
| 62 | + using native_type = CUdeviceptr; |
| 63 | + |
| 64 | + /// If this allocation is a sub-buffer (i.e., a view on an existing |
| 65 | + /// allocation), this is the pointer to the parent handler structure |
| 66 | + ur_mem_handle_t Parent; |
| 67 | + /// CUDA handler for the pointer |
| 68 | + native_type Ptr; |
| 69 | + /// Pointer associated with this device on the host |
| 70 | + void *HostPtr; |
| 71 | + /// Size of the allocation in bytes |
| 72 | + size_t Size; |
| 73 | + /// A map that contains all the active mappings for this buffer. |
| 74 | + std::unordered_map<void *, BufferMap> PtrToBufferMap; |
| 75 | + |
| 76 | + AllocMode MemAllocMode; |
55 | 77 |
56 | 78 | BufferMem(ur_mem_handle_t Parent, BufferMem::AllocMode Mode, CUdeviceptr Ptr, |
57 | 79 | void *HostPtr, size_t Size) |
58 | | - : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, MapSize{0}, |
59 | | - MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE}, |
60 | | - MemAllocMode{Mode} {}; |
| 80 | + : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, |
| 81 | + PtrToBufferMap{}, MemAllocMode{Mode} {}; |
61 | 82 |
62 | 83 | native_type get() const noexcept { return Ptr; } |
63 | 84 |
64 | 85 | size_t getSize() const noexcept { return Size; } |
65 | 86 |
66 | | - void *getMapPtr() const noexcept { return MapPtr; } |
67 | | - |
68 | | - size_t getMapSize() const noexcept { return MapSize; } |
69 | | - |
70 | | - size_t getMapOffset() const noexcept { return MapOffset; } |
| 87 | + BufferMap *getMapDetails(void *Map) { |
| 88 | + auto details = PtrToBufferMap.find(Map); |
| 89 | + if (details != PtrToBufferMap.end()) { |
| 90 | + return &details->second; |
| 91 | + } |
| 92 | + return nullptr; |
| 93 | + } |
71 | 94 |
72 | 95 | /// Returns a pointer to data visible on the host that contains |
73 | 96 | /// the data on the device associated with this allocation. |
74 | 97 | /// The offset is used to index into the CUDA allocation. |
75 | | - void *mapToPtr(size_t Size, size_t Offset, ur_map_flags_t Flags) noexcept { |
76 | | - assert(MapPtr == nullptr); |
77 | | - MapSize = Size; |
78 | | - MapOffset = Offset; |
79 | | - MapFlags = Flags; |
80 | | - if (HostPtr) { |
81 | | - MapPtr = static_cast<char *>(HostPtr) + Offset; |
| 98 | + void *mapToPtr(size_t MapSize, size_t MapOffset, |
| 99 | + ur_map_flags_t MapFlags) noexcept { |
| 100 | + |
| 101 | + void *MapPtr = nullptr; |
| 102 | + if (HostPtr == nullptr) { |
| 103 | + // If HostPtr is null, create a mapping that owns its own
| 104 | + // memory on the host.
| 105 | + auto MapMem = std::make_unique<unsigned char[]>(MapSize); |
| 106 | + MapPtr = MapMem.get(); |
| 107 | + PtrToBufferMap.insert( |
| 108 | + {MapPtr, BufferMap(MapSize, MapOffset, MapFlags, MapMem)}); |
82 | 109 | } else { |
83 | | - // TODO: Allocate only what is needed based on the offset |
84 | | - MapPtr = static_cast<void *>(malloc(this->getSize())); |
| 110 | + // Otherwise HostPtr already refers to usable host memory (e.g. a pinned
| 111 | + // allocation), so we can reuse that memory for the mapping.
| 112 | + MapPtr = static_cast<char *>(HostPtr) + MapOffset; |
| 113 | + PtrToBufferMap.insert({MapPtr, BufferMap(MapSize, MapOffset, MapFlags)}); |
85 | 114 | } |
86 | 115 | return MapPtr; |
87 | 116 | } |
88 | 117 |
89 | 118 | /// Detach the allocation from the host memory. |
90 | | - void unmap(void *) noexcept { |
91 | | - assert(MapPtr != nullptr); |
92 | | - |
93 | | - if (MapPtr != HostPtr) { |
94 | | - free(MapPtr); |
95 | | - } |
96 | | - MapPtr = nullptr; |
97 | | - MapSize = 0; |
98 | | - MapOffset = 0; |
99 | | - } |
100 | | - |
101 | | - ur_map_flags_t getMapFlags() const noexcept { |
| 119 | + void unmap(void *MapPtr) noexcept { |
102 | 120 | assert(MapPtr != nullptr); |
103 | | - return MapFlags; |
| 121 | + PtrToBufferMap.erase(MapPtr); |
104 | 122 | } |
105 | 123 | }; |
106 | 124 |
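For context, a minimal usage sketch of the reworked mapping API. The function name, the BufferMem instance Buf, and the sizes/offsets below are illustrative assumptions, not part of the patch; the only names taken from the change are mapToPtr, getMapDetails, unmap, BufferMap, and UR_MAP_FLAG_WRITE.

void exerciseMappings(BufferMem &Buf) {
  // Map a 256-byte region at offset 0; the returned host pointer is the key
  // under which the mapping is tracked in PtrToBufferMap.
  void *HostA = Buf.mapToPtr(/*MapSize=*/256, /*MapOffset=*/0, UR_MAP_FLAG_WRITE);

  // A second, independent mapping of a different sub-range of the same buffer.
  void *HostB = Buf.mapToPtr(/*MapSize=*/64, /*MapOffset=*/256, UR_MAP_FLAG_WRITE);

  // Retrieve the bookkeeping record for the first mapping.
  if (BufferMem::BufferMap *Details = Buf.getMapDetails(HostA)) {
    size_t Bytes  = Details->getMapSize();   // 256
    size_t Offset = Details->getMapOffset(); // 0
    (void)Bytes;
    (void)Offset;
  }

  // Unmapping erases only the record for the given pointer; HostB stays live
  // until it is unmapped as well.
  Buf.unmap(HostA);
  Buf.unmap(HostB);
}

Compared with the old single MapPtr/MapSize/MapOffset fields, keeping BufferMap records in an unordered_map keyed by the returned host pointer lets several mappings of one buffer be active at the same time without clobbering each other's state.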
|
|