Skip to content

Commit 9884edf

Browse files
assistant-librarian[bot]AlexVlx
authored and committed
Re-work and enhance allocation interposition mode (#122)
The initial implementation of allocation interposition was overly complicated and had issues that could lead to cycles during static initialisation. This patch reworks it to: - prevent potential cycles by only calling into HIP APIs once HIP init has completed; - use the new `hipMemAdvise` API on system allocated memory rather than `hipMallocManaged`, which is cheaper and more robust as it does not break calling non-interposed `free` on the allocation; - interpose `mmap` / `munmap`; - make the stack accessible. This is a transparent change (the new header will still work with older compilers) and is paired with corresponding compiler updates (which will enable the new capability by setting the `__HIPSTDPAR_INTERPOSE_ALLOC_V1__` macro). --- 🔁 Imported from [ROCm/rocThrust#543](ROCm/rocThrust#543) 🧑‍💻 Originally authored by @AlexVlx --------- Co-authored-by: Alex Voicu <[email protected]> Co-authored-by: assistant-librarian[bot] <assistant-librarian[bot]@users.noreply.github.com>
1 parent 8e9e829 commit 9884edf

File tree

3 files changed

+284
-3
lines changed

3 files changed

+284
-3
lines changed

projects/rocthrust/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@
4141
#if defined(__HIPSTDPAR__)
4242

4343
// Interposed allocations
44-
# if defined(__HIPSTDPAR_INTERPOSE_ALLOC__)
45-
# include "impl/interpose_allocations.hpp"
46-
# endif
44+
#if defined(__HIPSTDPAR_INTERPOSE_ALLOC_V1__)
45+
# include "impl/interpose_allocations_v1.hpp"
46+
#elif defined(__HIPSTDPAR_INTERPOSE_ALLOC__)
47+
# include "impl/interpose_allocations_v0.hpp"
48+
#endif
4749
// Parallel STL algorithms
4850
# include "impl/batch.hpp"
4951
# include "impl/copy.hpp"
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
15+
/*! \file thrust/system/hip/hipstdpar/impl/interpose_allocations_v1.hpp
16+
* \brief Interposed allocations/deallocations implementation detail header for HIPSTDPAR.
17+
*/
18+
19+
#pragma once
20+
21+
#if defined(__HIPSTDPAR__)
22+
#if defined(__HIPSTDPAR_INTERPOSE_ALLOC_V1__)
23+
#include <hip/hip_runtime.h>
24+
25+
#if __has_include(<pthread.h>)
26+
#include <pthread.h>
27+
#define __HIPSTDPAR_INTERPOSE_ALLOC_HAS_STACK_ACCESS__
28+
#endif
29+
#if __has_include(<sys/mman.h>)
30+
#include <sys/mman.h>
31+
#endif
32+
#if __has_include(<sys/unistd.h>)
33+
#include <sys/unistd.h>
34+
#endif
35+
36+
#include <cerrno>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <new>
#include <stdexcept>
#include <utility>
44+
45+
extern "C" {
46+
__attribute__((weak)) void __hipstdpar_hidden_free(void*);
47+
__attribute__((weak)) void* __hipstdpar_hidden_memalign(::std::size_t,
48+
::std::size_t);
49+
#if defined(_POSIX_MAPPED_FILES)
50+
#define __HIPSTDPAR_INTERPOSE_ALLOC_CAN_MMAP__
51+
__attribute__((weak))
52+
void* __hipstdpar_hidden_mmap(
53+
void*, ::std::size_t, int, int, int, ::off_t) noexcept;
54+
__attribute__((weak))
55+
int __hipstdpar_hidden_munmap(void*, ::std::size_t) noexcept;
56+
#endif // _POSIX_MAPPED_FILES
57+
}
58+
59+
namespace hipstd
60+
{
61+
inline static const bool __initialised{hipInit(0) == hipSuccess};
62+
63+
#if defined(__HIPSTDPAR_INTERPOSE_ALLOC_HAS_STACK_ACCESS__)
64+
class Stack_accessor final {
65+
// DATA
66+
::std::uint64_t* ps_{};
67+
::std::size_t n_{};
68+
::std::int32_t d_{};
69+
70+
// IMPLEMENTATION - ACCESSORS
71+
bool touch_stack_() const
72+
{ // Due to how the kernel manages memory, we have to pre-access.
73+
::std::uint64_t r{1};
74+
for (auto i = 0u; i != n_ / sizeof(*ps_); ++i) r += ps_[i];
75+
return r;
76+
}
77+
public:
78+
// CREATORS
79+
Stack_accessor()
80+
{
81+
pthread_attr_t t{};
82+
if (pthread_getattr_np(pthread_self(), &t)) {
83+
throw ::std::runtime_error("Failed to get thread attributes.");
84+
}
85+
if (pthread_attr_getstack(&t, reinterpret_cast<void**>(&ps_), &n_)) {
86+
throw ::std::runtime_error(
87+
"Failed to get thread stack attributes.");
88+
}
89+
if (!ps_ || n_ == 0)
90+
return;
91+
if (hipGetDevice(&d_) != hipSuccess) {
92+
throw ::std::runtime_error(
93+
"Failed to retrieve accelerator for HIPSTDPAR");
94+
}
95+
if (touch_stack_() &&
96+
hipMemAdvise(ps_, n_, hipMemAdviseSetAccessedBy, d_) != hipSuccess) {
97+
throw ::std::runtime_error(
98+
"Failed to make thread stack accessible.");
99+
}
100+
}
101+
~Stack_accessor()
102+
{
103+
if (!ps_ || n_ == 0) return;
104+
if (hipMemAdvise(ps_, n_, hipMemAdviseUnsetAccessedBy, d_) != hipSuccess) {
105+
::std::cerr << "Failed to unset thread stack accessibility." <<
106+
::std::endl;
107+
}
108+
}
109+
};
110+
inline Stack_accessor __main_stack_accessor{};
111+
inline thread_local Stack_accessor __thread_stack_accessor{};
112+
#endif // __HIPSTDPAR_INTERPOSE_ALLOC_HAS_STACK_ACCESS__
113+
} // Namespace hipstd.
114+
115+
extern "C" {
116+
inline __attribute__((used)) void* __hipstdpar_aligned_alloc(std::size_t a,
117+
std::size_t n)
118+
{
119+
auto r = __hipstdpar_hidden_memalign(a, n);
120+
121+
if (!hipstd::__initialised) return r;
122+
123+
hipDevice_t d{};
124+
hipGetDevice(&d);
125+
126+
if (hipMemAdvise(r, n, hipMemAdviseSetAccessedBy, d) != hipSuccess)
127+
return nullptr;
128+
129+
return r;
130+
}
131+
132+
inline __attribute__((used)) void* __hipstdpar_malloc(std::size_t n)
133+
{
134+
return __hipstdpar_aligned_alloc(alignof(std::max_align_t), n);
135+
}
136+
137+
inline __attribute__((used)) void* __hipstdpar_calloc(std::size_t n,
138+
std::size_t sz)
139+
{
140+
return ::std::memset(__hipstdpar_malloc(n * sz), 0, n * sz);
141+
}
142+
143+
inline __attribute__((used))
144+
int __hipstdpar_posix_aligned_alloc(void** p, std::size_t a, std::size_t n)
145+
{ // TODO: check invariants on alignment
146+
if (!p || n == 0) return 0;
147+
148+
*p = __hipstdpar_aligned_alloc(a, n);
149+
150+
return 1;
151+
}
152+
153+
inline __attribute__((used)) void __hipstdpar_free(void* p)
154+
{
155+
if (hipstd::__initialised) {
156+
hipDevice_t d{};
157+
hipGetDevice(&d);
158+
159+
// Even if this fails there isn't much to do.
160+
hipMemAdvise(p, UINT64_MAX, hipMemAdviseUnsetAccessedBy, d);
161+
}
162+
return __hipstdpar_hidden_free(p);
163+
}
164+
165+
inline __attribute__((used)) void* __hipstdpar_realloc(void* p,
166+
std::size_t n)
167+
{
168+
auto q = std::memcpy(__hipstdpar_malloc(n), p, n);
169+
__hipstdpar_free(p);
170+
171+
return q;
172+
}
173+
174+
inline __attribute__((used))
175+
void* __hipstdpar_realloc_array(void* p, std::size_t n, std::size_t sz)
176+
{ // TODO: handle overflow in n * sz gracefully, as per spec.
177+
return __hipstdpar_realloc(p, n * sz);
178+
}
179+
180+
inline __attribute__((used))
181+
void* __hipstdpar_operator_new_aligned(std::size_t n, std::size_t a)
182+
{
183+
if (auto p = __hipstdpar_aligned_alloc(a, n)) return p;
184+
185+
throw std::runtime_error{"Failed __hipstdpar_operator_new_aligned"};
186+
}
187+
188+
inline __attribute__((used)) void* __hipstdpar_operator_new(std::size_t n)
189+
{ // TODO: consider adding the special handling for operator new
190+
return __hipstdpar_operator_new_aligned(n, alignof(std::max_align_t));
191+
}
192+
193+
inline __attribute__((used)) void* __hipstdpar_operator_new_nothrow(
194+
std::size_t n, std::nothrow_t) noexcept
195+
{
196+
try {
197+
return __hipstdpar_operator_new(n);
198+
}
199+
catch (...) {
200+
// TODO: handle the potential exception
201+
}
202+
}
203+
204+
inline __attribute__((used)) void* __hipstdpar_operator_new_aligned_nothrow(
205+
std::size_t n, std::size_t a, std::nothrow_t) noexcept
206+
{ // TODO: consider adding the special handling for operator new
207+
try {
208+
return __hipstdpar_operator_new_aligned(n, a);
209+
}
210+
catch (...) {
211+
// TODO: handle the potential exception.
212+
}
213+
}
214+
215+
inline __attribute__((used)) void __hipstdpar_operator_delete_aligned_sized(
216+
void* p, std::size_t, std::size_t) noexcept
217+
{
218+
return __hipstdpar_free(p);
219+
}
220+
221+
inline __attribute__((used))
222+
void __hipstdpar_operator_delete(void* p) noexcept
223+
{
224+
return __hipstdpar_free(p);
225+
}
226+
227+
inline __attribute__((used))
228+
void __hipstdpar_operator_delete_aligned(void* p, std::size_t) noexcept
229+
{
230+
return __hipstdpar_free(p);
231+
}
232+
233+
inline __attribute__((used))
234+
void __hipstdpar_operator_delete_sized(void* p, std::size_t n) noexcept
235+
{
236+
return __hipstdpar_operator_delete_aligned_sized(
237+
p, n, alignof(std::max_align_t));
238+
}
239+
240+
#if defined(__HIPSTDPAR_INTERPOSE_ALLOC_CAN_MMAP__)
241+
inline __attribute__((used))
242+
void* __hipstdpar_mmap(void* p, std::size_t n, int prot, int f, int fd,
243+
off_t dx) noexcept
244+
{
245+
if (auto r = __hipstdpar_hidden_mmap(p, n, prot, f, fd, dx)) {
246+
if (!hipstd::__initialised) return r;
247+
248+
hipDevice_t d{};
249+
hipGetDevice(&d);
250+
251+
if (hipMemAdvise(r, n, hipMemAdviseSetAccessedBy, d) != hipSuccess)
252+
return nullptr;
253+
254+
return r;
255+
}
256+
return nullptr;
257+
}
258+
259+
inline __attribute__((used))
260+
int __hipstdpar_munmap(void* p, std::size_t n) noexcept
261+
{
262+
if (hipstd::__initialised) {
263+
hipDevice_t d{};
264+
hipGetDevice(&d);
265+
266+
if (hipMemAdvise(p, n, hipMemAdviseUnsetAccessedBy, d) != hipSuccess)
267+
return -1;
268+
}
269+
return __hipstdpar_hidden_munmap(p, n);
270+
}
271+
#endif // __HIPSTDPAR_INTERPOSE_ALLOC_CAN_MMAP__
272+
} // extern "C"
273+
# else // __HIPSTDPAR_INTERPOSE_ALLOC_V1__
274+
# error "__HIPSTDPAR_INTERPOSE_ALLOC_V1__ should be defined. Please use the '--hipstdpar-interpose-alloc' compile option."
275+
# endif // __HIPSTDPAR_INTERPOSE_ALLOC_V1__
276+
277+
#else // __HIPSTDPAR__
278+
# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option."
279+
#endif // __HIPSTDPAR__

0 commit comments

Comments
 (0)