Skip to content

Commit b410c40

Browse files
committed
rocr: Ensure globals are initialized at first use
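When ROCr is built as a static library, global variables are often not yet initialized to valid values at their first use. This change addresses that problem by replacing the affected static data members with accessor functions that return function-local statics, which are guaranteed to be initialized by the time they are first used. Change-Id: I550fa41feb3bc04b9cc686bcfb4acf2a7b651a88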
1 parent 80da7d5 commit b410c40

36 files changed

+323
-233
lines changed

runtime/hsa-runtime/core/common/shared.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444

4545
namespace rocr {
4646
namespace core {
47-
std::function<void*(size_t, size_t, uint32_t, int)> BaseShared::allocate_ = nullptr;
48-
std::function<void(void*)> BaseShared::free_ = nullptr;
47+
4948
} // namespace core
5049
} // namespace rocr

runtime/hsa-runtime/core/common/shared.h

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,36 @@ namespace core {
5858
class BaseShared {
5959
public:
6060
static void SetAllocateAndFree(
61-
const std::function<void*(size_t, size_t, uint32_t, int)>& allocate,
62-
const std::function<void(void*)>& free) {
63-
allocate_ = allocate;
64-
free_ = free;
61+
const std::function<void*(size_t, size_t, uint32_t, int)>& alloc,
62+
const std::function<void(void*)>& fr) {
63+
allocate_() = alloc;
64+
free_() = fr;
6565
}
6666

6767
protected:
68-
static std::function<void*(size_t, size_t, uint32_t, int)> allocate_;
69-
static std::function<void(void*)> free_;
68+
static __forceinline std::function<void*(size_t, size_t, uint32_t, int)>&
69+
allocate_() {
70+
static std::function<void*(size_t, size_t, uint32_t, int)> alloc =
71+
nullptr;
72+
return alloc;
73+
}
74+
static __forceinline std::function<void(void*)>&
75+
free_() {
76+
static std::function<void(void*)> fr = nullptr;
77+
return fr;
78+
}
79+
7080
};
7181

7282
/// @brief Default Allocator for Shared. Ensures allocations are whole pages.
7383
template <typename T> class PageAllocator : private BaseShared {
7484
public:
7585
__forceinline static T* alloc(int flags = 0) {
76-
T* ret = reinterpret_cast<T*>(allocate_(AlignUp(sizeof(T), 4096), 4096, flags, 0));
86+
T* ret = reinterpret_cast<T*>(
87+
allocate_()(AlignUp(sizeof(T), 4096), 4096, flags, 0));
7788
if (ret == nullptr) throw std::bad_alloc();
7889

79-
MAKE_NAMED_SCOPE_GUARD(throwGuard, [&]() { free_(ret); });
90+
MAKE_NAMED_SCOPE_GUARD(throwGuard, [&]() { free_()(ret); });
8091

8192
new (ret) T;
8293

@@ -85,10 +96,11 @@ template <typename T> class PageAllocator : private BaseShared {
8596
}
8697

8798
__forceinline static T* alloc(int agent_node_id, int flags) {
88-
T* ret = reinterpret_cast<T*>(allocate_(AlignUp(sizeof(T), 4096), 4096, flags, agent_node_id));
99+
T* ret = reinterpret_cast<T*>(
100+
allocate_()(AlignUp(sizeof(T), 4096), 4096, flags, agent_node_id));
89101
if (ret == nullptr) throw std::bad_alloc();
90102

91-
MAKE_NAMED_SCOPE_GUARD(throwGuard, [&]() { free_(ret); });
103+
MAKE_NAMED_SCOPE_GUARD(throwGuard, [&]() { free_()(ret); });
92104

93105
new (ret) T;
94106

@@ -99,7 +111,7 @@ template <typename T> class PageAllocator : private BaseShared {
99111
__forceinline static void free(T* ptr) {
100112
if (ptr != nullptr) {
101113
ptr->~T();
102-
free_(ptr);
114+
free_()(ptr);
103115
}
104116
}
105117
};
@@ -110,7 +122,7 @@ template <typename T, typename Allocator = PageAllocator<T>>
110122
class Shared final : private BaseShared {
111123
public:
112124
explicit Shared(Allocator* pool = nullptr, int flags = 0) : pool_(pool) {
113-
assert(allocate_ != nullptr && free_ != nullptr &&
125+
assert(allocate_() != nullptr && free_() != nullptr &&
114126
"Shared object allocator is not set");
115127

116128
if (pool_)
@@ -120,7 +132,7 @@ class Shared final : private BaseShared {
120132
}
121133

122134
explicit Shared(int agent_node_id, Allocator* pool = nullptr, int flags = 0) : pool_(pool) {
123-
assert(allocate_ != nullptr && free_ != nullptr &&
135+
assert(allocate_() != nullptr && free_() != nullptr &&
124136
"Shared object allocator is not set");
125137

126138
if (pool_)
@@ -130,7 +142,8 @@ class Shared final : private BaseShared {
130142
}
131143

132144
~Shared() {
133-
assert(allocate_ != nullptr && free_ != nullptr && "Shared object allocator is not set");
145+
assert(allocate_() != nullptr && free_() != nullptr &&
146+
"Shared object allocator is not set");
134147

135148
if (pool_)
136149
pool_->free(shared_object_);
@@ -164,19 +177,20 @@ class Shared final : private BaseShared {
164177
template <typename T> class Shared<T, PageAllocator<T>> final : private BaseShared {
165178
public:
166179
Shared(int flags = 0) {
167-
assert(allocate_ != nullptr && free_ != nullptr && "Shared object allocator is not set");
180+
assert(allocate_() != nullptr && free_() != nullptr &&
181+
"Shared object allocator is not set");
168182

169183
shared_object_ = PageAllocator<T>::alloc(flags);
170184
}
171185

172186
Shared(int agent_node_id, int flags) {
173-
assert(allocate_ != nullptr && free_ != nullptr && "Shared object allocator is not set");
187+
assert(allocate_() != nullptr && free_() != nullptr && "Shared object allocator is not set");
174188

175189
shared_object_ = PageAllocator<T>::alloc(agent_node_id, flags);
176190
}
177191

178192
~Shared() {
179-
assert(allocate_ != nullptr && free_ != nullptr &&
193+
assert(allocate_() != nullptr && free_() != nullptr &&
180194
"Shared object allocator is not set");
181195

182196
PageAllocator<T>::free(shared_object_);
@@ -207,18 +221,19 @@ template <typename T, size_t Align> class SharedArray final : private BaseShared
207221
SharedArray() : shared_object_(nullptr) {}
208222

209223
explicit SharedArray(size_t length) : shared_object_(nullptr), len(length) {
210-
assert(allocate_ != nullptr && free_ != nullptr && "Shared object allocator is not set");
224+
assert(allocate_() != nullptr && free_() != nullptr &&
225+
"Shared object allocator is not set");
211226
static_assert((__alignof(T) <= Align) || (Align == 0), "Align is less than alignof(T)");
212227

213228
shared_object_ =
214-
reinterpret_cast<T*>(allocate_(sizeof(T) * length, Max(__alignof(T), Align), 0, 0));
229+
reinterpret_cast<T*>(allocate_()(sizeof(T) * length, Max(__alignof(T), Align), 0, 0));
215230
if (shared_object_ == nullptr) throw std::bad_alloc();
216231

217232
size_t i = 0;
218233

219234
MAKE_NAMED_SCOPE_GUARD(loopGuard, [&]() {
220235
for (size_t t = 0; t < i - 1; t++) shared_object_[t].~T();
221-
free_(shared_object_);
236+
free_()(shared_object_);
222237
});
223238

224239
for (; i < length; i++) new (&shared_object_[i]) T;
@@ -227,11 +242,12 @@ template <typename T, size_t Align> class SharedArray final : private BaseShared
227242
}
228243

229244
~SharedArray() {
230-
assert(allocate_ != nullptr && free_ != nullptr && "Shared object allocator is not set");
245+
assert(allocate_() != nullptr && free_() != nullptr &&
246+
"Shared object allocator is not set");
231247

232248
if (shared_object_ != nullptr) {
233249
for (size_t i = 0; i < len; i++) shared_object_[i].~T();
234-
free_(shared_object_);
250+
free_()(shared_object_);
235251
}
236252
}
237253

runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ class AieAqlQueue : public core::Queue,
6262
core::DoorbellSignal {
6363
public:
6464
static __forceinline bool IsType(core::Signal *signal) {
65-
return signal->IsType(&rtti_id_);
65+
return signal->IsType(&rtti_id());
6666
}
6767

6868
static __forceinline bool IsType(core::Queue *queue) {
69-
return queue->IsType(&rtti_id_);
69+
return queue->IsType(&rtti_id());
7070
}
7171

7272
AieAqlQueue() = delete;
@@ -123,7 +123,7 @@ class AieAqlQueue : public core::Queue,
123123
uint32_t queue_size_bytes_ = std::numeric_limits<uint32_t>::max();
124124

125125
protected:
126-
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }
126+
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }
127127

128128
private:
129129
AieAgent &agent_;
@@ -144,7 +144,11 @@ class AieAqlQueue : public core::Queue,
144144

145145
/// @brief Indicates if queue is active.
146146
std::atomic<bool> active_;
147-
static int rtti_id_;
147+
static __forceinline int& rtti_id() {
148+
static int rtti_id_ = 0;
149+
return rtti_id_;
150+
}
151+
148152
};
149153

150154
} // namespace AMD

runtime/hsa-runtime/core/inc/amd_aql_queue.h

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@ namespace AMD {
5757
class AqlQueue : public core::Queue, private core::LocalSignal, public core::DoorbellSignal {
5858
public:
5959
static __forceinline bool IsType(core::Signal* signal) {
60-
return signal->IsType(&rtti_id_);
60+
return signal->IsType(&rtti_id());
6161
}
6262

63-
static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id_); }
63+
static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }
6464

6565
// Acquires/releases queue resources and requests HW schedule/deschedule.
6666
AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
@@ -228,7 +228,7 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
228228
void AsyncReclaimAltScratch();
229229

230230
protected:
231-
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }
231+
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }
232232

233233
private:
234234
uint32_t ComputeRingBufferMinPkts();
@@ -331,18 +331,28 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
331331
std::vector<uint32_t> cu_mask_;
332332

333333
// Shared event used for queue errors
334-
static HsaEvent* queue_event_;
335-
334+
static __forceinline HsaEvent*& queue_event() {
335+
static HsaEvent* queue_event_ = nullptr;
336+
return queue_event_;
337+
}
336338
// Queue count - used to ref count queue_event_
337-
static std::atomic<uint32_t> queue_count_;
339+
static __forceinline std::atomic<uint32_t>& queue_count() {
340+
static std::atomic<uint32_t> queue_count_(0);
341+
return queue_count_;
342+
}
338343

339344
// Mutex for queue_event_ manipulation
340-
static KernelMutex queue_lock_;
341-
345+
static __forceinline KernelMutex& queue_lock() {
346+
static KernelMutex queue_lock_;
347+
return queue_lock_;
348+
}
342349
// Async scratch single limit - may be modified after init
343350
size_t async_scratch_single_limit_;
344351

345-
static int rtti_id_;
352+
static __forceinline int& rtti_id() {
353+
static int rtti_id_ = 0;
354+
return rtti_id_;
355+
}
346356

347357
// Forbid copying and moving of this object
348358
DISALLOW_COPY_AND_ASSIGN(AqlQueue);

runtime/hsa-runtime/core/inc/amd_blit_kernel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class BlitKernel : public core::Blit {
117117
virtual void GangLeader(bool gang_leader) override {}
118118
virtual bool GangLeader() const override { return false; }
119119

120+
const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;
120121
private:
121122
union KernelArgs {
122123
struct __ALIGNED__(16) {

runtime/hsa-runtime/core/inc/amd_memory_region.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ class MemoryRegion : public core::MemoryRegion {
167167
return static_cast<uint32_t>(mem_props_.MemoryClockMax);
168168
}
169169

170-
__forceinline size_t GetPageSize() const { return kPageSize_; }
170+
__forceinline size_t GetPageSize() const { return kPageSize(); }
171171

172172
__forceinline const HsaMemFlags &mem_flags() const { return mem_flag_; }
173173
__forceinline const HsaMemMapFlags &map_flags() const { return map_flag_; }
@@ -195,7 +195,10 @@ class MemoryRegion : public core::MemoryRegion {
195195
// fragments of the block routing to the same MemoryRegion.
196196
mutable KernelMutex access_lock_;
197197

198-
static size_t kPageSize_;
198+
static __forceinline const size_t& kPageSize() {
199+
static size_t kPageSize_ = sysconf(_SC_PAGESIZE);
200+
return kPageSize_;
201+
}
199202

200203
// Determine access type allowed to requesting device
201204
hsa_amd_memory_pool_access_t GetAccessInfo(const core::Agent& agent,

runtime/hsa-runtime/core/inc/default_signal.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class BusyWaitSignal : public Signal {
5959
/// @brief Determines if a Signal* can be safely converted to BusyWaitSignal*
6060
/// via static_cast.
6161
static __forceinline bool IsType(Signal* ptr) {
62-
return ptr->IsType(&rtti_id_);
62+
return ptr->IsType(&rtti_id());
6363
}
6464

6565
/// @brief See base class Signal.
@@ -154,10 +154,13 @@ class BusyWaitSignal : public Signal {
154154
__forceinline HsaEvent* EopEvent() { return NULL; }
155155

156156
protected:
157-
bool _IsA(rtti_t id) const { return id == &rtti_id_; }
157+
bool _IsA(rtti_t id) const { return id == &rtti_id(); }
158158

159159
private:
160-
static int rtti_id_;
160+
static __forceinline int& rtti_id() {
161+
static int rtti_id_ = 0;
162+
return rtti_id_;
163+
}
161164

162165
DISALLOW_COPY_AND_ASSIGN(BusyWaitSignal);
163166
};
@@ -167,20 +170,23 @@ class DefaultSignal : private LocalSignal, public BusyWaitSignal {
167170
public:
168171
/// @brief Determines if a Signal* can be safely converted to DefaultSignal*
169172
/// via static_cast.
170-
static __forceinline bool IsType(Signal* ptr) { return ptr->IsType(&rtti_id_); }
173+
static __forceinline bool IsType(Signal* ptr) { return ptr->IsType(&rtti_id()); }
171174

172175
/// @brief See base class Signal.
173176
explicit DefaultSignal(hsa_signal_value_t initial_value, bool enableIPC = false)
174177
: LocalSignal(initial_value, enableIPC), BusyWaitSignal(signal(), enableIPC) {}
175178

176179
protected:
177180
bool _IsA(rtti_t id) const {
178-
if (id == &rtti_id_) return true;
181+
if (id == &rtti_id()) return true;
179182
return BusyWaitSignal::_IsA(id);
180183
}
181184

182185
private:
183-
static int rtti_id_;
186+
static __forceinline int& rtti_id() {
187+
static int rtti_id_ = 0;
188+
return rtti_id_;
189+
}
184190

185191
DISALLOW_COPY_AND_ASSIGN(DefaultSignal);
186192
};

runtime/hsa-runtime/core/inc/host_queue.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ namespace rocr {
5252
namespace core {
5353
class HostQueue : public Queue {
5454
public:
55-
static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id_); }
55+
static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }
5656

5757
HostQueue(hsa_region_t region, uint32_t ring_size, hsa_queue_type32_t type,
5858
uint32_t features, hsa_signal_t doorbell_signal);
@@ -175,17 +175,23 @@ class HostQueue : public Queue {
175175
void operator delete(void*, void*) {}
176176

177177
protected:
178-
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }
178+
bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }
179179

180180
private:
181-
static int rtti_id_;
181+
static __forceinline int& rtti_id() {
182+
static int rtti_id_ = 0;
183+
return rtti_id_;
184+
}
182185
static const size_t kRingAlignment = 256;
183186
const uint32_t size_;
184187
void* ring_;
185188

186189
// Host queue id counter, starting from 0x80000000 to avoid overlapping
187190
// with aql queue id.
188-
static std::atomic<uint32_t> queue_count_;
191+
static __forceinline std::atomic<uint32_t>& queue_count() {
192+
static std::atomic<uint32_t> queue_count_;
193+
return queue_count_;
194+
}
189195

190196
DISALLOW_COPY_AND_ASSIGN(HostQueue);
191197
};

0 commit comments

Comments
 (0)