Commit 8c4b55c

[L0 v2] Move locking to top-level functions
Move locking to the top-level API functions so that it is consistent and all locks are always taken in the same order. Also add a few missing TRACK_SCOPE_LATENCY calls.
1 parent 73e54a0 commit 8c4b55c
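
The central pattern of the change: each API entry point takes the handle's mutex once, and the member functions it calls assume the lock is already held; pure reads of const members take no lock at all. A minimal, self-contained sketch of that pattern, with toy types standing in for the adapter's real classes (Kernel, apiKernelSetArgValue, and argCount are illustrative names, not the adapter's API):

#include <mutex>
#include <shared_mutex>

struct Kernel {
  std::shared_mutex Mutex; // stand-in for ur_shared_mutex
  const int argCount = 4;  // immutable after construction
  int cachedArg = 0;
  // Member functions no longer lock; they assume the caller holds Mutex.
  void setArgValue(int v) { cachedArg = v; }
};

// Top-level entry point: the only place the lock is taken.
void apiKernelSetArgValue(Kernel &k, int v) {
  std::scoped_lock guard(k.Mutex); // one lock for the whole call
  k.setArgValue(v);                // runs under the caller's lock
}

// Reading a const member needs no synchronization, matching the
// "No locking needed here, we only read const members" hunks below.
int apiKernelGetArgCount(const Kernel &k) { return k.argCount; }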

6 files changed: +82 -53 lines

source/adapters/level_zero/v2/context.cpp

Lines changed: 2 additions & 1 deletion
@@ -134,7 +134,8 @@ ur_result_t urContextGetInfo(ur_context_handle_t hContext,
                              void *pContextInfo,
 
                              size_t *pPropSizeRet) {
-  std::shared_lock<ur_shared_mutex> Lock(hContext->Mutex);
+  // No locking needed here, we only read const members
+
   UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet);
   switch (
       (uint32_t)contextInfoType) { // cast to avoid warnings on EXT enum values

source/adapters/level_zero/v2/event.cpp

Lines changed: 2 additions & 4 deletions
@@ -93,8 +93,6 @@ static uint64_t adjustEndEventTimestamp(uint64_t adjustedStartTimestamp,
 }
 
 uint64_t ur_event_handle_t_::getEventEndTimestamp() {
-  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
-
   // If adjustedEventEndTimestamp on the event is non-zero it means it has
   // collected the result of the queue already. In that case it has been
   // adjusted and is ready for immediate return.
@@ -120,8 +118,6 @@ void ur_event_handle_t_::recordStartTimestamp() {
   UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps(
       getDevice(), &deviceStartTimestamp, nullptr));
 
-  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
-
   adjustedEventStartTimestamp = deviceStartTimestamp;
 }
 
@@ -183,6 +179,8 @@ ur_result_t urEventGetProfilingInfo(
     size_t *pPropValueSizeRet ///< [out][optional] pointer to the actual size in
                               ///< bytes returned in propValue
 ) {
+  std::scoped_lock<ur_shared_mutex> lock(hEvent->Mutex);
+
   // The event must either have profiling enabled or be recording timestamps.
   bool isTimestampedEvent = hEvent->isTimestamped();
   if (!hEvent->isProfilingEnabled() && !isTimestampedEvent) {
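
The two deletions above and the one addition belong together: ur_shared_mutex is not a recursive mutex, so once urEventGetProfilingInfo locks the event, the member functions it calls must not try to lock it again. A simplified sketch of the hazard, using toy types rather than the adapter's real ones:

#include <cstdint>
#include <mutex>
#include <shared_mutex>

struct Event {
  std::shared_mutex Mutex;   // stand-in for ur_shared_mutex (non-recursive)
  uint64_t endTimestamp = 0;

  // After this commit: no lock here; the caller is expected to hold Mutex.
  uint64_t getEventEndTimestamp() const { return endTimestamp; }
};

void apiEventGetProfilingInfo(Event &e) {
  std::scoped_lock lock(e.Mutex); // single lock for the whole query
  (void)e.getEventEndTimestamp(); // locking Mutex again in here would deadlock
}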

source/adapters/level_zero/v2/kernel.cpp

Lines changed: 13 additions & 6 deletions
@@ -182,8 +182,6 @@ ur_result_t ur_kernel_handle_t_::setArgValue(
     return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
   }
 
-  std::scoped_lock<ur_shared_mutex> guard(Mutex);
-
   for (auto &singleDeviceKernel : deviceKernels) {
     if (!singleDeviceKernel.has_value()) {
       continue;
@@ -217,8 +215,6 @@ ur_program_handle_t ur_kernel_handle_t_::getProgramHandle() const {
 
 ur_result_t ur_kernel_handle_t_::setExecInfo(ur_kernel_exec_info_t propName,
                                              const void *pPropValue) {
-  std::scoped_lock<ur_shared_mutex> Guard(Mutex);
-
   for (auto &kernel : deviceKernels) {
     if (!kernel.has_value())
       continue;
@@ -315,6 +311,8 @@ ur_result_t urKernelSetArgValue(
         *pArgValue ///< [in] argument value represented as matching arg type.
 ) {
   TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgValue");
+
+  std::scoped_lock<ur_shared_mutex> guard(hKernel->Mutex);
   return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue);
 }
 
@@ -327,6 +325,8 @@ ur_result_t urKernelSetArgPointer(
         *pArgValue ///< [in] argument value represented as matching arg type.
 ) {
   TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer");
+
+  std::scoped_lock<ur_shared_mutex> guard(hKernel->Mutex);
   return hKernel->setArgPointer(argIndex, pProperties, pArgValue);
 }
 
@@ -336,6 +336,9 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
                      ur_mem_handle_t hArgValue) {
   TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgMemObj");
 
+  std::scoped_lock<ur_shared_mutex, ur_shared_mutex> guard(hKernel->Mutex,
+                                                           hArgValue->Mutex);
+
   // TODO: support properties
   std::ignore = pProperties;
 
@@ -367,6 +370,8 @@ urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex,
                     const ur_kernel_arg_local_properties_t *pProperties) {
   TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgLocal");
 
+  std::scoped_lock<ur_shared_mutex> guard(hKernel->Mutex);
+
   std::ignore = pProperties;
 
   return hKernel->setArgValue(argIndex, argSize, nullptr, nullptr);
@@ -384,6 +389,8 @@ ur_result_t urKernelSetExecInfo(
   std::ignore = propSize;
   std::ignore = pProperties;
 
+  std::scoped_lock<ur_shared_mutex> guard(hKernel->Mutex);
+
   return hKernel->setExecInfo(propName, pPropValue);
 }
 
@@ -401,7 +408,7 @@ ur_result_t urKernelGetGroupInfo(
 ) {
   UrReturnHelper returnValue(paramValueSize, pParamValue, pParamValueSizeRet);
 
-  std::shared_lock<ur_shared_mutex> Guard(hKernel->Mutex);
+  // No locking needed here, we only read const members
   switch (paramName) {
   case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
     // TODO: To revisit after level_zero/issues/262 is resolved
@@ -483,7 +490,7 @@ ur_result_t urKernelGetSubGroupInfo(
 
   auto props = hKernel->getProperties(hDevice);
 
-  std::shared_lock<ur_shared_mutex> Guard(hKernel->Mutex);
+  // No locking needed here, we only read const members
   if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) {
     returnValue(uint32_t{props.maxSubgroupSize});
   } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) {
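
The urKernelSetArgMemObj hunk takes the kernel's and the memory object's mutexes with a single std::scoped_lock. This is what the commit message's "same order" goal is about: acquiring two locks one at a time, in different orders on different paths, is the classic ABBA deadlock, while std::scoped_lock relies on std::lock's deadlock-avoidance algorithm and is safe regardless of the order in which the mutexes are listed. A toy illustration (not the adapter's code):

#include <mutex>
#include <shared_mutex>

std::shared_mutex kernelMutex; // stand-ins for hKernel->Mutex and hArgValue->Mutex
std::shared_mutex memMutex;

void threadA() {
  // Locks both without risk of deadlock, whatever the listed order.
  std::scoped_lock guard(kernelMutex, memMutex);
}

void threadB() {
  std::scoped_lock guard(memMutex, kernelMutex); // still safe
  // By contrast, taking std::lock_guard on memMutex and then on kernelMutex
  // here, while threadA did the reverse one at a time, could deadlock (ABBA).
}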

source/adapters/level_zero/v2/memory.cpp

Lines changed: 12 additions & 14 deletions
@@ -125,6 +125,8 @@ static ur_result_t synchronousZeCopy(ur_context_handle_t hContext,
 ur_result_t
 ur_discrete_mem_handle_t::migrateBufferTo(ur_device_handle_t hDevice, void *src,
                                           size_t size) {
+  TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::migrateBufferTo");
+
   auto Id = hDevice->Id.value();
 
   if (!deviceAllocations[Id]) {
@@ -163,9 +165,11 @@ ur_discrete_mem_handle_t::~ur_discrete_mem_handle_t() {
   }
 }
 
-void *ur_discrete_mem_handle_t::getDevicePtrUnlocked(
+void *ur_discrete_mem_handle_t::getDevicePtr(
     ur_device_handle_t hDevice, access_mode_t access, size_t offset,
     size_t size, std::function<void(void *src, void *dst, size_t)> migrate) {
+  TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::getDevicePtr");
+
   std::ignore = access;
   std::ignore = size;
   std::ignore = migrate;
@@ -198,17 +202,10 @@ void *ur_discrete_mem_handle_t::getDevicePtrUnlocked(
          offset;
 }
 
-void *ur_discrete_mem_handle_t::getDevicePtr(
-    ur_device_handle_t hDevice, access_mode_t access, size_t offset,
-    size_t size, std::function<void(void *src, void *dst, size_t)> migrate) {
-  std::lock_guard lock(this->Mutex);
-  return getDevicePtrUnlocked(hDevice, access, offset, size, migrate);
-}
-
 void *ur_discrete_mem_handle_t::mapHostPtr(
     access_mode_t access, size_t offset, size_t size,
     std::function<void(void *src, void *dst, size_t)> migrate) {
-  std::lock_guard lock(this->Mutex);
+  TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::mapHostPtr");
 
   // TODO: use async alloc?
 
@@ -231,7 +228,7 @@ void *ur_discrete_mem_handle_t::mapHostPtr(
 void ur_discrete_mem_handle_t::unmapHostPtr(
     void *pMappedPtr,
     std::function<void(void *src, void *dst, size_t)> migrate) {
-  std::lock_guard lock(this->Mutex);
+  TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::unmapHostPtr");
 
   for (auto &hostAllocation : hostAllocations) {
     if (hostAllocation.ptr == pMappedPtr) {
@@ -241,9 +238,9 @@ void ur_discrete_mem_handle_t::unmapHostPtr(
               deviceAllocations[activeAllocationDevice->Id.value()]) +
           hostAllocation.offset;
     } else if (hostAllocation.access != access_mode_t::write_invalidate) {
-      devicePtr = ur_cast<char *>(getDevicePtrUnlocked(
-          hContext->getDevices()[0], access_mode_t::read_only,
-          hostAllocation.offset, hostAllocation.size, migrate));
+      devicePtr = ur_cast<char *>(
+          getDevicePtr(hContext->getDevices()[0], access_mode_t::read_only,
+                       hostAllocation.offset, hostAllocation.size, migrate));
     }
 
     if (devicePtr) {
@@ -328,7 +325,8 @@ ur_result_t urMemBufferCreateWithNativeHandle(
 ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName,
                          size_t propSize, void *pPropValue,
                          size_t *pPropSizeRet) {
-  std::shared_lock<ur_shared_mutex> Lock(hMemory->Mutex);
+  // No locking needed here, we only read const members
+
   UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet);
 
   switch (propName) {
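
This file also gains several TRACK_SCOPE_LATENCY calls. The macro itself is defined elsewhere in the adapter; purely as a mental model, a scope-latency tracker of this kind is typically an RAII timer, roughly along these lines (hypothetical sketch with invented names, not the adapter's implementation):

#include <chrono>
#include <cstdio>

// Hypothetical stand-in for TRACK_SCOPE_LATENCY: measures from construction
// to the end of the enclosing scope and reports the elapsed time.
struct ScopeLatency {
  explicit ScopeLatency(const char *n) : name(n) {}
  ~ScopeLatency() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start)
                  .count();
    std::printf("%s took %lld us\n", name, static_cast<long long>(us));
  }
  const char *name;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

#define TRACK_SCOPE_LATENCY_SKETCH(name) ScopeLatency scopeLatencySketch{name}

void migrateBufferToSketch() {
  TRACK_SCOPE_LATENCY_SKETCH("ur_discrete_mem_handle_t::migrateBufferTo");
  // ... the measured work ends when this scope exits ...
}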

source/adapters/level_zero/v2/memory.hpp

Lines changed: 1 addition & 4 deletions
@@ -26,6 +26,7 @@ struct ur_mem_handle_t_ : _ur_object {
     write_invalidate
   };
 
+  // Following functions should always be called under the lock.
   virtual void *
   getDevicePtr(ur_device_handle_t, access_mode_t, size_t offset, size_t size,
                std::function<void(void *src, void *dst, size_t)> mecmpy) = 0;
@@ -109,10 +110,6 @@ struct ur_discrete_mem_handle_t : public ur_mem_handle_t_ {
                std::function<void(void *src, void *dst, size_t)>) override;
 
 private:
-  void *getDevicePtrUnlocked(ur_device_handle_t, access_mode_t, size_t offset,
-                             size_t size,
-                             std::function<void(void *src, void *dst, size_t)>);
-
   // Vector of per-device allocations indexed by device->Id
   std::vector<void *> deviceAllocations;
 
