Skip to content

Commit cdb4cab

Browse files
authored
[DevSAN] Cache internal queue to avoid repeated create/destroy (#19540)
1 parent a09d795 commit cdb4cab

File tree

10 files changed

+65
-32
lines changed

10 files changed

+65
-32
lines changed

unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
9090
assert((void *)Device != nullptr && "Device cannot be nullptr");
9191

9292
std::scoped_lock<ur_shared_mutex> Guard(Mutex);
93+
auto CI = getAsanInterceptor()->getContextInfo(Context);
9394
auto &Allocation = Allocations[Device];
9495
ur_result_t URes = UR_RESULT_SUCCESS;
9596
if (!Allocation) {
@@ -106,9 +107,9 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
106107
}
107108

108109
if (HostPtr) {
109-
ManagedQueue Queue(Context, Device);
110+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
110111
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
111-
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
112+
InternalQueue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
112113
if (URes != UR_RESULT_SUCCESS) {
113114
UR_LOG_L(
114115
getContext()->logger, ERR,
@@ -147,10 +148,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
147148

148149
// Copy data from last synced device to host
149150
{
150-
ManagedQueue Queue(Context, LastSyncedDevice.hDevice);
151+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
151152
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
152-
Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0,
153-
nullptr, nullptr);
153+
InternalQueue, true, HostAllocation, LastSyncedDevice.MemHandle, Size,
154+
0, nullptr, nullptr);
154155
if (URes != UR_RESULT_SUCCESS) {
155156
UR_LOG_L(getContext()->logger, ERR,
156157
"Failed to migrate memory buffer data");
@@ -160,9 +161,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
160161

161162
// Sync data back to device
162163
{
163-
ManagedQueue Queue(Context, Device);
164+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
164165
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
165-
Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr);
166+
InternalQueue, true, Allocation, HostAllocation, Size, 0, nullptr,
167+
nullptr);
166168
if (URes != UR_RESULT_SUCCESS) {
167169
UR_LOG_L(getContext()->logger, ERR,
168170
"Failed to migrate memory buffer data");

unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate(
677677
std::shared_ptr<ContextInfo> CtxInfo =
678678
getAsanInterceptor()->getContextInfo(hContext);
679679
for (const auto &hDevice : CtxInfo->DeviceList) {
680-
ManagedQueue InternalQueue(hContext, hDevice);
680+
ur_queue_handle_t InternalQueue = CtxInfo->getInternalQueue(hDevice);
681681
char *Handle = nullptr;
682682
UR_CALL(pMemBuffer->getHandle(hDevice, Handle));
683683
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,17 +270,15 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
270270
auto ContextInfo = getContextInfo(Context);
271271
auto DeviceInfo = getDeviceInfo(Device);
272272

273-
ManagedQueue InternalQueue(Context, Device);
274-
if (!InternalQueue) {
275-
UR_LOG_L(getContext()->logger, ERR, "Failed to create internal queue");
276-
return UR_RESULT_ERROR_INVALID_QUEUE;
277-
}
273+
ur_queue_handle_t InternalQueue = ContextInfo->getInternalQueue(Device);
278274

279275
UR_CALL(prepareLaunch(ContextInfo, DeviceInfo, InternalQueue, Kernel,
280276
LaunchInfo));
281277

282278
UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue));
283279

280+
UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue));
281+
284282
return UR_RESULT_SUCCESS;
285283
}
286284

@@ -467,6 +465,7 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
467465

468466
ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
469467
auto Context = GetContext(Program);
468+
auto CI = getContextInfo(Context);
470469
std::vector<ur_device_handle_t> Devices = GetDevices(Program);
471470

472471
for (auto Device : Devices) {
@@ -484,11 +483,11 @@ ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
484483
assert((MetadataSize % sizeof(SpirKernelInfo) == 0) &&
485484
"SpirKernelMetadata size is not correct");
486485

487-
ManagedQueue Queue(Context, Device);
486+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(Device);
488487

489488
std::vector<SpirKernelInfo> SKInfo(NumOfSpirKernel);
490489
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
491-
Queue, true, &SKInfo[0], MetadataPtr,
490+
InternalQueue, true, &SKInfo[0], MetadataPtr,
492491
sizeof(SpirKernelInfo) * NumOfSpirKernel, 0, nullptr, nullptr);
493492
if (Result != UR_RESULT_SUCCESS) {
494493
UR_LOG_L(getContext()->logger, ERR, "Can't read the value of <{}>: {}",
@@ -504,7 +503,7 @@ ur_result_t AsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
504503
}
505504
std::vector<char> KernelNameV(SKI.Size);
506505
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
507-
Queue, true, KernelNameV.data(), (void *)SKI.KernelName,
506+
InternalQueue, true, KernelNameV.data(), (void *)SKI.KernelName,
508507
sizeof(char) * SKI.Size, 0, nullptr, nullptr);
509508
if (Result != UR_RESULT_SUCCESS) {
510509
UR_LOG_L(getContext()->logger, ERR, "Can't read kernel name: {}",
@@ -537,7 +536,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
537536
assert(ProgramInfo != nullptr && "unregistered program!");
538537

539538
for (auto Device : Devices) {
540-
ManagedQueue Queue(Context, Device);
539+
ur_queue_handle_t InternalQueue = ContextInfo->getInternalQueue(Device);
541540

542541
size_t MetadataSize;
543542
void *MetadataPtr;
@@ -554,7 +553,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
554553
"DeviceGlobal metadata size is not correct");
555554
std::vector<DeviceGlobalInfo> GVInfos(NumOfDeviceGlobal);
556555
Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
557-
Queue, true, &GVInfos[0], MetadataPtr,
556+
InternalQueue, true, &GVInfos[0], MetadataPtr,
558557
sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr);
559558
if (Result != UR_RESULT_SUCCESS) {
560559
UR_LOG_L(getContext()->logger, ERR, "Device Global[{}] Read Failed: {}",
@@ -932,6 +931,8 @@ bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const {
932931
ContextInfo::~ContextInfo() {
933932
Stats.Print(Handle);
934933

934+
InternalQueueMap.clear();
935+
935936
[[maybe_unused]] ur_result_t URes;
936937
if (USMPool) {
937938
URes = getContext()->urDdiTable.USM.pfnPoolRelease(USMPool);
@@ -971,6 +972,13 @@ ur_usm_pool_handle_t ContextInfo::getUSMPool() {
971972
return USMPool;
972973
}
973974

975+
ur_queue_handle_t ContextInfo::getInternalQueue(ur_device_handle_t Device) {
976+
std::scoped_lock<ur_shared_mutex> Guard(InternalQueueMapMutex);
977+
if (!InternalQueueMap[Device])
978+
InternalQueueMap[Device].emplace(Handle, Device);
979+
return *InternalQueueMap[Device];
980+
}
981+
974982
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
975983
[[maybe_unused]] ur_result_t Result;
976984
if (Host.LocalArgs) {

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "asan_statistics.hpp"
2121
#include "sanitizer_common/sanitizer_common.hpp"
2222
#include "sanitizer_common/sanitizer_options.hpp"
23+
#include "sanitizer_common/sanitizer_utils.hpp"
2324
#include "ur_sanitizer_layer.hpp"
2425

2526
#include <memory>
@@ -143,6 +144,10 @@ struct ContextInfo {
143144
std::vector<ur_device_handle_t> DeviceList;
144145
std::unordered_map<ur_device_handle_t, AllocInfoList> AllocInfosMap;
145146

147+
ur_shared_mutex InternalQueueMapMutex;
148+
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
149+
InternalQueueMap;
150+
146151
AsanStatsWrapper Stats;
147152

148153
explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) {
@@ -163,6 +168,8 @@ struct ContextInfo {
163168
}
164169

165170
ur_usm_pool_handle_t getUSMPool();
171+
172+
ur_queue_handle_t getInternalQueue(ur_device_handle_t);
166173
};
167174

168175
struct AsanRuntimeDataWrapper {

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ ur_usm_type_t GetUSMType(ur_context_handle_t Context, const void *MemPtr) {
3131
} // namespace
3232

3333
ManagedQueue::ManagedQueue(ur_context_handle_t Context,
34-
ur_device_handle_t Device) {
34+
ur_device_handle_t Device, bool IsOutOfOrder) {
35+
ur_queue_properties_t Prop{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
36+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE};
3537
[[maybe_unused]] auto Result = getContext()->urDdiTable.Queue.pfnCreate(
36-
Context, Device, nullptr, &Handle);
38+
Context, Device, IsOutOfOrder ? &Prop : nullptr, &Handle);
3739
assert(Result == UR_RESULT_SUCCESS && "Failed to create ManagedQueue");
3840
UR_LOG_L(getContext()->logger, DEBUG, ">>> ManagedQueue {}", (void *)Handle);
3941
}

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
namespace ur_sanitizer_layer {
2424

2525
struct ManagedQueue {
26-
ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device);
26+
ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device,
27+
bool IsOutOfOrder = false);
2728
~ManagedQueue();
2829

2930
// Disable copy semantics

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
9999

100100
std::scoped_lock<ur_shared_mutex> Guard(Mutex);
101101
auto &Allocation = Allocations[Device];
102+
auto CI = getTsanInterceptor()->getContextInfo(Context);
102103
ur_result_t URes = UR_RESULT_SUCCESS;
103104
if (!Allocation) {
104105
ur_usm_desc_t USMDesc{};
@@ -114,7 +115,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
114115
}
115116

116117
if (HostPtr) {
117-
ManagedQueue Queue(Context, Device);
118+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
118119
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
119120
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
120121
if (URes != UR_RESULT_SUCCESS) {
@@ -155,7 +156,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
155156

156157
// Copy data from last synced device to host
157158
{
158-
ManagedQueue Queue(Context, LastSyncedDevice.hDevice);
159+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
159160
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
160161
Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0,
161162
nullptr, nullptr);
@@ -168,7 +169,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
168169

169170
// Sync data back to device
170171
{
171-
ManagedQueue Queue(Context, Device);
172+
ur_queue_handle_t Queue = CI->getInternalQueue(Device);
172173
URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
173174
Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr);
174175
if (URes != UR_RESULT_SUCCESS) {

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ ur_result_t urMemBufferCreate(
402402
std::shared_ptr<ContextInfo> CtxInfo =
403403
getTsanInterceptor()->getContextInfo(hContext);
404404
for (const auto &hDevice : CtxInfo->DeviceList) {
405-
ManagedQueue InternalQueue(hContext, hDevice);
405+
ur_queue_handle_t InternalQueue = CtxInfo->getInternalQueue(hDevice);
406406
char *Handle = nullptr;
407407
UR_CALL(pMemBuffer->getHandle(hDevice, Handle));
408408
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
*/
1414

1515
#include "tsan_interceptor.hpp"
16-
#include "sanitizer_common/sanitizer_utils.hpp"
1716
#include "tsan_report.hpp"
1817

1918
namespace ur_sanitizer_layer {
@@ -107,6 +106,13 @@ void ContextInfo::insertAllocInfo(TsanAllocInfo AI) {
107106
AllocInfos.insert(std::move(AI));
108107
}
109108

109+
ur_queue_handle_t ContextInfo::getInternalQueue(ur_device_handle_t Device) {
110+
std::scoped_lock<ur_shared_mutex> Guard(InternalQueueMapMutex);
111+
if (!InternalQueueMap[Device])
112+
InternalQueueMap[Device].emplace(Handle, Device, true);
113+
return *InternalQueueMap[Device];
114+
}
115+
110116
TsanInterceptor::~TsanInterceptor() {
111117
// We must release these objects before releasing adapters, since
112118
// they may use the adapter in their destructor
@@ -190,7 +196,7 @@ TsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
190196
auto &ProgramInfo = getProgramInfo(Program);
191197

192198
for (auto Device : Devices) {
193-
ManagedQueue Queue(Context, Device);
199+
ur_queue_handle_t Queue = ContextInfo->getInternalQueue(Device);
194200

195201
size_t MetadataSize;
196202
void *MetadataPtr;
@@ -333,16 +339,14 @@ ur_result_t TsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
333339
auto CI = getContextInfo(GetContext(Queue));
334340
auto DI = getDeviceInfo(GetDevice(Queue));
335341

336-
ManagedQueue InternalQueue(CI->Handle, DI->Handle);
337-
if (!InternalQueue) {
338-
UR_LOG_L(getContext()->logger, ERR, "Failed to create internal queue");
339-
return UR_RESULT_ERROR_INVALID_QUEUE;
340-
}
342+
ur_queue_handle_t InternalQueue = CI->getInternalQueue(DI->Handle);
341343

342344
UR_CALL(prepareLaunch(CI, DI, InternalQueue, Kernel, LaunchInfo));
343345

344346
UR_CALL(updateShadowMemory(CI, DI, Kernel, InternalQueue));
345347

348+
UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue));
349+
346350
return UR_RESULT_SUCCESS;
347351
}
348352

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "sanitizer_common/sanitizer_allocator.hpp"
1717
#include "sanitizer_common/sanitizer_common.hpp"
18+
#include "sanitizer_common/sanitizer_utils.hpp"
1819
#include "tsan_buffer.hpp"
1920
#include "tsan_libdevice.hpp"
2021
#include "tsan_shadow.hpp"
@@ -58,13 +59,18 @@ struct ContextInfo {
5859
ur_shared_mutex AllocInfosMutex;
5960
std::set<TsanAllocInfo> AllocInfos;
6061

62+
ur_shared_mutex InternalQueueMapMutex;
63+
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
64+
InternalQueueMap;
65+
6166
explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) {
6267
[[maybe_unused]] auto Result =
6368
getContext()->urDdiTable.Context.pfnRetain(Context);
6469
assert(Result == UR_RESULT_SUCCESS);
6570
}
6671

6772
~ContextInfo() {
73+
InternalQueueMap.clear();
6874
[[maybe_unused]] auto Result =
6975
getContext()->urDdiTable.Context.pfnRelease(Handle);
7076
assert(Result == UR_RESULT_SUCCESS);
@@ -75,6 +81,8 @@ struct ContextInfo {
7581
ContextInfo &operator=(const ContextInfo &) = delete;
7682

7783
void insertAllocInfo(TsanAllocInfo AI);
84+
85+
ur_queue_handle_t getInternalQueue(ur_device_handle_t);
7886
};
7987

8088
struct DeviceGlobalInfo {

0 commit comments

Comments
 (0)