Skip to content

Commit 1e6ce3e

Browse files
authored
[DevASAN] Move memory alloc info into DeviceInfo (#20611)
Since CPU/GPU device both support USM indirect access, we need to poison shadow of whole allocated memory in the device instead of only one context.
1 parent 0c0349a commit 1e6ce3e

File tree

10 files changed

+89
-43
lines changed

10 files changed

+89
-43
lines changed

sycl/test-e2e/AddressSanitizer/invalid-argument/bad-context.cpp renamed to sycl/test-e2e/AddressSanitizer/invalid-argument/bad-device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ int main() {
1717
});
1818
gpu_queue.wait();
1919
// CHECK: DeviceSanitizer: invalid-argument on kernel
20-
// CHECK: The {{[0-9]+}}th argument {{.*}} is allocated in other context
20+
// CHECK: The {{[0-9]+}}th argument {{.*}} is allocated in other device
2121
// CHECK: {{.*}} is located inside of Device USM region
2222

2323
sycl::free(data, cpu_queue);

unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
5656
(void *)DI->Handle, (void *)Context);
5757
DI->Shadow = ShadowMemory;
5858
CI->DeviceList.emplace_back(hDevice);
59-
CI->AllocInfosMap[hDevice];
6059
}
6160
}
6261
return UR_RESULT_SUCCESS;
@@ -1620,6 +1619,30 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
16201619
return result;
16211620
}
16221621

1622+
__urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo(
1623+
/// [in] handle of the kernel object
1624+
ur_kernel_handle_t hKernel,
1625+
/// [in] name of the execution attribute
1626+
ur_kernel_exec_info_t propName,
1627+
/// [in] size in byte the attribute value
1628+
size_t propSize,
1629+
/// [in][optional] pointer to execution info properties.
1630+
const ur_kernel_exec_info_properties_t *pProperties,
1631+
/// [in][typename(propName, propSize)] pointer to memory location holding
1632+
/// the property value.
1633+
const void *pPropValue) {
1634+
UR_LOG_L(getContext()->logger, DEBUG, "==== urKernelSetExecInfo");
1635+
1636+
UR_CALL(getContext()->urDdiTable.Kernel.pfnSetExecInfo(
1637+
hKernel, propName, propSize, pProperties, pPropValue));
1638+
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
1639+
if (propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS) {
1640+
KI.IsIndirectAccess = *ur_cast<const bool *>(pPropValue);
1641+
}
1642+
1643+
return UR_RESULT_SUCCESS;
1644+
}
1645+
16231646
///////////////////////////////////////////////////////////////////////////////
16241647
/// @brief Intercept function for urDeviceGetInfo
16251648
__urdlllocal ur_result_t UR_APICALL urDeviceGetInfo(
@@ -1922,6 +1945,7 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable(
19221945
pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::asan::urKernelSetArgMemObj;
19231946
pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::asan::urKernelSetArgLocal;
19241947
pDdiTable->pfnSetArgPointer = ur_sanitizer_layer::asan::urKernelSetArgPointer;
1948+
pDdiTable->pfnSetExecInfo = ur_sanitizer_layer::asan::urKernelSetExecInfo;
19251949

19261950
return result;
19271951
}

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,11 @@ ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context,
143143
AI->print();
144144

145145
// For updating shadow memory
146-
if (Device) { // Device/Shared USM
147-
ContextInfo->insertAllocInfo({Device}, AI);
146+
if (DeviceInfo) { // Device/Shared USM
147+
DeviceInfo->insertAllocInfo(AI);
148148
} else { // Host USM
149-
ContextInfo->insertAllocInfo(ContextInfo->DeviceList, AI);
149+
for (const auto &Device : ContextInfo->DeviceList)
150+
getDeviceInfo(Device)->insertAllocInfo(AI);
150151
}
151152

152153
// For memory release
@@ -212,9 +213,10 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
212213
AllocInfo->ReleaseStack = GetCurrentBacktrace();
213214

214215
if (AllocInfo->Type == AllocType::HOST_USM) {
215-
ContextInfo->insertAllocInfo(ContextInfo->DeviceList, AllocInfo);
216+
for (const auto &Device : ContextInfo->DeviceList)
217+
getDeviceInfo(Device)->insertAllocInfo(AllocInfo);
216218
} else {
217-
ContextInfo->insertAllocInfo({AllocInfo->Device}, AllocInfo);
219+
getDeviceInfo(AllocInfo->Device)->insertAllocInfo(AllocInfo);
218220
}
219221

220222
// If quarantine is disabled, USM is freed immediately
@@ -279,7 +281,7 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
279281
(void)ArgPointer;
280282
}
281283
}
282-
UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue));
284+
UR_CALL(updateShadowMemory(DeviceInfo, InternalQueue));
283285

284286
UR_CALL(prepareLaunch(ContextInfo, DeviceInfo, InternalQueue, Kernel,
285287
LaunchInfo));
@@ -423,16 +425,14 @@ AsanInterceptor::enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,
423425
}
424426

425427
ur_result_t
426-
AsanInterceptor::updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
427-
std::shared_ptr<DeviceInfo> &DeviceInfo,
428+
AsanInterceptor::updateShadowMemory(std::shared_ptr<DeviceInfo> &DeviceInfo,
428429
ur_queue_handle_t Queue) {
429-
auto &AllocInfos = ContextInfo->AllocInfosMap[DeviceInfo->Handle];
430-
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
430+
std::scoped_lock<ur_shared_mutex> Guard(DeviceInfo->AllocInfos.Mutex);
431431

432-
for (auto &AI : AllocInfos.List) {
432+
for (auto &AI : DeviceInfo->AllocInfos.List) {
433433
UR_CALL(enqueueAllocInfo(DeviceInfo, Queue, AI));
434434
}
435-
AllocInfos.List.clear();
435+
DeviceInfo->AllocInfos.List.clear();
436436

437437
return UR_RESULT_SUCCESS;
438438
}
@@ -585,7 +585,7 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
585585
GetCurrentBacktrace(),
586586
{}});
587587

588-
ContextInfo->insertAllocInfo({Device}, AI);
588+
getDeviceInfo(Device)->insertAllocInfo(AI);
589589
ProgramInfo->AllocInfoForGlobals.emplace(AI);
590590

591591
std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
@@ -754,7 +754,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
754754
continue;
755755
}
756756
if (auto ValidateResult = ValidateUSMPointer(
757-
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
757+
Kernel, ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
758758
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, ValidateResult,
759759
PtrPair.second);
760760
if (ValidateResult.Type != ValidateUSMResult::MAYBE_HOST_POINTER) {
@@ -801,7 +801,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
801801
if (LaunchInfo.LocalWorkSize.empty()) {
802802
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
803803
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
804-
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
804+
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
805805
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
806806
if (URes != UR_RESULT_SUCCESS) {
807807
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {

unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,16 @@ struct DeviceInfo {
5454
std::queue<std::shared_ptr<AllocInfo>> Quarantine;
5555
size_t QuarantineSize = 0;
5656

57+
AllocInfoList AllocInfos;
58+
5759
// Device handles are special and alive in the whole process lifetime,
5860
// so we needn't retain&release here.
5961
explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {}
62+
63+
void insertAllocInfo(std::shared_ptr<AllocInfo> &AI) {
64+
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
65+
AllocInfos.List.emplace_back(AI);
66+
}
6067
};
6168

6269
struct QueueInfo {
@@ -88,6 +95,8 @@ struct KernelInfo {
8895
bool IsInstrumented = false;
8996
// check shadow bounds
9097
bool IsCheckShadowBounds = false;
98+
// might have indirect access
99+
bool IsIndirectAccess = false;
91100

92101
// lock this mutex if following fields are accessed
93102
ur_shared_mutex Mutex;
@@ -147,7 +156,6 @@ struct ContextInfo {
147156
std::atomic<int32_t> RefCount = 1;
148157

149158
std::vector<ur_device_handle_t> DeviceList;
150-
std::unordered_map<ur_device_handle_t, AllocInfoList> AllocInfosMap;
151159

152160
ur_shared_mutex InternalQueueMapMutex;
153161
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
@@ -169,15 +177,6 @@ struct ContextInfo {
169177

170178
~ContextInfo();
171179

172-
void insertAllocInfo(const std::vector<ur_device_handle_t> &Devices,
173-
std::shared_ptr<AllocInfo> &AI) {
174-
for (auto Device : Devices) {
175-
auto &AllocInfos = AllocInfosMap[Device];
176-
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);
177-
AllocInfos.List.emplace_back(AI);
178-
}
179-
}
180-
181180
ur_usm_pool_handle_t getUSMPool();
182181

183182
ur_queue_handle_t getInternalQueue(ur_device_handle_t);
@@ -249,7 +248,7 @@ struct LaunchInfo {
249248
ur_context_handle_t Context = nullptr;
250249
ur_device_handle_t Device = nullptr;
251250
const size_t *GlobalWorkSize = nullptr;
252-
const size_t *GlobalWorkOffset = nullptr;
251+
std::vector<size_t> GlobalWorkOffset;
253252
std::vector<size_t> LocalWorkSize;
254253
uint32_t WorkDim = 0;
255254

@@ -259,12 +258,19 @@ struct LaunchInfo {
259258
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
260259
const size_t *GlobalWorkOffset, uint32_t WorkDim)
261260
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
262-
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim),
263-
Data(Context, Device) {
261+
WorkDim(WorkDim), Data(Context, Device) {
264262
if (LocalWorkSize) {
265263
this->LocalWorkSize =
266264
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
267265
}
266+
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
267+
// value array if user doesn't specify its value.
268+
if (GlobalWorkOffset) {
269+
this->GlobalWorkOffset =
270+
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
271+
} else {
272+
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
273+
}
268274
[[maybe_unused]] auto Result =
269275
getContext()->urDdiTable.Context.pfnRetain(Context);
270276
assert(Result == UR_RESULT_SUCCESS);
@@ -375,8 +381,7 @@ class AsanInterceptor {
375381
ur_shared_mutex KernelLaunchMutex;
376382

377383
private:
378-
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
379-
std::shared_ptr<DeviceInfo> &DeviceInfo,
384+
ur_result_t updateShadowMemory(std::shared_ptr<DeviceInfo> &DeviceInfo,
380385
ur_queue_handle_t Queue);
381386

382387
ur_result_t enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,

unified-runtime/source/loader/layers/sanitizer/asan/asan_validator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ bool IsSameDevice(ur_device_handle_t Device1, ur_device_handle_t Device2) {
3636

3737
} // namespace
3838

39-
ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,
39+
ValidateUSMResult ValidateUSMPointer(ur_kernel_handle_t Kernel,
40+
ur_context_handle_t Context,
4041
ur_device_handle_t Device, uptr Ptr) {
4142
assert(Ptr != 0 && "Don't validate nullptr here");
4243

@@ -53,7 +54,8 @@ ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,
5354

5455
auto AllocInfo = AllocInfoItOp.value()->second;
5556

56-
if (AllocInfo->Context != Context) {
57+
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(Kernel);
58+
if (!KI.IsIndirectAccess && AllocInfo->Context != Context) {
5759
return ValidateUSMResult::fail(ValidateUSMResult::BAD_CONTEXT, AllocInfo);
5860
}
5961

unified-runtime/source/loader/layers/sanitizer/asan/asan_validator.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ struct ValidateUSMResult {
4646
}
4747
};
4848

49-
ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context,
49+
ValidateUSMResult ValidateUSMPointer(ur_kernel_handle_t Kernel,
50+
ur_context_handle_t Context,
5051
ur_device_handle_t Device, uptr Ptr);
5152

5253
} // namespace asan

unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ ur_result_t MsanInterceptor::prepareLaunch(
509509
if (LaunchInfo.LocalWorkSize.empty()) {
510510
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
511511
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
512-
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
512+
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
513513
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
514514
if (URes != UR_RESULT_SUCCESS) {
515515
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {

unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,20 +225,27 @@ struct USMLaunchInfo {
225225
ur_context_handle_t Context = nullptr;
226226
ur_device_handle_t Device = nullptr;
227227
const size_t *GlobalWorkSize = nullptr;
228-
const size_t *GlobalWorkOffset = nullptr;
228+
std::vector<size_t> GlobalWorkOffset;
229229
std::vector<size_t> LocalWorkSize;
230230
uint32_t WorkDim = 0;
231231

232232
USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
233233
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
234234
const size_t *GlobalWorkOffset, uint32_t WorkDim)
235235
: Data(Context, Device), Context(Context), Device(Device),
236-
GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset),
237-
WorkDim(WorkDim) {
236+
GlobalWorkSize(GlobalWorkSize), WorkDim(WorkDim) {
238237
if (LocalWorkSize) {
239238
this->LocalWorkSize =
240239
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
241240
}
241+
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
242+
// value array if user doesn't specify its value.
243+
if (GlobalWorkOffset) {
244+
this->GlobalWorkOffset =
245+
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
246+
} else {
247+
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
248+
}
242249
}
243250
~USMLaunchInfo();
244251

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr<ContextInfo> &,
408408
if (LaunchInfo.LocalWorkSize.empty()) {
409409
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
410410
auto URes = getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
411-
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
411+
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset.data(),
412412
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
413413
if (URes != UR_RESULT_SUCCESS) {
414414
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ struct LaunchInfo {
181181
ur_context_handle_t Context = nullptr;
182182
ur_device_handle_t Device = nullptr;
183183
const size_t *GlobalWorkSize = nullptr;
184-
const size_t *GlobalWorkOffset = nullptr;
184+
std::vector<size_t> GlobalWorkOffset;
185185
std::vector<size_t> LocalWorkSize;
186186
uint32_t WorkDim = 0;
187187
TsanRuntimeDataWrapper Data;
@@ -190,8 +190,7 @@ struct LaunchInfo {
190190
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
191191
const size_t *GlobalWorkOffset, uint32_t WorkDim)
192192
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
193-
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim),
194-
Data(Context, Device) {
193+
WorkDim(WorkDim), Data(Context, Device) {
195194
[[maybe_unused]] auto Result =
196195
getContext()->urDdiTable.Context.pfnRetain(Context);
197196
assert(Result == UR_RESULT_SUCCESS);
@@ -201,6 +200,14 @@ struct LaunchInfo {
201200
this->LocalWorkSize =
202201
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
203202
}
203+
// UR doesn't allow GlobalWorkOffset is null, we need to construct a zero
204+
// value array if user doesn't specify its value.
205+
if (GlobalWorkOffset) {
206+
this->GlobalWorkOffset =
207+
std::vector<size_t>(GlobalWorkOffset, GlobalWorkOffset + WorkDim);
208+
} else {
209+
this->GlobalWorkOffset = std::vector<size_t>(WorkDim, 0);
210+
}
204211
}
205212

206213
~LaunchInfo() {

0 commit comments

Comments
 (0)