Commit 8408f4b

sync with latest code

1 parent b5e4aa5

File tree: 2 files changed, +126 −211 lines

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 113 additions & 205 deletions
@@ -41,10 +41,106 @@ ur_result_t urEnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value,
                            size_t Size, uint32_t NumEvents = 0,
                            const ur_event_handle_t *EventWaitList = nullptr,
                            ur_event_handle_t *OutEvent = nullptr) {
+    if (Size == 0) {
+        return UR_RESULT_SUCCESS;
+    }
     return context.urDdiTable.Enqueue.pfnUSMFill(
         Queue, Ptr, 1, &Value, Size, NumEvents, EventWaitList, OutEvent);
 }
 
+ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
+                                std::shared_ptr<DeviceInfo> &DeviceInfo,
+                                ur_queue_handle_t Queue, uptr Ptr, uptr Size,
+                                u8 Value) {
+    if (Size == 0) {
+        return UR_RESULT_SUCCESS;
+    }
+    if (DeviceInfo->Type == DeviceType::CPU) {
+        uptr ShadowBegin = MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr);
+        uptr ShadowEnd =
+            MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr + Size - 1);
+
+        // Poisoning shadow memory outside of the asan runtime is not allowed,
+        // so we must prevent this memset call from being intercepted.
+        static auto MemSet =
+            (void *(*)(void *, int, size_t))GetMemFunctionPointer("memset");
+        if (!MemSet) {
+            return UR_RESULT_ERROR_UNKNOWN;
+        }
+        context.logger.debug("enqueueMemSetShadow(addr={}, count={}, value={})",
+                             (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
+                             (void *)(size_t)Value);
+        MemSet((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1);
+    } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
+        uptr ShadowBegin = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr);
+        uptr ShadowEnd =
+            MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr + Size - 1);
+        assert(ShadowBegin <= ShadowEnd);
+        {
+            static const size_t PageSize =
+                GetVirtualMemGranularity(Context, DeviceInfo->Handle);
+
+            ur_physical_mem_properties_t Desc{
+                UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};
+            static ur_physical_mem_handle_t PhysicalMem{};
+
+            // Make sure [Ptr, Ptr + Size] is mapped to physical memory
+            for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
+                 MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
+                if (!PhysicalMem) {
+                    auto URes = context.urDdiTable.PhysicalMem.pfnCreate(
+                        Context, DeviceInfo->Handle, PageSize, &Desc,
+                        &PhysicalMem);
+                    if (URes != UR_RESULT_SUCCESS) {
+                        context.logger.error("urPhysicalMemCreate(): {}", URes);
+                        return URes;
+                    }
+                }
+
+                // context.logger.debug("urVirtualMemMap: {} ~ {}",
+                //                      (void *)MappedPtr,
+                //                      (void *)(MappedPtr + PageSize - 1));
+
+                // FIXME: No flag to check whether the failure reason is that
+                // the VA is already mapped
+                auto URes = context.urDdiTable.VirtualMem.pfnMap(
+                    Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
+                    UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
+                // if (URes != UR_RESULT_SUCCESS) {
+                //     context.logger.debug("urVirtualMemMap(): {}", URes);
+                // }
+
+                // Initialize to zero
+                if (URes == UR_RESULT_SUCCESS) {
+                    // Reset PhysicalMem to null since it's been mapped
+                    PhysicalMem = nullptr;
+
+                    auto URes =
+                        urEnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize);
+                    if (URes != UR_RESULT_SUCCESS) {
+                        context.logger.error("urEnqueueUSMFill(): {}", URes);
+                        return URes;
+                    }
+                }
+            }
+        }
+
+        auto URes = urEnqueueUSMSet(Queue, (void *)ShadowBegin, Value,
+                                    ShadowEnd - ShadowBegin + 1);
+        context.logger.debug(
+            "enqueueMemSetShadow (addr={}, count={}, value={}): {}",
+            (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
+            (void *)(size_t)Value, URes);
+        if (URes != UR_RESULT_SUCCESS) {
+            context.logger.error("urEnqueueUSMFill(): {}", URes);
+            return URes;
+        }
+    } else {
+        context.logger.error("Unsupported device type");
+        return UR_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+    return UR_RESULT_SUCCESS;
+}
+
 } // namespace
 
 SanitizerInterceptor::SanitizerInterceptor() {
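
The shadow translation used above is defined elsewhere in this layer; as a point of reference, here is a minimal sketch of the classic ASan scale-8 mapping (an assumption about the general scheme, not necessarily the exact formula behind MemToShadow_CPU / MemToShadow_PVC):

    #include <cstdint>

    using uptr = uintptr_t;

    // Sketch of a scale-8 shadow mapping: every 8 bytes of application
    // memory are covered by 1 shadow byte, so the address is divided by 8
    // and rebased onto the device's shadow region.
    static inline uptr MemToShadowSketch(uptr ShadowOffset, uptr Addr) {
        return ShadowOffset + (Addr >> 3); // 3 == log2(shadow granularity)
    }

Under such a mapping, translating Ptr + Size - 1 (the last byte) rather than Ptr + Size keeps ShadowEnd inside the covered range, which is why the code above computes inclusive bounds and sizes as ShadowEnd - ShadowBegin + 1.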
@@ -320,179 +416,6 @@ ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) {
     return UR_RESULT_SUCCESS;
 }
 
-ur_result_t SanitizerInterceptor::enqueueMemSetShadow(
-    ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
-    ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value) {
-
-    if (DeviceInfo->Type == DeviceType::CPU) {
-        uptr ShadowBegin = MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr);
-        uptr ShadowEnd =
-            MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr + Size - 1);
-
-        // Poison shadow memory outside of asan runtime is not allowed, so we
-        // need to avoid memset's call from being intercepted.
-        static auto MemSet =
-            (void *(*)(void *, int, size_t))GetMemFunctionPointer("memset");
-        if (!MemSet) {
-            return UR_RESULT_ERROR_UNKNOWN;
-        }
-
-        MemSet((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1);
-        context.logger.debug("enqueueMemSetShadow(addr={}, count={}, value={})",
-                             (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
-                             (void *)(size_t)Value);
-    } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
-        uptr ShadowBegin = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr);
-        uptr ShadowEnd =
-            MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr + Size - 1);
-
-        {
-            static const size_t PageSize =
-                GetVirtualMemGranularity(Context, DeviceInfo->Handle);
-
-            ur_physical_mem_properties_t Desc{
-                UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};
-            static ur_physical_mem_handle_t PhysicalMem{};
-
-            // Make sure [Ptr, Ptr + Size] is mapped to physical memory
-            for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
-                 MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
-                if (!PhysicalMem) {
-                    auto URes = context.urDdiTable.PhysicalMem.pfnCreate(
-                        Context, DeviceInfo->Handle, PageSize, &Desc,
-                        &PhysicalMem);
-                    if (URes != UR_RESULT_SUCCESS) {
-                        context.logger.error("urPhysicalMemCreate(): {}", URes);
-                        return URes;
-                    }
-                }
-
-                context.logger.debug("urVirtualMemMap: {} ~ {}",
-                                     (void *)MappedPtr,
-                                     (void *)(MappedPtr + PageSize - 1));
-
-                // FIXME: No flag to check the failed reason is VA is already mapped
-                auto URes = context.urDdiTable.VirtualMem.pfnMap(
-                    Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
-                    UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
-                if (URes != UR_RESULT_SUCCESS) {
-                    context.logger.debug("urVirtualMemMap(): {}", URes);
-                }
-
-                // Initialize to zero
-                if (URes == UR_RESULT_SUCCESS) {
-                    // Reset PhysicalMem to null since it's been mapped
-                    PhysicalMem = nullptr;
-
-                    auto URes =
-                        urEnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize);
-                    if (URes != UR_RESULT_SUCCESS) {
-                        context.logger.error("urEnqueueUSMFill(): {}", URes);
-                        return URes;
-                    }
-                }
-            }
-        }
-
-        auto URes = urEnqueueUSMSet(Queue, (void *)ShadowBegin, Value,
-                                    ShadowEnd - ShadowBegin + 1);
-        context.logger.debug(
-            "enqueueMemSetShadow (addr={}, count={}, value={}): {}",
-            (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
-            (void *)(size_t)Value, URes);
-        if (URes != UR_RESULT_SUCCESS) {
-            context.logger.error("urEnqueueUSMFill(): {}", URes);
-            return URes;
-        }
-    } else {
-        context.logger.error("Unsupport device type");
-        return UR_RESULT_ERROR_INVALID_ARGUMENT;
-    }
-    return UR_RESULT_SUCCESS;
-}
-
-/// Each 8 bytes of application memory are mapped into one byte of shadow memory
-/// The meaning of that byte:
-///   - Negative: All bytes are not accessible (poisoned)
-///   - 0: All bytes are accessible
-///   - 1 <= k <= 7: Only the first k bytes is accessible
-///
-/// ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping
-ur_result_t SanitizerInterceptor::enqueueAllocInfo(
-    ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
-    ur_queue_handle_t Queue, std::shared_ptr<AllocInfo> &AI) {
-    if (AI->IsReleased) {
-        int ShadowByte;
-        switch (AI->Type) {
-        case AllocType::HOST_USM:
-            ShadowByte = kUsmHostDeallocatedMagic;
-            break;
-        case AllocType::DEVICE_USM:
-            ShadowByte = kUsmDeviceDeallocatedMagic;
-            break;
-        case AllocType::SHARED_USM:
-            ShadowByte = kUsmSharedDeallocatedMagic;
-            break;
-        case AllocType::MEM_BUFFER:
-            ShadowByte = kMemBufferDeallocatedMagic;
-            break;
-        default:
-            ShadowByte = 0xff;
-            assert(false && "Unknow AllocInfo Type");
-        }
-        UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
-                                    AI->AllocSize, ShadowByte));
-        return UR_RESULT_SUCCESS;
-    }
-
-    // Init zero
-    UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
-                                AI->AllocSize, 0));
-
-    uptr TailBegin = RoundUpTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY);
-    uptr TailEnd = AI->AllocBegin + AI->AllocSize;
-
-    // User tail
-    if (TailBegin != AI->UserEnd) {
-        auto Value =
-            AI->UserEnd - RoundDownTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY);
-        UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->UserEnd, 1,
-                                    static_cast<u8>(Value)));
-    }
-
-    int ShadowByte;
-    switch (AI->Type) {
-    case AllocType::HOST_USM:
-        ShadowByte = kUsmHostRedzoneMagic;
-        break;
-    case AllocType::DEVICE_USM:
-        ShadowByte = kUsmDeviceRedzoneMagic;
-        break;
-    case AllocType::SHARED_USM:
-        ShadowByte = kUsmSharedRedzoneMagic;
-        break;
-    case AllocType::MEM_BUFFER:
-        ShadowByte = kMemBufferRedzoneMagic;
-        break;
-    case AllocType::DEVICE_GLOBAL:
-        ShadowByte = kDeviceGlobalRedzoneMagic;
-        break;
-    default:
-        ShadowByte = 0xff;
-        assert(false && "Unknow AllocInfo Type");
-    }
-
-    // Left red zone
-    UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
-                                AI->UserBegin - AI->AllocBegin, ShadowByte));
-
-    // Right red zone
-    UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, TailBegin,
-                                TailEnd - TailBegin, ShadowByte));
-
-    return UR_RESULT_SUCCESS;
-}
-
 ur_result_t SanitizerInterceptor::updateShadowMemory(
     std::shared_ptr<ContextInfo> &ContextInfo,
     std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue) {
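
The doc comment removed above describes the standard ASan shadow encoding: each 8-byte granule of application memory maps to one shadow byte, where a negative value means the whole granule is poisoned, 0 means fully accessible, and 1 <= k <= 7 means only the first k bytes are accessible. A hypothetical decoder, purely illustrative of that encoding:

    #include <cstdint>

    // Illustrative only: is the byte at Addr addressable, given the shadow
    // byte covering its 8-byte granule? (standard ASan semantics)
    bool IsByteAddressable(uintptr_t Addr, int8_t ShadowByte) {
        if (ShadowByte == 0) {
            return true; // whole granule accessible
        }
        if (ShadowByte < 0) {
            return false; // poisoned: redzone or deallocated magic
        }
        // 1 <= ShadowByte <= 7: only the first ShadowByte bytes are valid
        return static_cast<int8_t>(Addr & 7) < ShadowByte;
    }

This is also what the removed enqueueAllocInfo relied on for the user tail: it wrote UserEnd - RoundDownTo(UserEnd, ASAN_SHADOW_GRANULARITY), i.e. the count of accessible leading bytes, into the shadow byte of the granule containing UserEnd.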
@@ -628,19 +551,20 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
 
     do {
         // Write global variable to program
-        auto EnqueueWriteGlobal =
-            [Queue, Program](const char *Name, const void *Value, size_t Size) {
-                auto Result =
-                    context.urDdiTable.Enqueue.pfnDeviceGlobalVariableWrite(
-                        Queue, Program, Name, false, Size, 0, Value, 0, nullptr,
-                        nullptr);
-                if (Result != UR_RESULT_SUCCESS) {
-                    context.logger.warning("Device Global[{}] Write Failed: {}",
-                                           Name, Result);
-                    return false;
-                }
-                return true;
-            };
+        auto EnqueueWriteGlobal = [Queue, Program](const char *Name,
+                                                   const void *Value,
+                                                   size_t Size) {
+            auto Result =
+                context.urDdiTable.Enqueue.pfnDeviceGlobalVariableWrite(
+                    Queue, Program, Name, false, Size, 0, Value, 0, nullptr,
+                    nullptr);
+            if (Result != UR_RESULT_SUCCESS) {
+                context.logger.warning(
+                    "Failed to write device global \"{}\": {}", Name, Result);
+                return false;
+            }
+            return true;
+        };
 
         // Write debug
         EnqueueWriteGlobal(kSPIR_AsanDebug, &cl_Debug, sizeof(cl_Debug));
@@ -741,30 +665,14 @@ SanitizerInterceptor::findAllocInfoByAddress(uptr Address) {
     return --It;
 }
 
-LaunchInfo::LaunchInfo(ur_context_handle_t Context,
-                       const size_t *GlobalWorkSize,
-                       const size_t *LocalWorkSize,
-                       const size_t *GlobalWorkOffset, uint32_t WorkDim)
-    : Context(Context), GlobalWorkSize(GlobalWorkSize),
-      GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
-    [[maybe_unused]] auto Result =
-        context.urDdiTable.Context.pfnRetain(Context);
-    assert(Result == UR_RESULT_SUCCESS);
-
-    if (LocalWorkSize) {
-        this->LocalWorkSize =
-            std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
-    }
-}
-
 LaunchInfo::~LaunchInfo() {
+    [[maybe_unused]] ur_result_t Result;
     if (LocalShadowOffset) {
-        [[maybe_unused]] auto Result =
+        Result =
             context.urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset);
         assert(Result == UR_RESULT_SUCCESS);
     }
-    [[maybe_unused]] auto Result =
-        context.urDdiTable.Context.pfnRelease(Context);
+    Result = context.urDdiTable.Context.pfnRelease(Context);
     assert(Result == UR_RESULT_SUCCESS);
 }

source/loader/layers/sanitizer/asan_interceptor.hpp

Lines changed: 13 additions & 6 deletions
@@ -120,7 +120,17 @@ struct LaunchInfo {
 
     LaunchInfo(ur_context_handle_t Context, const size_t *GlobalWorkSize,
                const size_t *LocalWorkSize, const size_t *GlobalWorkOffset,
-               uint32_t WorkDim);
+               uint32_t WorkDim)
+        : Context(Context), GlobalWorkSize(GlobalWorkSize),
+          GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
+        [[maybe_unused]] auto Result =
+            context.urDdiTable.Context.pfnRetain(Context);
+        assert(Result == UR_RESULT_SUCCESS);
+        if (LocalWorkSize) {
+            this->LocalWorkSize =
+                std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
+        }
+    }
     ~LaunchInfo();
 };
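
With the constructor now defined inline in the header, a hypothetical call-site sketch (names and sizes are illustrative; the real call sites live in the interceptor's launch path):

    // Illustrative only: populating LaunchInfo for a 1-D launch. The ctor
    // retains Context; the out-of-line dtor releases it and frees
    // LocalShadowOffset if one was allocated.
    size_t GlobalWorkSize[1] = {1024};
    size_t LocalWorkSize[1] = {64};
    size_t GlobalWorkOffset[1] = {0};
    LaunchInfo Info(Context, GlobalWorkSize, LocalWorkSize, GlobalWorkOffset,
                    /*WorkDim=*/1);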

@@ -189,10 +199,6 @@ class SanitizerInterceptor {
 
     ur_result_t allocShadowMemory(ur_context_handle_t Context,
                                   std::shared_ptr<DeviceInfo> &DeviceInfo);
-    ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
-                                    std::shared_ptr<DeviceInfo> &DeviceInfo,
-                                    ur_queue_handle_t Queue, uptr Addr,
-                                    uptr Size, u8 Value);
 
     std::shared_ptr<DeviceInfo> getDeviceInfo(ur_device_handle_t Device) {
         std::shared_lock<ur_shared_mutex> Guard(m_DeviceMapMutex);
@@ -209,10 +215,11 @@ class SanitizerInterceptor {
         m_DeviceMap;
     ur_shared_mutex m_DeviceMapMutex;
 
-    /// Assumption: all usm chunks are allocated in one VA
+    /// Assumption: all USM chunks are allocated in one VA
     AllocationMap m_AllocationMap;
     ur_shared_mutex m_AllocationMapMutex;
 
+    // We use "uint64_t" here because EnqueueWriteGlobal will fail when it's "uint32_t"
    uint64_t cl_Debug = 0;
    uint32_t cl_MaxQuarantineSizeMB = 0;
    bool cl_DetectLocals = true;
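
A hedged illustration of the pitfall the new uint64_t comment warns about (this assumes the instrumented device-side global is 8 bytes wide; the diff itself only states that the write fails when the host variable is uint32_t):

    // If the device global behind kSPIR_AsanDebug is 64-bit, writing only
    // sizeof(uint32_t) bytes from the host mismatches the global and
    // pfnDeviceGlobalVariableWrite reports failure, so the host variable
    // mirrors the global's width:
    uint64_t cl_Debug = 0;
    EnqueueWriteGlobal(kSPIR_AsanDebug, &cl_Debug, sizeof(cl_Debug));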
