Skip to content

Commit dc588e4

Browse files
committed
[DeviceASAN] Re-use shadow if required size is not larger than last one
1 parent dbd168c commit dc588e4

File tree

3 files changed

+163
-124
lines changed

3 files changed

+163
-124
lines changed

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 34 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -682,28 +682,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
682682
LocalWorkSize[Dim];
683683
}
684684

685-
auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
686-
Device = DeviceInfo->Handle,
687-
Queue](size_t Size, uptr &Ptr) {
688-
void *Allocated = nullptr;
689-
auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
690-
Context, Device, nullptr, nullptr, Size, &Allocated);
691-
if (URes != UR_RESULT_SUCCESS) {
692-
return URes;
693-
}
694-
// Initialize shadow memory
695-
URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size);
696-
if (URes != UR_RESULT_SUCCESS) {
697-
[[maybe_unused]] auto URes =
698-
getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
699-
assert(URes == UR_RESULT_SUCCESS &&
700-
"urUSMFree failed at allocating shadow memory");
701-
Allocated = nullptr;
702-
}
703-
Ptr = (uptr)Allocated;
704-
return URes;
705-
};
706-
707685
auto LocalMemoryUsage =
708686
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
709687
auto PrivateMemoryUsage =
@@ -715,86 +693,45 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
715693

716694
// Write shadow memory offset for local memory
717695
if (getOptions().DetectLocals) {
718-
// CPU needn't this
719-
if (DeviceInfo->Type == DeviceType::GPU_PVC ||
720-
DeviceInfo->Type == DeviceType::GPU_DG2) {
721-
const size_t LocalMemorySize =
722-
GetDeviceLocalMemorySize(DeviceInfo->Handle);
723-
const size_t LocalShadowMemorySize =
724-
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
725-
726-
getContext()->logger.debug(
727-
"LocalMemory(WorkGroup={}, LocalMemorySize={}, "
728-
"LocalShadowMemorySize={})",
729-
NumWG, LocalMemorySize, LocalShadowMemorySize);
730-
731-
if (EnqueueAllocateShadowMemory(
732-
LocalShadowMemorySize,
733-
LaunchInfo.Data->LocalShadowOffset) !=
734-
UR_RESULT_SUCCESS) {
735-
getContext()->logger.warning(
736-
"Failed to allocate shadow memory for local "
737-
"memory, maybe the number of workgroup ({}) is too "
738-
"large",
739-
NumWG);
740-
getContext()->logger.warning(
741-
"Skip checking local memory of kernel <{}>",
742-
GetKernelName(Kernel));
743-
} else {
744-
LaunchInfo.Data->LocalShadowOffsetEnd =
745-
LaunchInfo.Data->LocalShadowOffset +
746-
LocalShadowMemorySize - 1;
747-
748-
ContextInfo->Stats.UpdateShadowMalloced(
749-
LocalShadowMemorySize);
750-
751-
getContext()->logger.info(
752-
"ShadowMemory(Local, {} - {})",
753-
(void *)LaunchInfo.Data->LocalShadowOffset,
754-
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
755-
}
696+
if (DeviceInfo->Shadow->AllocLocalShadow(
697+
Queue, NumWG, LaunchInfo.Data->LocalShadowOffset,
698+
LaunchInfo.Data->LocalShadowOffsetEnd) !=
699+
UR_RESULT_SUCCESS) {
700+
getContext()->logger.warning(
701+
"Failed to allocate shadow memory for local "
702+
"memory, maybe the number of workgroup ({}) is too "
703+
"large",
704+
NumWG);
705+
getContext()->logger.warning(
706+
"Skip checking local memory of kernel <{}>",
707+
GetKernelName(Kernel));
708+
} else {
709+
getContext()->logger.info(
710+
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
711+
(void *)LaunchInfo.Data->LocalShadowOffset,
712+
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
756713
}
757714
}
758715

759716
// Write shadow memory offset for private memory
760717
if (getOptions().DetectPrivates) {
761-
if (DeviceInfo->Type == DeviceType::CPU) {
762-
LaunchInfo.Data->PrivateShadowOffset =
763-
DeviceInfo->Shadow->ShadowBegin;
764-
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
765-
DeviceInfo->Type == DeviceType::GPU_DG2) {
766-
const size_t PrivateShadowMemorySize =
767-
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
768-
769-
getContext()->logger.debug("PrivateMemory(WorkGroup={}, "
770-
"PrivateShadowMemorySize={})",
771-
NumWG, PrivateShadowMemorySize);
772-
773-
if (EnqueueAllocateShadowMemory(
774-
PrivateShadowMemorySize,
775-
LaunchInfo.Data->PrivateShadowOffset) !=
776-
UR_RESULT_SUCCESS) {
777-
getContext()->logger.warning(
778-
"Failed to allocate shadow memory for private "
779-
"memory, maybe the number of workgroup ({}) is too "
780-
"large",
781-
NumWG);
782-
getContext()->logger.warning(
783-
"Skip checking private memory of kernel <{}>",
784-
GetKernelName(Kernel));
785-
} else {
786-
LaunchInfo.Data->PrivateShadowOffsetEnd =
787-
LaunchInfo.Data->PrivateShadowOffset +
788-
PrivateShadowMemorySize - 1;
789-
790-
ContextInfo->Stats.UpdateShadowMalloced(
791-
PrivateShadowMemorySize);
792-
793-
getContext()->logger.info(
794-
"ShadowMemory(Private, {} - {})",
795-
(void *)LaunchInfo.Data->PrivateShadowOffset,
796-
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
797-
}
718+
if (DeviceInfo->Shadow->AllocPrivateShadow(
719+
Queue, NumWG, LaunchInfo.Data->PrivateShadowOffset,
720+
LaunchInfo.Data->PrivateShadowOffsetEnd) !=
721+
UR_RESULT_SUCCESS) {
722+
getContext()->logger.warning(
723+
"Failed to allocate shadow memory for private "
724+
"memory, maybe the number of workgroup ({}) is too "
725+
"large",
726+
NumWG);
727+
getContext()->logger.warning(
728+
"Skip checking private memory of kernel <{}>",
729+
GetKernelName(Kernel));
730+
} else {
731+
getContext()->logger.info(
732+
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
733+
(void *)LaunchInfo.Data->PrivateShadowOffset,
734+
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
798735
}
799736
}
800737
} while (false);
@@ -878,25 +815,7 @@ ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
878815
USMLaunchInfo::~USMLaunchInfo() {
879816
[[maybe_unused]] ur_result_t Result;
880817
if (Data) {
881-
auto Type = GetDeviceType(Context, Device);
882818
auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
883-
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
884-
if (Data->PrivateShadowOffset) {
885-
ContextInfo->Stats.UpdateShadowFreed(
886-
Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
887-
1);
888-
Result = getContext()->urDdiTable.USM.pfnFree(
889-
Context, (void *)Data->PrivateShadowOffset);
890-
assert(Result == UR_RESULT_SUCCESS);
891-
}
892-
if (Data->LocalShadowOffset) {
893-
ContextInfo->Stats.UpdateShadowFreed(
894-
Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
895-
Result = getContext()->urDdiTable.USM.pfnFree(
896-
Context, (void *)Data->LocalShadowOffset);
897-
assert(Result == UR_RESULT_SUCCESS);
898-
}
899-
}
900819
if (Data->LocalArgs) {
901820
Result = getContext()->urDdiTable.USM.pfnFree(
902821
Context, (void *)Data->LocalArgs);

source/loader/layers/sanitizer/asan_shadow.cpp

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,23 @@ ur_result_t ShadowMemoryGPU::Setup() {
131131
}
132132

133133
/// Release every device resource held by this GPU shadow-memory object:
/// the private-memory shadow buffer, the local-memory shadow buffer, and the
/// reserved virtual shadow range together with the context reference.
///
/// Teardown is best-effort: a failure while releasing one resource does not
/// stop the remaining resources from being released (the previous version
/// used UR_CALL for each step, which appears to return on the first failure
/// and would therefore skip the later frees and the context release).
///
/// \return UR_RESULT_SUCCESS when everything was released, otherwise the
///         first error that was encountered.
ur_result_t ShadowMemoryGPU::Destory() {
    ur_result_t FirstError = UR_RESULT_SUCCESS;
    // Remember only the first failure; later ones are usually consequences.
    auto Record = [&FirstError](ur_result_t Res) {
        if (Res != UR_RESULT_SUCCESS && FirstError == UR_RESULT_SUCCESS) {
            FirstError = Res;
        }
    };

    if (PrivateShadowOffset != 0) {
        Record(getContext()->urDdiTable.USM.pfnFree(
            Context, (void *)PrivateShadowOffset));
        PrivateShadowOffset = 0;
    }
    if (LocalShadowOffset != 0) {
        Record(getContext()->urDdiTable.USM.pfnFree(
            Context, (void *)LocalShadowOffset));
        LocalShadowOffset = 0;
    }
    if (ShadowBegin != 0) {
        Record(getContext()->urDdiTable.VirtualMem.pfnFree(
            Context, (const void *)ShadowBegin, GetShadowSize()));
        // Drop the context reference even if the virtual range could not be
        // freed, so the context itself is not leaked.
        Record(getContext()->urDdiTable.Context.pfnRelease(Context));
        ShadowBegin = ShadowEnd = 0;
    }
    return FirstError;
}
145152

146153
ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
@@ -255,6 +262,87 @@ ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
255262
return UR_RESULT_SUCCESS;
256263
}
257264

265+
/// Provide the device shadow region used to check work-group local memory
/// for one kernel launch.
///
/// The required size is (NumWG * device local memory size) >>
/// ASAN_SHADOW_SCALE. The buffer from an earlier call is reused when it is
/// large enough; otherwise it is freed and a larger buffer is allocated and
/// zero-filled through \p Queue.
///
/// \param Queue  queue used to zero-initialize a newly allocated buffer.
/// \param NumWG  number of work-groups in the launch.
/// \param Begin  [out] first shadow address; written only on success.
/// \param End    [out] last shadow address (inclusive); written only on
///               success.
/// \return UR_RESULT_SUCCESS, or the error reported while freeing,
///         allocating or initializing the shadow buffer.
ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
                                              uint32_t NumWG, uptr &Begin,
                                              uptr &End) {
    const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
    const size_t RequiredShadowSize =
        (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;

    // NOTE(review): this is a function-local static, so the cached size is
    // shared by every ShadowMemoryGPU instance while LocalShadowOffset is
    // per-instance. With more than one GPU device the two can disagree; a
    // per-instance member would be the proper fix (needs a header change).
    static size_t LastAllocedSize = 0;

    // Allocate when the cached buffer is too small, and also when this
    // instance holds no buffer at all (e.g. after Destory() or when the
    // cached size was set by another instance) so that a null region is
    // never handed out for a non-empty request.
    const bool NeedsAlloc =
        RequiredShadowSize > LastAllocedSize ||
        (LocalShadowOffset == 0 && RequiredShadowSize != 0);

    if (NeedsAlloc) {
        auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
        if (LocalShadowOffset) {
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)LocalShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            LocalShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a real void* instead of writing through a cast
        // uptr*, and publish the address only once it is fully initialized.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize,
            &Allocated));

        // Initialize shadow memory
        const ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Best-effort cleanup; the initialization error is the one the
            // caller needs to see. Previously this path fell through,
            // recorded the allocation in the stats and returned success
            // with a null region.
            (void)getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
            return URes;
        }

        LocalShadowOffset = (uptr)Allocated;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
        LastAllocedSize = RequiredShadowSize;
    }

    // NOTE(review): a reused buffer is not cleared again here — confirm that
    // stale shadow values from a previous launch cannot be observed.
    Begin = LocalShadowOffset;
    End = LocalShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}
305+
306+
/// Provide the device shadow region used to check private memory for one
/// kernel launch.
///
/// The required size is (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE.
/// The buffer from an earlier call is reused when it is large enough;
/// otherwise it is freed and a larger buffer is allocated and zero-filled
/// through \p Queue.
///
/// \param Queue  queue used to zero-initialize a newly allocated buffer.
/// \param NumWG  number of work-groups in the launch.
/// \param Begin  [out] first shadow address; written only on success.
/// \param End    [out] last shadow address (inclusive); written only on
///               success.
/// \return UR_RESULT_SUCCESS, or the error reported while freeing,
///         allocating or initializing the shadow buffer.
ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
                                                uint32_t NumWG, uptr &Begin,
                                                uptr &End) {
    const size_t RequiredShadowSize =
        (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;

    // NOTE(review): this is a function-local static, so the cached size is
    // shared by every ShadowMemoryGPU instance while PrivateShadowOffset is
    // per-instance. With more than one GPU device the two can disagree; a
    // per-instance member would be the proper fix (needs a header change).
    static size_t LastAllocedSize = 0;

    // Allocate when the cached buffer is too small, and also when this
    // instance holds no buffer at all (e.g. after Destory() or when the
    // cached size was set by another instance) so that a null region is
    // never handed out for a non-empty request.
    const bool NeedsAlloc =
        RequiredShadowSize > LastAllocedSize ||
        (PrivateShadowOffset == 0 && RequiredShadowSize != 0);

    if (NeedsAlloc) {
        auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
        if (PrivateShadowOffset) {
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)PrivateShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            PrivateShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a real void* instead of writing through a cast
        // uptr*, and publish the address only once it is fully initialized.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize,
            &Allocated));

        // Initialize shadow memory
        const ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Best-effort cleanup; the initialization error is the one the
            // caller needs to see. Previously this path fell through,
            // recorded the allocation in the stats and returned success
            // with a null region.
            (void)getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
            return URes;
        }

        PrivateShadowOffset = (uptr)Allocated;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
        LastAllocedSize = RequiredShadowSize;
    }

    // NOTE(review): a reused buffer is not cleared again here — confirm that
    // stale shadow values from a previous launch cannot be observed.
    Begin = PrivateShadowOffset;
    End = PrivateShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}
345+
258346
uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
259347
if (Ptr & 0xFF00000000000000ULL) { // Device USM
260348
return ShadowBegin + 0x80000000000ULL +

source/loader/layers/sanitizer/asan_shadow.hpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ struct ShadowMemory {
3939

4040
virtual size_t GetShadowSize() = 0;
4141

42+
virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
43+
uint32_t NumWG, uptr &Begin,
44+
uptr &End) = 0;
45+
46+
virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
47+
uint32_t NumWG, uptr &Begin,
48+
uptr &End) = 0;
49+
4250
ur_context_handle_t Context{};
4351

4452
ur_device_handle_t Device{};
@@ -62,6 +70,20 @@ struct ShadowMemoryCPU final : public ShadowMemory {
6270
uptr Size, u8 Value) override;
6371

6472
size_t GetShadowSize() override { return 0x80000000000ULL; }
73+
74+
/// CPU variant: no allocation is performed. The queue and work-group count
/// are ignored and the object's [ShadowBegin, ShadowEnd] range is reported
/// as the local-memory shadow region.
ur_result_t AllocLocalShadow(ur_queue_handle_t /*Queue*/,
                             uint32_t /*NumWG*/, uptr &Begin,
                             uptr &End) override {
    Begin = ShadowBegin;
    End = ShadowEnd;
    return UR_RESULT_SUCCESS;
}
80+
81+
/// CPU variant: no allocation is performed. The queue and work-group count
/// are ignored and the object's [ShadowBegin, ShadowEnd] range is reported
/// as the private-memory shadow region.
ur_result_t AllocPrivateShadow(ur_queue_handle_t /*Queue*/,
                               uint32_t /*NumWG*/, uptr &Begin,
                               uptr &End) override {
    Begin = ShadowBegin;
    End = ShadowEnd;
    return UR_RESULT_SUCCESS;
}
6587
};
6688

6789
struct ShadowMemoryGPU : public ShadowMemory {
@@ -76,12 +98,22 @@ struct ShadowMemoryGPU : public ShadowMemory {
7698

7799
ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;
78100

101+
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
102+
uptr &Begin, uptr &End) override final;
103+
104+
ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG,
105+
uptr &Begin, uptr &End) override final;
106+
79107
ur_mutex VirtualMemMapsMutex;
80108

81109
std::unordered_map<
82110
uptr, std::pair<ur_physical_mem_handle_t,
83111
std::unordered_set<std::shared_ptr<AllocInfo>>>>
84112
VirtualMemMaps;
113+
114+
uptr LocalShadowOffset = 0;
115+
116+
uptr PrivateShadowOffset = 0;
85117
};
86118

87119
/// Shadow Memory layout of GPU PVC device

0 commit comments

Comments
 (0)