Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 69 additions & 66 deletions source/loader/layers/sanitizer/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,

ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo) {
LaunchInfo &LaunchInfo) {
auto Context = GetContext(Queue);
auto Device = GetDevice(Queue);
auto ContextInfo = getContextInfo(Context);
Expand All @@ -268,12 +268,14 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,

ur_result_t SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo) {
LaunchInfo &LaunchInfo) {
// FIXME: We must use block operation here, until we support urEventSetCallback
auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue);

UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));

if (Result == UR_RESULT_SUCCESS) {
for (const auto &AH : LaunchInfo.Data->SanitizerReport) {
for (const auto &AH : LaunchInfo.Data.Host.SanitizerReport) {
if (!AH.Flag) {
continue;
}
Expand Down Expand Up @@ -600,7 +602,7 @@ SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
ur_result_t SanitizerInterceptor::prepareLaunch(
std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {

do {
auto KernelInfo = getKernelInfo(Kernel);
Expand Down Expand Up @@ -635,27 +637,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
}
}

// Set launch info argument
auto ArgNums = GetKernelNumArgs(Kernel);
if (ArgNums) {
getContext()->logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs,
(void *)LaunchInfo.Data->LocalArgs);
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to set launch info: {}",
URes);
return URes;
}
}

LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0;

if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes =
Expand All @@ -682,6 +663,34 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
LocalWorkSize[Dim];
}

// Set launch info argument
auto ArgNums = GetKernelNumArgs(Kernel);
if (ArgNums == 0) {
return UR_RESULT_SUCCESS;
}

LaunchInfo.Data.Host.GlobalShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd =
DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;

UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr,
sizeof(LaunchInfo), (void **)&LaunchInfo.Data.DevicePtr));
getContext()->logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data.DevicePtr,
LaunchInfo.Data.Host.NumLocalArgs,
(void *)LaunchInfo.Data.Host.LocalArgs);
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.DevicePtr);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to set launch info: {}", URes);
return URes;
}

auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
Device = DeviceInfo->Handle,
Queue](size_t Size, uptr &Ptr) {
Expand Down Expand Up @@ -730,7 +739,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch(

if (EnqueueAllocateShadowMemory(
LocalShadowMemorySize,
LaunchInfo.Data->LocalShadowOffset) !=
LaunchInfo.Data.Host.LocalShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
Expand All @@ -741,25 +750,25 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data->LocalShadowOffsetEnd =
LaunchInfo.Data->LocalShadowOffset +
LaunchInfo.Data.Host.LocalShadowOffsetEnd =
LaunchInfo.Data.Host.LocalShadowOffset +
LocalShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
LocalShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.Data->LocalShadowOffset,
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
}
}

// Write shadow memory offset for private memory
if (getOptions().DetectPrivates) {
if (DeviceInfo->Type == DeviceType::CPU) {
LaunchInfo.Data->PrivateShadowOffset =
LaunchInfo.Data.Host.PrivateShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
DeviceInfo->Type == DeviceType::GPU_DG2) {
Expand All @@ -772,7 +781,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch(

if (EnqueueAllocateShadowMemory(
PrivateShadowMemorySize,
LaunchInfo.Data->PrivateShadowOffset) !=
LaunchInfo.Data.Host.PrivateShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
Expand All @@ -783,20 +792,23 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data->PrivateShadowOffsetEnd =
LaunchInfo.Data->PrivateShadowOffset +
LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
LaunchInfo.Data.Host.PrivateShadowOffset +
PrivateShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
PrivateShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Private, {} - {})",
(void *)LaunchInfo.Data->PrivateShadowOffset,
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
}
}

// Prepare launch info for device side
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
} while (false);

return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -848,61 +860,52 @@ ContextInfo::~ContextInfo() {
}
}

ur_result_t USMLaunchInfo::initialize() {
UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context));
UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device));
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LaunchInfo), (void **)&Data));
*Data = LaunchInfo{};
return UR_RESULT_SUCCESS;
}

ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
auto NumArgs = KI.LocalArgs.size();
if (NumArgs) {
Data->NumLocalArgs = NumArgs;
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LocalArgsInfo) * NumArgs,
(void **)&Data->LocalArgs));
uint32_t i = 0;
ur_result_t LaunchInfo::updateKernelInfo(const KernelInfo &KI) {
if (!KI.LocalArgs.empty()) {
std::vector<LocalArgsInfo> LocalArgsInfo;
for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) {
Data->LocalArgs[i++] = ArgInfo;
LocalArgsInfo.push_back(ArgInfo);
getContext()->logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
ManagedQueue Queue(Context, Device);
UR_CALL(
Data.importLocalArgsInfo(Context, Device, Queue, LocalArgsInfo));
}
return UR_RESULT_SUCCESS;
}

USMLaunchInfo::~USMLaunchInfo() {
LaunchInfo::~LaunchInfo() {
[[maybe_unused]] ur_result_t Result;
if (Data) {
if (Data.DevicePtr) {
auto Type = GetDeviceType(Context, Device);
auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
if (Data->PrivateShadowOffset) {
if (Data.Host.PrivateShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
1);
Data.Host.PrivateShadowOffsetEnd -
Data.Host.PrivateShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->PrivateShadowOffset);
Context, (void *)Data.Host.PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Data->LocalShadowOffset) {
if (Data.Host.LocalShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
Data.Host.LocalShadowOffsetEnd -
Data.Host.LocalShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalShadowOffset);
Context, (void *)Data.Host.LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
}
if (Data->LocalArgs) {
if (Data.Host.LocalArgs) {
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalArgs);
Context, (void *)Data.Host.LocalArgs);
assert(Result == UR_RESULT_SUCCESS);
}
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
Result = getContext()->urDdiTable.USM.pfnFree(Context,
(void *)Data.DevicePtr);
assert(Result == UR_RESULT_SUCCESS);
}
Result = getContext()->urDdiTable.Context.pfnRelease(Context);
Expand Down
66 changes: 56 additions & 10 deletions source/loader/layers/sanitizer/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,50 @@ struct ContextInfo {
}
};

struct USMLaunchInfo {
LaunchInfo *Data = nullptr;
/// Pairs the host copy of the ASan runtime data with a pointer to its
/// device-side counterpart and provides the copies between the two.
///
/// `Host` is the copy read and written on the host. `DevicePtr` is the
/// device-side copy, or nullptr while no device copy has been allocated.
/// This struct neither allocates nor frees `DevicePtr`; it only copies
/// data to and from it.
struct AsanRuntimeDataWrapper {
    AsanRuntimeData Host{};

    AsanRuntimeData *DevicePtr = nullptr;

    /// Copies the device-side runtime data into `Host` on \p Queue.
    ///
    /// When `DevicePtr` is null there is no device copy to read back, so
    /// `Host` is left untouched and UR_RESULT_SUCCESS is returned. This
    /// keeps callers that sync unconditionally after a launch from handing
    /// a null source pointer to the memcpy entry point.
    ///
    /// \return UR_RESULT_SUCCESS, or the error reported by the memcpy.
    ur_result_t syncFromDevice(ur_queue_handle_t Queue) {
        if (DevicePtr == nullptr) {
            return UR_RESULT_SUCCESS;
        }

        UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
            Queue, true, ur_cast<void *>(&Host), DevicePtr,
            sizeof(AsanRuntimeData), 0, nullptr, nullptr));

        return UR_RESULT_SUCCESS;
    }

    /// Copies `Host` into the device-side runtime data on \p Queue.
    ///
    /// `DevicePtr` must already point at a device allocation of at least
    /// sizeof(AsanRuntimeData) bytes; it is passed to the memcpy as-is.
    ///
    /// \return UR_RESULT_SUCCESS, or the error reported by the memcpy.
    ur_result_t syncToDevice(ur_queue_handle_t Queue) {
        UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
            Queue, true, DevicePtr, ur_cast<void *>(&Host),
            sizeof(AsanRuntimeData), 0, nullptr, nullptr));

        return UR_RESULT_SUCCESS;
    }

    /// Allocates a device array for \p LocalArgs and copies the entries
    /// into it.
    ///
    /// On success `Host.NumLocalArgs` holds the entry count and
    /// `Host.LocalArgs` holds the device allocation. Only `Host` is
    /// updated; the device-side runtime data is not touched here. The
    /// allocation is not released by this struct.
    ///
    /// \param Context   Context the array is allocated in.
    /// \param Device    Device the array is allocated on.
    /// \param Queue     Queue used for the copy.
    /// \param LocalArgs Entries to import; must not be empty.
    /// \return UR_RESULT_SUCCESS, or the first error reported by the
    ///         allocation or the copy.
    ur_result_t
    importLocalArgsInfo(ur_context_handle_t Context, ur_device_handle_t Device,
                        ur_queue_handle_t Queue,
                        const std::vector<LocalArgsInfo> &LocalArgs) {
        assert(!LocalArgs.empty());

        Host.NumLocalArgs = LocalArgs.size();
        const size_t LocalArgsInfoSize =
            sizeof(LocalArgsInfo) * Host.NumLocalArgs;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, LocalArgsInfoSize,
            ur_cast<void **>(&Host.LocalArgs)));

        // data() rather than &LocalArgs[0]: stays well-defined even if the
        // assert above is compiled out and the vector is empty.
        UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
            Queue, true, Host.LocalArgs, LocalArgs.data(), LocalArgsInfoSize,
            0, nullptr, nullptr));

        return UR_RESULT_SUCCESS;
    }
};

struct LaunchInfo {
AsanRuntimeDataWrapper Data{};

ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
Expand All @@ -164,19 +206,23 @@ struct USMLaunchInfo {
std::vector<size_t> LocalWorkSize;
uint32_t WorkDim = 0;

USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
LaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
if (LocalWorkSize) {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
}
[[maybe_unused]] auto Result =
getContext()->urDdiTable.Context.pfnRetain(Context);
assert(Result == UR_RESULT_SUCCESS);
Result = getContext()->urDdiTable.Device.pfnRetain(Device);
assert(Result == UR_RESULT_SUCCESS);
}
~USMLaunchInfo();
~LaunchInfo();

ur_result_t initialize();
ur_result_t updateKernelInfo(const KernelInfo &KI);
};

Expand Down Expand Up @@ -206,11 +252,11 @@ class SanitizerInterceptor {

ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo);
LaunchInfo &LaunchInfo);

ur_result_t postLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo);
LaunchInfo &LaunchInfo);

ur_result_t insertContext(ur_context_handle_t Context,
std::shared_ptr<ContextInfo> &CI);
Expand Down Expand Up @@ -285,7 +331,7 @@ class SanitizerInterceptor {
std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel,
USMLaunchInfo &LaunchInfo);
LaunchInfo &LaunchInfo);

ur_result_t allocShadowMemory(ur_context_handle_t Context,
std::shared_ptr<DeviceInfo> &DeviceInfo);
Expand Down
3 changes: 1 addition & 2 deletions source/loader/layers/sanitizer/asan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,9 @@ struct LocalArgsInfo {

constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10;

struct LaunchInfo {
struct AsanRuntimeData {
uintptr_t GlobalShadowOffset = 0;
uintptr_t GlobalShadowOffsetEnd = 0;

uintptr_t PrivateShadowOffset = 0;
uintptr_t PrivateShadowOffsetEnd = 0;

Expand Down
7 changes: 3 additions & 4 deletions source/loader/layers/sanitizer/ur_sanddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,10 +458,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(

getContext()->logger.debug("==== urEnqueueKernelLaunch");

USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
workDim);
UR_CALL(LaunchInfo.initialize());
LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
workDim);

UR_CALL(getContext()->interceptor->preLaunchKernel(hKernel, hQueue,
LaunchInfo));
Expand Down
Loading