Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions source/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ if(UR_ENABLE_SANITIZER)
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.cpp
Expand Down
353 changes: 114 additions & 239 deletions source/loader/layers/sanitizer/asan_interceptor.cpp

Large diffs are not rendered by default.

48 changes: 42 additions & 6 deletions source/loader/layers/sanitizer/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "asan_buffer.hpp"
#include "asan_libdevice.hpp"
#include "asan_options.hpp"
#include "asan_shadow.hpp"
#include "asan_statistics.hpp"
#include "common.hpp"
#include "ur_sanitizer_layer.hpp"
Expand All @@ -41,8 +42,7 @@ struct DeviceInfo {

DeviceType Type = DeviceType::UNKNOWN;
size_t Alignment = 0;
uptr ShadowOffset = 0;
uptr ShadowOffsetEnd = 0;
std::shared_ptr<ShadowMemory> Shadow;

// Device features
bool IsSupportSharedSystemUSM = false;
Expand Down Expand Up @@ -106,6 +106,27 @@ struct KernelInfo {
}
};

// Per-program bookkeeping for the ASan layer. Holds a retained reference to
// the UR program handle for the lifetime of this object (RAII: retained in
// the constructor, released in the destructor).
struct ProgramInfo {
    ur_program_handle_t Handle;
    // Layer-side reference count for this info object (starts at 1).
    std::atomic<int32_t> RefCount = 1;

    // lock this mutex if following fields are accessed
    ur_shared_mutex Mutex;
    // Shadow-allocation records for this program's device globals,
    // populated when the program is registered.
    std::unordered_set<std::shared_ptr<AllocInfo>> AllocInfoForGlobals;

    explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) {
        // Keep the underlying program alive as long as this info exists.
        [[maybe_unused]] auto Result =
            getContext()->urDdiTable.Program.pfnRetain(Handle);
        assert(Result == UR_RESULT_SUCCESS);
    }

    ~ProgramInfo() {
        // Release the reference taken in the constructor.
        [[maybe_unused]] auto Result =
            getContext()->urDdiTable.Program.pfnRelease(Handle);
        assert(Result == UR_RESULT_SUCCESS);
    }
};

struct ContextInfo {
ur_context_handle_t Handle;
std::atomic<int32_t> RefCount = 1;
Expand Down Expand Up @@ -178,8 +199,10 @@ class SanitizerInterceptor {
AllocType Type, void **ResultPtr);
ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr);

ur_result_t registerDeviceGlobals(ur_context_handle_t Context,
ur_program_handle_t Program);
ur_result_t registerProgram(ur_context_handle_t Context,
ur_program_handle_t Program);

ur_result_t unregisterProgram(ur_program_handle_t Program);

ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
Expand All @@ -197,6 +220,9 @@ class SanitizerInterceptor {
std::shared_ptr<DeviceInfo> &CI);
ur_result_t eraseDevice(ur_device_handle_t Device);

ur_result_t insertProgram(ur_program_handle_t Program);
ur_result_t eraseProgram(ur_program_handle_t Program);

ur_result_t insertKernel(ur_kernel_handle_t Kernel);
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);

Expand Down Expand Up @@ -231,6 +257,12 @@ class SanitizerInterceptor {
return m_DeviceMap[Device];
}

std::shared_ptr<ProgramInfo> getProgramInfo(ur_program_handle_t Program) {
std::shared_lock<ur_shared_mutex> Guard(m_ProgramMapMutex);
assert(m_ProgramMap.find(Program) != m_ProgramMap.end());
return m_ProgramMap[Program];
}

std::shared_ptr<KernelInfo> getKernelInfo(ur_kernel_handle_t Kernel) {
std::shared_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
Expand All @@ -243,8 +275,8 @@ class SanitizerInterceptor {
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue);
ur_result_t enqueueAllocInfo(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,

ur_result_t enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue,
std::shared_ptr<AllocInfo> &AI);

Expand All @@ -266,6 +298,10 @@ class SanitizerInterceptor {
m_DeviceMap;
ur_shared_mutex m_DeviceMapMutex;

std::unordered_map<ur_program_handle_t, std::shared_ptr<ProgramInfo>>
m_ProgramMap;
ur_shared_mutex m_ProgramMapMutex;

std::unordered_map<ur_kernel_handle_t, std::shared_ptr<KernelInfo>>
m_KernelMap;
ur_shared_mutex m_KernelMapMutex;
Expand Down
2 changes: 1 addition & 1 deletion source/loader/layers/sanitizer/asan_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ struct AsanOptions {
bool Debug = false;
uint64_t MinRZSize = 16;
uint64_t MaxRZSize = 2048;
uint32_t MaxQuarantineSizeMB = 0;
uint32_t MaxQuarantineSizeMB = 8;
bool DetectLocals = true;
bool DetectPrivates = true;
bool PrintStats = false;
Expand Down
270 changes: 270 additions & 0 deletions source/loader/layers/sanitizer/asan_shadow.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
/*
*
* Copyright (C) 2024 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* @file asan_shadow.cpp
*
*/

#include "asan_shadow.hpp"
#include "asan_interceptor.hpp"
#include "asan_libdevice.hpp"
#include "ur_sanitizer_layer.hpp"
#include "ur_sanitizer_utils.hpp"

namespace ur_sanitizer_layer {

// Return the singleton shadow-memory manager for the given device type.
// One instance is created lazily per device type; Context/Device are used
// only on the first call for a given type (see comment in
// ShadowMemoryGPU::Setup about the single shared shadow region).
// Returns nullptr for device types the layer does not support.
std::shared_ptr<ShadowMemory> GetShadowMemory(ur_context_handle_t Context,
                                              ur_device_handle_t Device,
                                              DeviceType Type) {
    switch (Type) {
    case DeviceType::CPU: {
        static std::shared_ptr<ShadowMemory> ShadowCPU =
            std::make_shared<ShadowMemoryCPU>(Context, Device);
        return ShadowCPU;
    }
    case DeviceType::GPU_PVC: {
        static std::shared_ptr<ShadowMemory> ShadowPVC =
            std::make_shared<ShadowMemoryPVC>(Context, Device);
        return ShadowPVC;
    }
    case DeviceType::GPU_DG2: {
        static std::shared_ptr<ShadowMemory> ShadowDG2 =
            std::make_shared<ShadowMemoryDG2>(Context, Device);
        return ShadowDG2;
    }
    default:
        // Fixed typo in the original message ("Unsupport").
        getContext()->logger.error("Unsupported device type");
        return nullptr;
    }
}

ur_result_t ShadowMemoryCPU::Setup() {
    // One-time initialization: the first invocation does the work and its
    // result is cached in a function-local static; later calls (from any
    // instance) just return that cached result.
    static ur_result_t Result = [this]() {
        const size_t ShadowSize = GetShadowSize();
        ShadowBegin = MmapNoReserve(0, ShadowSize);
        if (ShadowBegin == 0) {
            return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }
        DontCoredumpRange(ShadowBegin, ShadowSize);
        ShadowEnd = ShadowBegin + ShadowSize;

        // Poison the shadow of the null pointer so null dereferences are
        // reported with a dedicated redzone magic.
        const auto URes =
            EnqueuePoisonShadow({}, 0, 1, kNullPointerRedzoneMagic);
        if (URes != UR_RESULT_SUCCESS) {
            getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
                                       URes);
        }
        return URes;
    }();
    return Result;
}

ur_result_t ShadowMemoryCPU::Destory() {
    // Nothing to tear down if Setup() never mapped the shadow region.
    if (ShadowBegin == 0) {
        return UR_RESULT_SUCCESS;
    }
    // Mirror Setup(): teardown runs once and the outcome is cached for any
    // subsequent callers.
    static ur_result_t Result = [this]() {
        return Munmap(ShadowBegin, GetShadowSize()) ? UR_RESULT_SUCCESS
                                                    : UR_RESULT_ERROR_UNKNOWN;
    }();
    return Result;
}

// Map an application address to its shadow byte: a direct
// (1 << ASAN_SHADOW_SCALE)-to-1 compression offset into the shadow region.
uptr ShadowMemoryCPU::MemToShadow(uptr Ptr) {
    const uptr Offset = Ptr >> ASAN_SHADOW_SCALE;
    return Offset + ShadowBegin;
}

// Poison the shadow of [Ptr, Ptr + Size) with Value. On CPU the shadow is
// plain host memory, so this is a synchronous memset and the queue argument
// is unused. Locals are named Begin/End to avoid shadowing the
// ShadowBegin/ShadowEnd data members.
ur_result_t ShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t, uptr Ptr,
                                                 uptr Size, u8 Value) {
    if (Size == 0) {
        return UR_RESULT_SUCCESS;
    }

    const uptr Begin = MemToShadow(Ptr);
    const uptr End = MemToShadow(Ptr + Size - 1);
    assert(Begin <= End);
    const uptr Count = End - Begin + 1;

    getContext()->logger.debug(
        "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)Begin,
        Count, (void *)(size_t)Value);
    memset((void *)Begin, Value, Count);

    return UR_RESULT_SUCCESS;
}

ur_result_t ShadowMemoryGPU::Setup() {
    // Currently, Level-Zero doesn't create independent VAs for each context; if
    // we reserved shadow memory per context this would cause an
    // out-of-resource error when the user creates multiple contexts. Therefore
    // we reserve a single shadow region, once, and cache the first call's
    // result in a function-local static.
    static ur_result_t Result = [this]() {
        size_t ShadowSize = GetShadowSize();
        // TODO: Protect Bad Zone
        auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
            Context, nullptr, ShadowSize, (void **)&ShadowBegin);
        if (Result != UR_RESULT_SUCCESS) {
            // BUGFIX: the original fell through on failure and tried to
            // poison shadow at an unreserved address; bail out instead.
            getContext()->logger.error("urVirtualMemReserve(): {}", Result);
            return Result;
        }
        ShadowEnd = ShadowBegin + ShadowSize;
        // Retain the context which reserves shadow memory
        getContext()->urDdiTable.Context.pfnRetain(Context);

        // Set shadow memory for null pointer
        ManagedQueue Queue(Context, Device);
        Result = EnqueuePoisonShadow(Queue, 0, 1, kNullPointerRedzoneMagic);
        if (Result != UR_RESULT_SUCCESS) {
            getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
                                       Result);
        }
        return Result;
    }();
    return Result;
}

ur_result_t ShadowMemoryGPU::Destory() {
    // Setup() never reserved the region — nothing to free.
    if (ShadowBegin == 0) {
        return UR_RESULT_SUCCESS;
    }
    // Teardown runs once; the outcome is cached for later callers.
    static ur_result_t Result = [this]() {
        const auto URes = getContext()->urDdiTable.VirtualMem.pfnFree(
            Context, (const void *)ShadowBegin, GetShadowSize());
        // Drop the context reference taken in Setup(), regardless of whether
        // the free itself succeeded.
        getContext()->urDdiTable.Context.pfnRelease(Context);
        return URes;
    }();
    return Result;
}

// Poison the shadow of [Ptr, Ptr + Size) with Value on the device.
//
// Shadow pages are materialized lazily: for every virtual-memory page that
// the target shadow range touches, a physical allocation is created and
// mapped on first use (tracked in VirtualMemMaps), then the range is set via
// a blocking USM memset on Queue. Each mapped page also records which
// allocations' shadow lives on it, so ReleaseShadow can unmap it when the
// last one goes away.
ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
                                                 uptr Ptr, uptr Size,
                                                 u8 Value) {
    if (Size == 0) {
        return UR_RESULT_SUCCESS;
    }

    // NOTE: these locals intentionally shadow the ShadowBegin/ShadowEnd
    // members; they denote the shadow sub-range for this request only.
    uptr ShadowBegin = MemToShadow(Ptr);
    uptr ShadowEnd = MemToShadow(Ptr + Size - 1);
    assert(ShadowBegin <= ShadowEnd);
    {
        // Page granularity is queried once; assumes it is identical for all
        // devices this instance serves — TODO confirm.
        static const size_t PageSize =
            GetVirtualMemGranularity(Context, Device);

        ur_physical_mem_properties_t Desc{
            UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};

        // Make sure [Ptr, Ptr + Size] is mapped to physical memory
        for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
             MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
            // Lock is taken per page iteration; map lookup + insert must stay
            // under the same hold to keep the check-then-create atomic.
            std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
            if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) {
                ur_physical_mem_handle_t PhysicalMem{};
                auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate(
                    Context, Device, PageSize, &Desc, &PhysicalMem);
                if (URes != UR_RESULT_SUCCESS) {
                    getContext()->logger.error("urPhysicalMemCreate(): {}",
                                               URes);
                    return URes;
                }

                URes = getContext()->urDdiTable.VirtualMem.pfnMap(
                    Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
                    UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
                if (URes != UR_RESULT_SUCCESS) {
                    // NOTE(review): PhysicalMem is not released on this error
                    // path — possible handle leak; verify intended.
                    getContext()->logger.error("urVirtualMemMap({}, {}): {}",
                                               (void *)MappedPtr, PageSize,
                                               URes);
                    return URes;
                }

                getContext()->logger.debug("urVirtualMemMap: {} ~ {}",
                                           (void *)MappedPtr,
                                           (void *)(MappedPtr + PageSize - 1));

                // Initialize to zero
                URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0,
                                             PageSize);
                if (URes != UR_RESULT_SUCCESS) {
                    getContext()->logger.error("EnqueueUSMBlockingSet(): {}",
                                               URes);
                    return URes;
                }

                VirtualMemMaps[MappedPtr].first = PhysicalMem;
            }

            // We don't need to record virtual memory map for null pointer,
            // since it doesn't have an alloc info.
            if (Ptr == 0) {
                continue;
            }

            // Associate the owning allocation with this shadow page so
            // ReleaseShadow can reference-count page usage.
            auto AllocInfoIt =
                getContext()->interceptor->findAllocInfoByAddress(Ptr);
            assert(AllocInfoIt);
            VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
        }
    }

    // All pages are mapped; now write the poison value over the whole range.
    auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value,
                                      ShadowEnd - ShadowBegin + 1);
    getContext()->logger.debug(
        "EnqueuePoisonShadow (addr={}, count={}, value={}): {}",
        (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value,
        URes);
    if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("EnqueueUSMBlockingSet(): {}", URes);
        return URes;
    }

    return UR_RESULT_SUCCESS;
}

// Detach allocation AI from every shadow page its range touches; when a page
// no longer backs any allocation, unmap it and release its physical memory.
ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
    uptr ShadowBegin = MemToShadow(AI->AllocBegin);
    uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize);
    assert(ShadowBegin <= ShadowEnd);

    static const size_t PageSize = GetVirtualMemGranularity(Context, Device);

    for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
         MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
        std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
        // Single lookup instead of repeated operator[] accesses.
        auto It = VirtualMemMaps.find(MappedPtr);
        if (It == VirtualMemMaps.end()) {
            continue;
        }
        It->second.second.erase(AI);
        if (It->second.second.empty()) {
            UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
                Context, (void *)MappedPtr, PageSize));
            UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease(
                It->second.first));
            getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}",
                                       (void *)MappedPtr,
                                       (void *)(MappedPtr + PageSize - 1));
            // BUGFIX: drop the stale map entry. The original kept it, so a
            // later EnqueuePoisonShadow would find the key present, skip
            // re-mapping, and write through an unmapped page with a released
            // physical-mem handle.
            VirtualMemMaps.erase(It);
        }
    }

    return UR_RESULT_SUCCESS;
}

// PVC address-to-shadow mapping. Addresses with any of the top 8 bits set
// are treated as device USM and land in a dedicated region offset by
// 0x80000000000; everything else is mapped by its low 47 bits of VA.
uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
    const bool IsDeviceUSM = (Ptr & 0xFF00000000000000ULL) != 0;
    if (IsDeviceUSM) {
        const uptr Low48 = Ptr & 0xFFFFFFFFFFFFULL;
        return ShadowBegin + 0x80000000000ULL + (Low48 >> ASAN_SHADOW_SCALE);
    }
    // Only consider 47-bit VA.
    const uptr Low47 = Ptr & 0x7FFFFFFFFFFFULL;
    return ShadowBegin + (Low47 >> ASAN_SHADOW_SCALE);
}

// DG2 address-to-shadow mapping. Addresses with any of the top 16 bits set
// are treated as device USM and land in a region offset by 0x80000000000
// (keyed on the low 47 bits); host/shared USM maps directly.
uptr ShadowMemoryDG2::MemToShadow(uptr Ptr) {
    const bool IsDeviceUSM = (Ptr & 0xFFFF000000000000ULL) != 0;
    if (IsDeviceUSM) {
        const uptr Low47 = Ptr & 0x7FFFFFFFFFFFULL;
        return ShadowBegin + 0x80000000000ULL + (Low47 >> ASAN_SHADOW_SCALE);
    }
    // Host/Shared USM
    return ShadowBegin + (Ptr >> ASAN_SHADOW_SCALE);
}

} // namespace ur_sanitizer_layer
Loading
Loading