diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index c75b1e3cc4a36..209e84901ab1e 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -839,8 +839,6 @@ struct AddressSanitizer { bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); - void instrumentSyclStaticLocalMemory(CallInst *CI, - ArrayRef RetVec); bool instrumentSyclDynamicLocalMemory(Function &F, ArrayRef RetVec); void instrumentInitAsanLaunchInfo(Function &F, const TargetLibraryInfo *TLI); @@ -889,8 +887,6 @@ struct AddressSanitizer { ShadowMapping Mapping; FunctionCallee AsanHandleNoReturnFunc; FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction; - FunctionCallee AsanSetShadowStaticLocalFunc; - FunctionCallee AsanUnpoisonShadowStaticLocalFunc; FunctionCallee AsanSetShadowDynamicLocalFunc; FunctionCallee AsanUnpoisonShadowDynamicLocalFunc; Constant *AsanShadowGlobal; @@ -972,6 +968,10 @@ class ModuleAddressSanitizer { void initializeCallbacks(); void instrumentDeviceGlobal(IRBuilder<> &IRB); + void instrumentSyclStaticLocalMemory(IRBuilder<> &IRB); + void initializeRetVecMap(Function *F); + void initializeKernelCallerMap(Function *F); + void instrumentGlobals(IRBuilder<> &IRB, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, ArrayRef ExtendedGlobals, @@ -1030,10 +1030,15 @@ class ModuleAddressSanitizer { FunctionCallee AsanUnregisterImageGlobals; FunctionCallee AsanRegisterElfGlobals; FunctionCallee AsanUnregisterElfGlobals; + FunctionCallee AsanSetShadowStaticLocalFunc; + FunctionCallee AsanUnpoisonShadowStaticLocalFunc; Function *AsanCtorFunction = nullptr; Function *AsanDtorFunction = nullptr; GlobalVariable *ModuleName = nullptr; + + DenseMap> KernelToRetVecMap; + DenseMap> FuncToKernelCallerMap; }; // Stack poisoning does not play well with exception handling. @@ -1661,6 +1666,9 @@ static bool isUnsupportedDeviceGlobal(GlobalVariable *G) { if (G->getName().starts_with("__Asan")) return true; + if (G->getAddressSpace() == kSpirOffloadLocalAS) + return true; + Attribute Attr = G->getAttribute("sycl-device-image-scope"); return (!Attr.isStringAttribute() || Attr.getValueAsString() == "false"); } @@ -1765,68 +1773,6 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB, return IRB.CreateAdd(Shadow, ShadowBase); } -static uint64_t getSizeAndRedzoneSizeForLocal(uint64_t Size, - uint64_t Granularity, - uint64_t Alignment) { - uint64_t Res = 0; - if (Size <= 4) - Res = 16; - else if (Size <= 16) - Res = 32; - else if (Size <= 128) - Res = Size + 32; - else if (Size <= 512) - Res = Size + 64; - else if (Size <= 4096) - Res = Size + 128; - else - Res = Size + 256; - return alignTo(std::max(Res, 2 * Granularity), Alignment); -} - -// Instument static local memory -void AddressSanitizer::instrumentSyclStaticLocalMemory( - CallInst *CI, ArrayRef RetVec) { - InstrumentationIRBuilder IRB(CI->getNextNode()); - auto *Size = cast(CI->getArgOperand(0)); - auto *Alignment = cast(CI->getArgOperand(1)); - - const auto Granularity = 1 << Mapping.Scale; - // The base address of local memory needs to align to granularity - const auto Align = alignTo(Alignment->getZExtValue(), Granularity); - - auto *SizeWithRedZone = ConstantInt::get( - IntptrTy, getSizeAndRedzoneSizeForLocal(Size->getZExtValue(), Granularity, - Alignment->getZExtValue())); - - auto *NewCI = - IRB.CreateCall(CI->getCalledFunction(), - {SizeWithRedZone, ConstantInt::get(IntptrTy, Align)}); - - // __asan_set_shadow_static_local( - // uptr beg, - // size_t size, - // size_t size_with_redzone, - // ) - auto LocalAddr = IRB.CreatePointerCast(NewCI, IntptrTy); - IRB.CreateCall(AsanSetShadowStaticLocalFunc, - {LocalAddr, Size, SizeWithRedZone}); - - // __asan_unpoison_shadow_static_local( - // uptr beg, - // size_t size, - // size_t size_with_redzone, - // ) - for (Instruction *Ret : RetVec) { - IRBuilder<> IRBRet(Ret); - IRBRet.CreateCall(AsanUnpoisonShadowStaticLocalFunc, - {LocalAddr, Size, SizeWithRedZone}); - } - - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); -} - // Instument dynamic local memory bool AddressSanitizer::instrumentSyclDynamicLocalMemory( Function &F, ArrayRef RetVec) { @@ -2810,6 +2756,24 @@ void ModuleAddressSanitizer::initializeCallbacks() { AsanUnregisterElfGlobals = M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, IntptrTy); + + // __asan_set_shadow_static_local( + // uptr ptr, + // size_t size, + // size_t size_with_redzone + // ) + AsanSetShadowStaticLocalFunc = + M.getOrInsertFunction("__asan_set_shadow_static_local", IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy); + + // __asan_unpoison_shadow_static_local( + // uptr ptr, + // size_t size, + // size_t size_with_redzone + // ) + AsanUnpoisonShadowStaticLocalFunc = + M.getOrInsertFunction("__asan_unpoison_shadow_static_local", + IRB.getVoidTy(), IntptrTy, IntptrTy, IntptrTy); } // Put the metadata and the instrumented global in the same group. This ensures @@ -2949,6 +2913,164 @@ void ModuleAddressSanitizer::instrumentDeviceGlobal(IRBuilder<> &IRB) { G->eraseFromParent(); } +static void getFunctionsOfUser(User *User, DenseSet &Functions) { + if (Instruction *Inst = dyn_cast(User)) { + Functions.insert(Inst->getFunction()); + } else if (ConstantExpr *CE = dyn_cast(User)) { + for (auto *U : CE->users()) + getFunctionsOfUser(U, Functions); + } +} + +void ModuleAddressSanitizer::initializeRetVecMap(Function *F) { + if (KernelToRetVecMap.find(F) != KernelToRetVecMap.end()) + return; + + SmallVector RetVec; + for (auto &BB : *F) { + for (auto &Inst : BB) { + if (ReturnInst *RI = dyn_cast(&Inst)) { + if (CallInst *CI = RI->getParent()->getTerminatingMustTailCall()) + RetVec.push_back(CI); + else + RetVec.push_back(RI); + } else if (ResumeInst *RI = dyn_cast(&Inst)) { + RetVec.push_back(RI); + } else if (CleanupReturnInst *CRI = dyn_cast(&Inst)) { + RetVec.push_back(CRI); + } + } + } + + KernelToRetVecMap[F] = std::move(RetVec); +} + +void ModuleAddressSanitizer::initializeKernelCallerMap(Function *F) { + if (FuncToKernelCallerMap.find(F) != FuncToKernelCallerMap.end()) + return; + + for (auto *U : F->users()) { + if (Instruction *Inst = dyn_cast(U)) { + Function *Caller = Inst->getFunction(); + if (Caller->getCallingConv() == CallingConv::SPIR_KERNEL) { + FuncToKernelCallerMap[F].insert(Caller); + continue; + } + initializeKernelCallerMap(Caller); + FuncToKernelCallerMap[F].insert(FuncToKernelCallerMap[Caller].begin(), + FuncToKernelCallerMap[Caller].end()); + } + } +} + +// Instument static local memory +void ModuleAddressSanitizer::instrumentSyclStaticLocalMemory(IRBuilder<> &IRB) { + auto &DL = M.getDataLayout(); + SmallVector GlobalsToRemove; + SmallVector LocalGlobals; + + Type *IntptrTy = M.getDataLayout().getIntPtrType(*C, kSpirOffloadGlobalAS); + + // Step1. Create a new global variable with enough space for a redzone. + for (auto &G : M.globals()) { + if (G.getAddressSpace() != kSpirOffloadLocalAS) + continue; + if (G.getName().starts_with("__Asan")) + continue; + + Type *Ty = G.getValueType(); + const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); + const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes); + Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + StructType *NewTy = StructType::get(Ty, RightRedZoneTy); + Constant *NewInitializer = + G.hasInitializer() + ? ConstantStruct::get(NewTy, G.getInitializer(), + Constant::getNullValue(RightRedZoneTy)) + : nullptr; + + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewTy, G.isConstant(), G.getLinkage(), NewInitializer, "", &G, + G.getThreadLocalMode(), G.getAddressSpace()); + NewGlobal->copyAttributesFrom(&G); + NewGlobal->setComdat(G.getComdat()); + NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal())); + NewGlobal->copyMetadata(&G, 0); + + Value *Indices2[2]; + Indices2[0] = IRB.getInt32(0); + Indices2[1] = IRB.getInt32(0); + + G.replaceAllUsesWith( + ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); + NewGlobal->takeName(&G); + GlobalsToRemove.push_back(&G); + LocalGlobals.push_back(NewGlobal); + } + + if (GlobalsToRemove.empty()) + return; + + for (auto *G : GlobalsToRemove) + G->eraseFromParent(); + + // Step2. Instrument initialization functions on kernel + DenseMap FuncToLaunchInfoMap; + auto Instrument = [&](GlobalVariable *G, Function *F) { + StructType *Type = cast(G->getValueType()); + const uint64_t Size = DL.getTypeAllocSize(Type->getElementType(0)); + const uint64_t SizeWithRedZone = DL.getTypeAllocSize(Type); + + // Poison shadow of static local memory + if (FuncToLaunchInfoMap.find(F) == FuncToLaunchInfoMap.end()) { + for (auto &Inst : F->getEntryBlock()) { + auto *SI = dyn_cast(&Inst); + if (SI && (SI->getPointerOperand()->getName() == "__AsanLaunchInfo")) { + FuncToLaunchInfoMap[F] = &Inst; + break; + } + } + } + assert(FuncToLaunchInfoMap.find(F) != FuncToLaunchInfoMap.end() && + "All spir kernels should be instrumented."); + + IRBuilder<> Builder(FuncToLaunchInfoMap[F]->getNextNode()); + Builder.CreateCall(AsanSetShadowStaticLocalFunc, + {Builder.CreatePointerCast(G, IntptrTy), + ConstantInt::get(IntptrTy, Size), + ConstantInt::get(IntptrTy, SizeWithRedZone)}); + + // Unpoison shadow of static local memory, required by CPU device + initializeRetVecMap(F); + for (auto *RI : KernelToRetVecMap[F]) { + IRBuilder<> Builder(RI); + Builder.CreateCall(AsanUnpoisonShadowStaticLocalFunc, + {Builder.CreatePointerCast(G, IntptrTy), + ConstantInt::get(IntptrTy, Size), + ConstantInt::get(IntptrTy, SizeWithRedZone)}); + } + }; + + // We only instrument on spir_kernel, because local variables are + // kind of global variable + for (auto *G : LocalGlobals) { + DenseSet InstrumentedFunc; + for (auto *User : G->users()) + getFunctionsOfUser(User, InstrumentedFunc); + for (Function *F : InstrumentedFunc) { + if (F->getCallingConv() == CallingConv::SPIR_KERNEL) { + Instrument(G, F); + continue; + } + // Get root spir_kernel of spir_func + initializeKernelCallerMap(F); + for (Function *Kernel : FuncToKernelCallerMap[F]) + if (!InstrumentedFunc.contains(Kernel)) + Instrument(G, Kernel); + } + } +} + void ModuleAddressSanitizer::InstrumentGlobalsCOFF( IRBuilder<> &IRB, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers) { @@ -3412,12 +3534,10 @@ bool ModuleAddressSanitizer::instrumentModule() { } if (TargetTriple.isSPIROrSPIRV()) { - // Add module metadata "device.sanitizer" for sycl-post-link - LLVMContext &Ctx = M.getContext(); - auto *MD = M.getOrInsertNamedMetadata("device.sanitizer"); - Metadata *MDVals[] = {MDString::get(Ctx, "asan")}; - MD->addOperand(MDNode::get(Ctx, MDVals)); - + if (ClSpirOffloadLocals) { + IRBuilder<> IRB(*C); + instrumentSyclStaticLocalMemory(IRB); + } if (ClDeviceGlobals) { IRBuilder<> IRB(*C); instrumentDeviceGlobal(IRB); @@ -3564,23 +3684,6 @@ void AddressSanitizer::initializeCallbacks(const TargetLibraryInfo *TLI) { ArrayType::get(IRB.getInt8Ty(), 0)); if (TargetTriple.isSPIROrSPIRV()) { - // __asan_set_shadow_static_local( - // uptr ptr, - // size_t size, - // size_t size_with_redzone - // ) - AsanSetShadowStaticLocalFunc = - M.getOrInsertFunction("__asan_set_shadow_static_local", IRB.getVoidTy(), - IntptrTy, IntptrTy, IntptrTy); - - // __asan_unpoison_shadow_static_local( - // uptr ptr, - // size_t size, - // ) - AsanUnpoisonShadowStaticLocalFunc = - M.getOrInsertFunction("__asan_unpoison_shadow_static_local", - IRB.getVoidTy(), IntptrTy, IntptrTy, IntptrTy); - // __asan_set_shadow_dynamic_local( // uptr ptr, // uint32_t num_args @@ -3747,7 +3850,6 @@ bool AddressSanitizer::instrumentFunction(Function &F, SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; - SmallVector SyclAllocateLocalMemoryCalls; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -3800,16 +3902,8 @@ bool AddressSanitizer::instrumentFunction(Function &F, NoReturnCalls.push_back(CB); } } - if (CallInst *CI = dyn_cast(&Inst)) { - if (TargetTriple.isSPIROrSPIRV() && CI->getCalledFunction() && - CI->getCalledFunction()->getCallingConv() == - llvm::CallingConv::SPIR_FUNC && - CI->getCalledFunction()->getName() == - "__sycl_allocateLocalMemory") - SyclAllocateLocalMemoryCalls.push_back(CI); - else - maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); - } + if (CallInst *CI = dyn_cast(&Inst)) + maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); } if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } @@ -3855,13 +3949,9 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; - // We need to instrument dynamic/static local arguments after stack poisoner + // We need to instrument dynamic local arguments after stack poisoner if (TargetTriple.isSPIROrSPIRV()) { - for (auto *CI : SyclAllocateLocalMemoryCalls) { - instrumentSyclStaticLocalMemory(CI, FSP.RetVec); - FunctionModified = true; - } - if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { + if (ClSpirOffloadLocals && F.getCallingConv() == CallingConv::SPIR_KERNEL) { FunctionModified |= instrumentSyclDynamicLocalMemory(F, FSP.RetVec); } } diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_local_addess_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_local_addess_space.ll index cdce149f39270..adbc1873c4c43 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_local_addess_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_local_addess_space.ll @@ -7,20 +7,20 @@ target triple = "spir64-unknown-unknown" %"class.sycl::_V1::detail::array" = type { [1 x i64] } %"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } -declare dso_local spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef %0, i64 noundef %1) local_unnamed_addr #1 +@WGLocalMem = internal addrspace(3) global [64 x i8] poison, align 4 -define spir_kernel void @kernel_static_local() #0 { +define spir_kernel void @kernel_static_local() sanitize_address { ; CHECK-LABEL: define spir_kernel void @kernel_static_local entry: - %1 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef 16, i64 noundef 4) - ; CHECK: [[T0:%.*]] = call ptr addrspace(3) @__sycl_allocateLocalMemory(i64 32, i64 8) - ; CHECK-NEXT: [[T1:%.*]] = ptrtoint ptr addrspace(3) [[T0]] to i64 - ; CHECK-NEXT: call void @__asan_set_shadow_static_local(i64 [[T1]], i64 16, i64 32) - ; CHECK-NEXT: call void @__asan_unpoison_shadow_static_local(i64 %1, i64 16, i64 32) + store i32 0, ptr addrspace(3) @WGLocalMem + ; CHECK: store ptr addrspace(1) %__asan_launch, ptr addrspace(3) @__AsanLaunchInfo, align 8 + ; CHECK-NEXT: call void @__asan_set_shadow_static_local(i64 ptrtoint (ptr addrspace(3) @WGLocalMem to i64), i64 64, i64 96) + ; CHECK-NEXT: store i32 0, ptr addrspace(3) @WGLocalMem, align 4 + ; CHECK-NEXT: call void @__asan_unpoison_shadow_static_local(i64 ptrtoint (ptr addrspace(3) @WGLocalMem to i64), i64 64, i64 96) ret void } -define spir_kernel void @kernel_dynamic_local(ptr addrspace(3) noundef align 4 %_arg_acc, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_acc1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_acc2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_acc3) #0 { +define spir_kernel void @kernel_dynamic_local(ptr addrspace(3) noundef align 4 %_arg_acc, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_acc1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_acc2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_acc3) sanitize_address { ; CHECK-LABEL: define spir_kernel void @kernel_dynamic_local entry: ; CHECK: %local_args = alloca i64, align 8 @@ -32,6 +32,3 @@ entry: ; CHECK: call void @__asan_unpoison_shadow_dynamic_local(i64 %2, i32 1) ret void } - -attributes #0 = { sanitize_address } -attributes #1 = { convergent nounwind } diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp index 5d7232f04d5a3..0544032a034d8 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp @@ -6,9 +6,6 @@ // RUN: %{build} %device_asan_flags -g -O2 -o %t3.out // RUN: %{run} not %t3.out 2>&1 | FileCheck %s -// UNSUPPORTED: cpu || gpu -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16979 - #include #include @@ -17,8 +14,14 @@ constexpr std::size_t N = 16; constexpr std::size_t group_size = 8; +__attribute__((noinline)) int check(int *ref, int index) { return ref[index]; } +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Local Memory +// CHECK: READ of size 4 at kernel {{<.*MyKernel>}} LID(6, 0, 0) GID({{.*}}, 0, 0) +// CHECK: #0 {{.*}} {{.*group_local_memory.cpp}}:[[@LINE-3]] + int main() { sycl::queue Q; + auto data = sycl::malloc_device(1, Q); Q.submit([&](sycl::handler &h) { h.parallel_for( @@ -27,13 +30,12 @@ int main() { ptr = sycl::ext::oneapi::group_local_memory( item.get_group()); auto &ref = *ptr; - ref[item.get_local_linear_id() * 2 + 4] = 42; - // CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Local Memory - // CHECK: WRITE of size 4 at kernel {{<.*MyKernel>}} LID(6, 0, 0) GID({{.*}}, 0, 0) - // CHECK: #0 {{.*}} {{.*group_local_memory.cpp}}:[[@LINE-3]] + // NOTE: direct access will be optimized out + data[0] = check(ref, item.get_local_linear_id() * 2 + 4); }); }); - Q.wait(); + + sycl::free(data, Q); return 0; } diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory_func.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory_func.cpp new file mode 100644 index 0000000000000..d275e6e381fdd --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory_func.cpp @@ -0,0 +1,42 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_asan_flags -g -O0 -o %t1.out +// RUN: %{run} not %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -g -O1 -o %t2.out +// RUN: %{run} not %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -g -O2 -o %t3.out +// RUN: %{run} not %t3.out 2>&1 | FileCheck %s + +#include + +#include +#include + +constexpr std::size_t N = 16; +constexpr std::size_t group_size = 8; + +__attribute__((noinline)) int check(int *ref, int index) { return ref[index]; } +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Local Memory +// CHECK: READ of size 4 at kernel {{<.*MyKernel>}} LID({{.*}}, 0, 0) GID({{.*}}, 0, 0) +// CHECK: #0 {{.*}} {{.*group_local_memory_func.cpp}}:[[@LINE-3]] + +__attribute__((noinline)) int test_local(sycl::nd_item<1> &item) { + auto local_mem = + sycl::ext::oneapi::group_local_memory(item.get_group()); + // NOTE: direct access will be optimized out + return check(*local_mem, group_size); +} + +int main() { + sycl::queue Q; + auto data = sycl::malloc_device(N, Q); + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N, group_size), + [=](sycl::nd_item<1> item) { data[0] = test_local(item); }); + }); + Q.wait(); + + sycl::free(data, Q); + return 0; +}