diff --git a/libdevice/sanitizer/tsan_rtl.cpp b/libdevice/sanitizer/tsan_rtl.cpp index 8df68bbf535ea..7f547d180ac30 100644 --- a/libdevice/sanitizer/tsan_rtl.cpp +++ b/libdevice/sanitizer/tsan_rtl.cpp @@ -28,6 +28,9 @@ static const __SYCL_CONSTANT__ char __tsan_print_shadow_value[] = "[kernel] %p(%d) : {size: %d, access: %x, sid: %d, clock: %d, is_write: " "%d}\n"; +static const __SYCL_CONSTANT__ char __tsan_print_cleanup_private[] = + "[kernel] cleanup private shadow: %p ~ %p\n"; + static const __SYCL_CONSTANT__ char __tsan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -47,6 +50,10 @@ static const __SYCL_CONSTANT__ char __tsan_report_race[] = namespace { +inline constexpr uptr RoundUpTo(uptr x, uptr boundary) { + return (x + boundary - 1) & ~(boundary - 1); +} + inline constexpr uptr RoundDownTo(uptr x, uptr boundary) { return x & ~(boundary - 1); } @@ -334,4 +341,21 @@ TSAN_CHECK(write, true, 2) TSAN_CHECK(write, true, 4) TSAN_CHECK(write, true, 8) +DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) { + if (TsanLaunchInfo->DeviceTy != DeviceType::CPU) + return; + + if (size) { + addr = RoundDownTo(addr, kShadowCell); + size = RoundUpTo(size, kShadowCell); + + RawShadow *Begin = MemToShadow_CPU(addr, 0); + TSAN_DEBUG(__spirv_ocl_printf( + __tsan_print_cleanup_private, Begin, + (uptr)Begin + size / kShadowCell * kShadowCnt * kShadowSize - 1)); + for (uptr i = 0; i < size / kShadowCell * kShadowCnt; i++) + Begin[i] = 0; + } +} + #endif // __SPIR__ || __SPIRV__ diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index f5373df0e6696..5e66a54fe488a 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -120,6 +120,9 @@ struct ThreadSanitizerOnSpirv { void instrumentModule(); + bool instrumentAllocInst(Function *F, + SmallVectorImpl &AllocaInsts); + void appendDebugInfoToArgs(Instruction *I, SmallVectorImpl &Args); private: @@ -144,6 +147,7 @@ struct ThreadSanitizerOnSpirv { // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; + FunctionCallee TsanCleanupPrivate; FunctionCallee TsanRead[kNumberOfAccessSizes]; FunctionCallee TsanWrite[kNumberOfAccessSizes]; @@ -261,6 +265,10 @@ void ThreadSanitizerOnSpirv::initialize() { Attr = Attr.addFnAttribute(C, Attribute::NoUnwind); Type *Int8PtrTy = IRB.getInt8PtrTy(kSpirOffloadConstantAS); + TsanCleanupPrivate = + M.getOrInsertFunction("__tsan_cleanup_private", Attr, IRB.getVoidTy(), + IntptrTy, IRB.getInt32Ty()); + for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; std::string ByteSizeStr = utostr(ByteSize); @@ -282,6 +290,28 @@ void ThreadSanitizerOnSpirv::initialize() { } } +bool ThreadSanitizerOnSpirv::instrumentAllocInst( + Function *F, SmallVectorImpl &AllocaInsts) { + bool Changed = false; + + EscapeEnumerator EE(*F, "tsan_cleanup", false); + while (IRBuilder<> *AtExit = EE.Next()) { + InstrumentationIRBuilder::ensureDebugInfo(*AtExit, *F); + for (auto *Inst : AllocaInsts) { + AllocaInst *AI = cast(Inst); + if (auto AllocSize = AI->getAllocationSize(DL)) { + AtExit->CreateCall( + TsanCleanupPrivate, + {AtExit->CreatePtrToInt(AI, IntptrTy), + ConstantInt::get(AtExit->getInt32Ty(), *AllocSize)}); + Changed |= true; + } + } + } + + return Changed; +} + void ThreadSanitizerOnSpirv::appendDebugInfoToArgs( Instruction *I, SmallVectorImpl &Args) { auto &Loc = I->getDebugLoc(); @@ -793,6 +823,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; SmallVector MemIntrinCalls; + SmallVector Allocas; bool Res = false; bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); @@ -808,6 +839,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, AtomicAccesses.push_back(&Inst); else if (isa(Inst) || isa(Inst)) LocalLoadsAndStores.push_back(&Inst); + else if (Spirv && isa(Inst) && + cast(Inst).getAllocatedType()->isSized()) + Allocas.push_back(&Inst); else if ((isa(Inst) && !isa(Inst)) || isa(Inst)) { if (CallInst *CI = dyn_cast(&Inst)) @@ -850,6 +884,14 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, InsertRuntimeIgnores(F); } + // FIXME: We need to skip the check for private memory, otherwise OpenCL CPU + // device may generate false positive reports due to stack re-use in different + // threads. However, SPIR-V builts 'ToPrivate' doesn't work as expected on + // OpenCL CPU device. So we need to manually cleanup private shadow before + // each function exit point. + if (Spirv && !Allocas.empty()) + Res |= Spirv->instrumentAllocInst(&F, Allocas); + // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { InstrumentationIRBuilder IRB(&F.getEntryBlock(), diff --git a/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/cleanup_private_shadow.ll b/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/cleanup_private_shadow.ll new file mode 100644 index 0000000000000..08a1ac30e9092 --- /dev/null +++ b/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/cleanup_private_shadow.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -tsan-instrument-func-entry-exit=0 -tsan-instrument-memintrinsics=0 -S | FileCheck %s +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } + +define spir_kernel void @test() { +entry: + %agg.tmp = alloca %"class.sycl::_V1::range", align 8 +; CHECK: [[REG1:%[0-9]+]] = ptrtoint ptr %agg.tmp to i64 +; CHECK-NEXT: call void @__tsan_cleanup_private(i64 [[REG1]], i32 8) + ret void +} diff --git a/sycl/test-e2e/ThreadSanitizer/check_both_read.cpp b/sycl/test-e2e/ThreadSanitizer/check_both_read.cpp index 5f9d5d6050302..edc40d78ebb87 100644 --- a/sycl/test-e2e/ThreadSanitizer/check_both_read.cpp +++ b/sycl/test-e2e/ThreadSanitizer/check_both_read.cpp @@ -1,8 +1,6 @@ // REQUIRES: linux, cpu || (gpu && level_zero) // RUN: %{build} %device_tsan_flags -O0 -g -o %t1.out // RUN: %{run} %t1.out 2>&1 | FileCheck %s -// UNSUPPORTED: true -// UNSUPPORTED-TRACKER: CMPLRLLVM-66203 #include "sycl/detail/core.hpp" #include "sycl/usm.hpp" diff --git a/sycl/test-e2e/ThreadSanitizer/check_no_race.cpp b/sycl/test-e2e/ThreadSanitizer/check_no_race.cpp index 1e9f93df61fe1..0f32a49644fbd 100644 --- a/sycl/test-e2e/ThreadSanitizer/check_no_race.cpp +++ b/sycl/test-e2e/ThreadSanitizer/check_no_race.cpp @@ -3,8 +3,6 @@ // RUN: %{run} %t1.out 2>&1 | FileCheck %s // RUN: %{build} %device_tsan_flags -O2 -g -o %t2.out // RUN: %{run} %t2.out 2>&1 | FileCheck %s -// UNSUPPORTED: true -// UNSUPPORTED-TRACKER: CMPLRLLVM-66203 #include "sycl/detail/core.hpp" #include "sycl/usm.hpp"