Skip to content

Commit c130a25

Browse files
committed
[Offload] Introduce the offload sanitizer (initially for traps)
This is the first commit for a new "OffloadSanitizer" that is designed to work well on GPUs. To keep the commit small, only traps are sanitized and we only report information about the encountering thread. It is also restricted to AMD GPUs for now, though that is not a conceptual requirement. The communication between the instrumented device code and the runtime is performed via host-initialized pinned memory. If an error is detected, one encountering thread will set up this sanitizer environment and a hardware trap is executed to end the kernel. The host trap handler can check the sanitizer environment to determine whether the trap was issued by the sanitizer code. If so, we report the reason (for now only that a trap was encountered), the encountering thread id, and the PC.
1 parent 7f19686 commit c130a25

File tree

21 files changed

+533
-31
lines changed

21 files changed

+533
-31
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Pass to instrument offload code in order to detect errors and communicate
10+
// them to the LLVM/Offload runtimes.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
15+
#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
16+
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
/// Module pass that instruments offload code in order to detect errors and
/// communicate them to the LLVM/Offload runtimes.
class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
22+
public:
23+
/// Run the instrumentation on module \p M and report which analyses are
/// still valid afterwards.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
24+
};
25+
} // end namespace llvm
26+
27+
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@
181181
#include "llvm/Transforms/InstCombine/InstCombine.h"
182182
#include "llvm/Transforms/Instrumentation.h"
183183
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
184+
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
184185
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
185186
#include "llvm/Transforms/Instrumentation/CGProfile.h"
186187
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
144144
MODULE_PASS("verify", VerifierPass())
145145
MODULE_PASS("view-callgraph", CallGraphViewerPass())
146146
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
147+
MODULE_PASS("offload-sanitizer", OffloadSanitizerPass())
147148
#undef MODULE_PASS
148149

149150
#ifndef MODULE_PASS_WITH_PARAMS

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#include "llvm/Transforms/IPO/ExpandVariadics.h"
6161
#include "llvm/Transforms/IPO/GlobalDCE.h"
6262
#include "llvm/Transforms/IPO/Internalize.h"
63+
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
6364
#include "llvm/Transforms/Scalar.h"
6465
#include "llvm/Transforms/Scalar/GVN.h"
6566
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
@@ -380,6 +381,11 @@ static cl::opt<bool> EnableHipStdPar(
380381
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
381382
cl::Hidden);
382383

384+
/// Hidden command-line flag "amdgpu-enable-offload-sanitizer"; defaults to
/// false. It is consulted in the full-LTO "last EP" callback to decide whether
/// OffloadSanitizerPass is added to the module pipeline.
static cl::opt<bool>
385+
EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
386+
cl::desc("Enable the offload sanitizer"),
387+
cl::init(false), cl::Hidden);
388+
383389
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
384390
// Register the target
385391
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -744,6 +750,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
744750

745751
PB.registerFullLinkTimeOptimizationLastEPCallback(
746752
[this](ModulePassManager &PM, OptimizationLevel Level) {
753+
if (EnableOffloadSanitizer)
754+
PM.addPass(OffloadSanitizerPass());
755+
747756
// We want to support the -lto-partitions=N option as "best effort".
748757
// For that, we need to lower LDS earlier in the pipeline before the
749758
// module is partitioned for codegen.

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
185185
Core
186186
GlobalISel
187187
HipStdPar
188+
Instrumentation
188189
IPO
189190
IRPrinter
190191
MC

llvm/lib/Transforms/Instrumentation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
99
MemProfiler.cpp
1010
MemorySanitizer.cpp
1111
NumericalStabilitySanitizer.cpp
12+
OffloadSanitizer.cpp
1213
IndirectCallPromotion.cpp
1314
Instrumentation.cpp
1415
InstrOrderFile.cpp
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
12+
13+
#include "llvm/ADT/SetVector.h"
14+
#include "llvm/ADT/SmallVector.h"
15+
#include "llvm/IR/DebugInfoMetadata.h"
16+
#include "llvm/IR/DerivedTypes.h"
17+
#include "llvm/IR/IRBuilder.h"
18+
#include "llvm/IR/InstIterator.h"
19+
#include "llvm/IR/Instructions.h"
20+
#include "llvm/IR/IntrinsicInst.h"
21+
#include "llvm/IR/Intrinsics.h"
22+
#include "llvm/IR/IntrinsicsAMDGPU.h"
23+
#include "llvm/IR/Module.h"
24+
#include "llvm/IR/Value.h"
25+
#include "llvm/Transforms/Utils/Cloning.h"
26+
#include "llvm/Transforms/Utils/ModuleUtils.h"
27+
28+
using namespace llvm;
29+
30+
#define DEBUG_TYPE "offload-sanitizer"
31+
32+
namespace {
33+
34+
class OffloadSanitizerImpl final {
35+
public:
36+
OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
37+
: M(M), FAM(FAM), Ctx(M.getContext()) {}
38+
39+
bool instrument();
40+
41+
private:
42+
bool shouldInstrumentFunction(Function &Fn);
43+
bool instrumentFunction(Function &Fn);
44+
bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
45+
46+
FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
47+
ArrayRef<Type *> ArgTys) {
48+
if (!FC) {
49+
auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false);
50+
FC = M.getOrInsertFunction(Name, NewAllocationFnTy);
51+
}
52+
return FC;
53+
}
54+
55+
/// void __offload_san_trap_info(Int64Ty);
56+
FunctionCallee TrapInfoFn;
57+
FunctionCallee getTrapInfoFn() {
58+
return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
59+
{/*PC*/ Int64Ty});
60+
}
61+
62+
CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
63+
ArrayRef<Value *> Args = std::nullopt,
64+
const Twine &Name = "") {
65+
Calls.push_back(IRB.CreateCall(Callee, Args, Name));
66+
return Calls.back();
67+
}
68+
SmallVector<CallInst *> Calls;
69+
70+
Value *getPC(IRBuilder<> &IRB) {
71+
return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr,
72+
"PC");
73+
}
74+
75+
Module &M;
76+
FunctionAnalysisManager &FAM;
77+
LLVMContext &Ctx;
78+
79+
Type *VoidTy = Type::getVoidTy(Ctx);
80+
Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
81+
PointerType *PtrTy = PointerType::getUnqual(Ctx);
82+
IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
83+
IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
84+
IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
85+
86+
const DataLayout &DL = M.getDataLayout();
87+
};
88+
89+
} // end anonymous namespace
90+
91+
/// Decide whether \p Fn is a candidate for instrumentation.
///
/// Declarations are skipped, as are functions whose name contains "ompx" or
/// "__kmpc" or starts with "rpc_" (presumably runtime-internal code; confirm
/// with the runtime owners), and functions that carry the
/// DisableSanitizerInstrumentation attribute.
bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
  if (Fn.isDeclaration())
    return false;

  const StringRef FnName = Fn.getName();
  const bool HasExcludedName = FnName.contains("ompx") ||
                               FnName.contains("__kmpc") ||
                               FnName.starts_with("rpc_");
  if (HasExcludedName)
    return false;

  const bool OptedOut =
      Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
  return !OptedOut;
}
99+
100+
/// Insert a call to __offload_san_trap_info, passing the current PC, directly
/// in front of every trap intrinsic in \p TrapCalls.
///
/// \returns true if at least one call was inserted, i.e. the IR was modified.
/// The result must be accurate: it feeds the PreservedAnalyses answer of the
/// pass, and reporting "unchanged" after inserting instructions would leave
/// stale analyses marked as valid.
bool OffloadSanitizerImpl::instrumentTrapInstructions(
    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
  for (IntrinsicInst *II : TrapCalls) {
    // Builder positioned at the trap, so new instructions land right before it.
    IRBuilder<> IRB(II);
    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
  }
  // Every visited trap got instrumented, so the IR changed iff there was one.
  return !TrapCalls.empty();
}
109+
110+
/// Instrument a single function.
///
/// The llvm.trap intrinsic calls in \p Fn are collected first and handed to
/// instrumentTrapInstructions() afterwards, so no instructions are inserted
/// while the function is being iterated. Returns the result of that call, or
/// false if \p Fn is not eligible for instrumentation.
bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
  if (!shouldInstrumentFunction(Fn))
    return false;

  SmallVector<IntrinsicInst *> TrapCalls;
  for (Instruction &Inst : instructions(Fn)) {
    auto *Intr = dyn_cast<IntrinsicInst>(&Inst);
    if (Intr && Intr->getIntrinsicID() == Intrinsic::trap)
      TrapCalls.push_back(Intr);
  }

  bool Changed = false;
  Changed |= instrumentTrapInstructions(TrapCalls);
  return Changed;
}
135+
136+
bool OffloadSanitizerImpl::instrument() {
137+
bool Changed = false;
138+
139+
for (Function &Fn : M)
140+
Changed |= instrumentFunction(Fn);
141+
142+
removeFromUsedLists(M, [&](Constant *C) {
143+
if (!C->getName().starts_with("__offload_san"))
144+
return false;
145+
return Changed = true;
146+
});
147+
148+
return Changed;
149+
}
150+
151+
/// New-pass-manager entry point: run the offload sanitizer on \p M.
///
/// All analyses are preserved when the implementation reports no change;
/// otherwise the module is dumped under LLVM_DEBUG and no analyses are
/// preserved.
PreservedAnalyses OffloadSanitizerPass::run(Module &M,
                                            ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  OffloadSanitizerImpl Impl(M, FAM);
  const bool Changed = Impl.instrument();
  if (Changed) {
    LLVM_DEBUG(M.dump());
    return PreservedAnalyses::none();
  }
  return PreservedAnalyses::all();
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
4+
target triple = "amdgcn-amd-amdhsa"
5+
6+
; Test basic offload sanitizer trap instrumentation.
7+
8+
; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s
9+
10+
; A trap followed by a normal return: the pass is expected to insert the PC
; query and the __offload_san_trap_info call directly in front of the trap.
define void @test_trap1() {
11+
; CHECK-LABEL: define void @test_trap1() {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
14+
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
15+
; CHECK-NEXT: call void @llvm.trap()
16+
; CHECK-NEXT: ret void
17+
;
18+
entry:
19+
call void @llvm.trap()
20+
ret void
21+
}
22+
23+
; A trap followed by unreachable: the same instrumentation is expected, and
; the unreachable terminator stays in place.
define void @test_trap2() {
24+
; CHECK-LABEL: define void @test_trap2() {
25+
; CHECK-NEXT: [[ENTRY:.*:]]
26+
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
27+
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
28+
; CHECK-NEXT: call void @llvm.trap()
29+
; CHECK-NEXT: unreachable
30+
;
31+
entry:
32+
call void @llvm.trap()
33+
unreachable
34+
}
35+
36+
; A trap on only one side of a conditional branch: the instrumentation is
; expected in the trapping block, and the other block is left untouched.
define void @test_trap3(i1 %c) {
37+
; CHECK-LABEL: define void @test_trap3(
38+
; CHECK-SAME: i1 [[C:%.*]]) {
39+
; CHECK-NEXT: [[ENTRY:.*:]]
40+
; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
41+
; CHECK: [[T]]:
42+
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
43+
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
44+
; CHECK-NEXT: call void @llvm.trap()
45+
; CHECK-NEXT: unreachable
46+
; CHECK: [[F]]:
47+
; CHECK-NEXT: ret void
48+
;
49+
entry:
50+
br i1 %c, label %t ,label %f
51+
t:
52+
call void @llvm.trap()
53+
unreachable
54+
f:
55+
ret void
56+
}

offload/DeviceRTL/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ set(src_files
9494
${source_directory}/Misc.cpp
9595
${source_directory}/Parallelism.cpp
9696
${source_directory}/Reduction.cpp
97+
${source_directory}/Sanitizer.cpp
9798
${source_directory}/State.cpp
9899
${source_directory}/Synchronization.cpp
99100
${source_directory}/Tasking.cpp

offload/DeviceRTL/include/Utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
2929

3030
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
3131

32+
/// Terminate the execution of this warp.
33+
void terminateWarp();
34+
3235
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
3336
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
3437

0 commit comments

Comments
 (0)