Skip to content

Commit 30c58f5

Browse files
committed
Inline calls
1 parent 1e4a4d5 commit 30c58f5

File tree

4 files changed

+78
-49
lines changed

4 files changed

+78
-49
lines changed

llvm/lib/Transforms/Instrumentation/GPUSan.cpp

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#include "llvm/ADT/StringMap.h"
1919
#include "llvm/ADT/StringRef.h"
2020
#include "llvm/Analysis/LoopInfo.h"
21+
#include "llvm/Analysis/ScalarEvolution.h"
2122
#include "llvm/Analysis/ValueTracking.h"
2223
#include "llvm/IR/BasicBlock.h"
24+
#include "llvm/IR/CallingConv.h"
2325
#include "llvm/IR/Constants.h"
2426
#include "llvm/IR/DataLayout.h"
2527
#include "llvm/IR/DerivedTypes.h"
@@ -41,8 +43,10 @@
4143
#include "llvm/Support/ErrorHandling.h"
4244
#include "llvm/Support/MathExtras.h"
4345
#include "llvm/Support/StringSaver.h"
46+
#include "llvm/Transforms/Utils/Cloning.h"
4447
#include "llvm/Transforms/Utils/ModuleUtils.h"
4548
#include <cstdint>
49+
#include <optional>
4650

4751
using namespace llvm;
4852

@@ -188,6 +192,7 @@ class GPUSanImpl final {
188192
if (!FC) {
189193
auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false);
190194
FC = M.getOrInsertFunction(Name, NewAllocationFnTy);
195+
Function *F = cast<Function>(FC.getCallee());
191196
}
192197
return FC;
193198
}
@@ -256,6 +261,14 @@ class GPUSanImpl final {
256261
return getOrCreateFn(ThreadIDFn, "ompx_global_thread_id", Int32Ty, {});
257262
}
258263

264+
/// Create a call to \p Callee with arguments \p Args (result named \p Name)
/// at the current insertion point of \p IRB, and record the new instruction
/// in `Calls`. `instrument()` later iterates `Calls` and tries to inline
/// every recorded call via `InlineFunction`.
///
/// \returns the call instruction that was just created.
CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
265+
ArrayRef<Value *> Args = std::nullopt,
266+
const Twine &Name = "") {
267+
// Remember the call before handing it back to the caller.
Calls.push_back(IRB.CreateCall(Callee, Args, Name));
268+
return Calls.back();
269+
}
270+
SmallVector<CallInst *> Calls;
271+
259272
Module &M;
260273
FunctionAnalysisManager &FAM;
261274
LLVMContext &Ctx;
@@ -470,8 +483,8 @@ void GPUSanImpl::getAllocationInfo(Function &Fn, PtrOrigin PO, Value &Object,
470483
else
471484
IP = &*Fn.getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
472485
IRBuilder<> IRB(IP);
473-
auto *CB = IRB.CreateCall(getAllocationInfoFn(PO),
474-
{IRB.CreateAddrSpaceCast(&Object, getPtrTy(PO))});
486+
auto *CB = createCall(IRB, getAllocationInfoFn(PO),
487+
{IRB.CreateAddrSpaceCast(&Object, getPtrTy(PO))});
475488
It.Start = IRB.CreateExtractValue(CB, {0});
476489
It.Length = IRB.CreateExtractValue(CB, {1});
477490
It.Tag = IRB.CreateExtractValue(CB, {2});
@@ -525,7 +538,7 @@ bool GPUSanImpl::instrumentGlobals() {
525538
GlobalValue::PrivateLinkage, "__san.dtor", &M);
526539
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", DtorFn);
527540
IRBuilder<> IRB(Entry);
528-
IRB.CreateCall(getLeakCheckFn());
541+
createCall(IRB, getLeakCheckFn());
529542
IRB.CreateRetVoid();
530543
appendToGlobalDtors(M, DtorFn, 0, nullptr);
531544

@@ -550,10 +563,10 @@ Value *GPUSanImpl::instrumentAllocation(Instruction &I, Value &Size,
550563
Value *PlainI = IRB.CreatePointerBitCastOrAddrSpaceCast(&I, getPtrTy(PO));
551564
static int AllocationId = 1;
552565
auto *CB =
553-
IRB.CreateCall(Fn,
554-
{PlainI, &Size, ConstantInt::get(Int64Ty, AllocationId++),
555-
getSourceIndex(I)},
556-
I.getName() + ".san");
566+
createCall(IRB, Fn,
567+
{PlainI, &Size, ConstantInt::get(Int64Ty, AllocationId++),
568+
getSourceIndex(I)},
569+
I.getName() + ".san");
557570
SmallVector<LifetimeIntrinsic *> Lifetimes;
558571
I.replaceUsesWithIf(
559572
IRB.CreatePointerBitCastOrAddrSpaceCast(CB, I.getType()), [&](Use &U) {
@@ -570,10 +583,10 @@ Value *GPUSanImpl::instrumentAllocation(Instruction &I, Value &Size,
570583
for (auto *LT : Lifetimes) {
571584
if (LT->getIntrinsicID() == Intrinsic::lifetime_start) {
572585
IRB.SetInsertPoint(LT);
573-
IRB.CreateCall(getLifetimeStart(), {CB, LT->getArgOperand(0)});
586+
createCall(IRB, getLifetimeStart(), {CB, LT->getArgOperand(0)});
574587
} else {
575588
IRB.SetInsertPoint(LT);
576-
IRB.CreateCall(getLifetimeEnd(), {CB, LT->getArgOperand(0)});
589+
createCall(IRB, getLifetimeEnd(), {CB, LT->getArgOperand(0)});
577590
}
578591
}
579592
return CB;
@@ -602,6 +615,12 @@ void GPUSanImpl::instrumentAccess(LoopInfo &LI, Instruction &I, int PtrIdx,
602615
getAllocationInfo(*I.getFunction(), PO, *const_cast<Value *>(Object), Start,
603616
Length, Tag);
604617

618+
if (Loop *L = LI.getLoopFor(I.getParent())) {
619+
auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(*I.getFunction());
620+
const auto &LD = SE.getLoopDisposition(SE.getSCEVAtScope(PtrOp, L), L);
621+
LD->
622+
}
623+
605624
static int32_t ReadAccessId = -1;
606625
static int32_t WriteAccessId = 1;
607626
const int32_t &AccessId = IsRead ? ReadAccessId-- : WriteAccessId++;
@@ -614,16 +633,15 @@ void GPUSanImpl::instrumentAccess(LoopInfo &LI, Instruction &I, int PtrIdx,
614633
IRB.CreatePointerBitCastOrAddrSpaceCast(PtrOp, getPtrTy(PO));
615634
CallInst *CB;
616635
if (Start) {
617-
CB =
618-
IRB.CreateCall(getCheckWithBaseFn(PO),
619-
{PlainPtrOp, Start, Length, Tag, Size,
620-
ConstantInt::get(Int64Ty, AccessId), getSourceIndex(I)},
621-
I.getName() + ".san");
636+
CB = createCall(IRB, getCheckWithBaseFn(PO),
637+
{PlainPtrOp, Start, Length, Tag, Size,
638+
ConstantInt::get(Int64Ty, AccessId), getSourceIndex(I)},
639+
I.getName() + ".san");
622640
} else {
623-
CB = IRB.CreateCall(getCheckFn(PO),
624-
{PlainPtrOp, Size, ConstantInt::get(Int64Ty, AccessId),
625-
getSourceIndex(I)},
626-
I.getName() + ".san");
641+
CB = createCall(IRB, getCheckFn(PO),
642+
{PlainPtrOp, Size, ConstantInt::get(Int64Ty, AccessId),
643+
getSourceIndex(I)},
644+
I.getName() + ".san");
627645
}
628646
I.setOperand(PtrIdx,
629647
IRB.CreatePointerBitCastOrAddrSpaceCast(CB, PtrOp->getType()));
@@ -651,9 +669,9 @@ void GPUSanImpl::instrumentGEPInst(LoopInfo &LI, GetElementPtrInst &GEP) {
651669
IRBuilder<> IRB(GEP.getNextNode());
652670
Value *PlainPtrOp =
653671
IRB.CreatePointerBitCastOrAddrSpaceCast(PtrOp, getPtrTy(PO));
654-
auto *CB = IRB.CreateCall(getGEPFn(PO),
655-
{PlainPtrOp, UndefValue::get(Int64Ty), getPC(IRB)},
656-
GEP.getName() + ".san");
672+
auto *CB = createCall(IRB, getGEPFn(PO),
673+
{PlainPtrOp, UndefValue::get(Int64Ty), getPC(IRB)},
674+
GEP.getName() + ".san");
657675
GEP.replaceAllUsesWith(
658676
IRB.CreatePointerBitCastOrAddrSpaceCast(CB, GEP.getType()));
659677
Value *Offset =
@@ -681,8 +699,8 @@ bool GPUSanImpl::instrumentCallInst(LoopInfo &LI, CallInst &CI) {
681699
continue;
682700
Value *PlainOp =
683701
IRB.CreatePointerBitCastOrAddrSpaceCast(Op, getPtrTy(PO));
684-
auto *CB = IRB.CreateCall(getUnpackFn(PO), {PlainOp, getPC(IRB)},
685-
Op->getName() + ".unpack");
702+
auto *CB = createCall(IRB, getUnpackFn(PO), {PlainOp, getPC(IRB)},
703+
Op->getName() + ".unpack");
686704
CI.setArgOperand(
687705
I, IRB.CreatePointerBitCastOrAddrSpaceCast(CB, Op->getType()));
688706
Changed = true;
@@ -763,8 +781,8 @@ void GPUSanImpl::instrumentReturns(
763781
return;
764782
for (auto *RI : Returns) {
765783
IRBuilder<> IRB(RI);
766-
IRB.CreateCall(getFreeNLocalFn(),
767-
{ConstantInt::get(Int32Ty, Allocas.size())});
784+
createCall(IRB, getFreeNLocalFn(),
785+
{ConstantInt::get(Int32Ty, Allocas.size())});
768786
}
769787
}
770788

@@ -783,9 +801,13 @@ bool GPUSanImpl::instrument() {
783801
if (Fn.hasFnAttribute("kernel"))
784802
Kernels.push_back(&Fn);
785803
if (!Fn.getName().contains("ompx") && !Fn.getName().contains("__kmpc") &&
786-
!Fn.getName().starts_with("rpc_"))
787-
if (!Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
804+
!Fn.getName().starts_with("rpc_")) {
805+
if (!Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) {
788806
Changed |= instrumentFunction(Fn);
807+
} else if (!Fn.isDeclaration() &&
808+
Fn.getName().contains("SanitizerTrapInfoTy")) {
809+
}
810+
}
789811
}
790812

791813
SmallVector<CallBase *> AmbiguousCallsOrdered;
@@ -836,7 +858,7 @@ bool GPUSanImpl::instrument() {
836858
}
837859
auto *EntryBB = BasicBlock::Create(Ctx, "entry", LocationGetter);
838860
IRBuilder<> IRB(EntryBB);
839-
Value *Idx = IRB.CreateCall(getThreadIdFn(), {}, "san.gtid");
861+
Value *Idx = createCall(IRB, getThreadIdFn(), {}, "san.gtid");
840862
Value *Ptr = IRB.CreateGEP(Int64Ty, LocationsArray, {Idx});
841863
auto *LocationValue = IRB.CreateLoad(Int64Ty, Ptr);
842864
IRB.CreateRet(LocationValue);
@@ -848,20 +870,20 @@ bool GPUSanImpl::instrument() {
848870
auto *EntryBB = BasicBlock::Create(Ctx, "entry", InitSharedFn);
849871
IRBuilder<> IRB(EntryBB);
850872
if (!AmbiguousCalls.empty()) {
851-
Value *Idx = IRB.CreateCall(getThreadIdFn(), {}, "san.gtid");
873+
Value *Idx = createCall(IRB, getThreadIdFn(), {}, "san.gtid");
852874
Value *Ptr = IRB.CreateGEP(Int64Ty, LocationsArray, {Idx});
853875
IRB.CreateStore(ConstantInt::get(Int64Ty, 0), Ptr);
854876
}
855877
IRB.CreateRetVoid();
856878

857879
for (auto *KernelFn : Kernels) {
858880
IRBuilder<> IRB(&*KernelFn->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
859-
IRB.CreateCall(InitSharedFn, {});
881+
createCall(IRB, InitSharedFn, {});
860882
}
861883

862884
for (const auto &It : llvm::enumerate(AmbiguousCallsOrdered)) {
863885
IRBuilder<> IRB(It.value());
864-
Value *Idx = IRB.CreateCall(getThreadIdFn(), {}, "san.gtid");
886+
Value *Idx = createCall(IRB, getThreadIdFn(), {}, "san.gtid");
865887
Value *Ptr = IRB.CreateGEP(Int64Ty, LocationsArray, {Idx});
866888
Value *OldVal = IRB.CreateLoad(Int64Ty, Ptr);
867889
Value *OldValShifted = IRB.CreateShl(
@@ -888,7 +910,12 @@ bool GPUSanImpl::instrument() {
888910
GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
889911
GV->setVisibility(GlobalValue::ProtectedVisibility);
890912

891-
M.dump();
913+
for (auto *CI : Calls) {
914+
InlineFunctionInfo IFI;
915+
if (InlineFunction(*CI, IFI).isSuccess())
916+
Changed = true;
917+
}
918+
892919
return Changed;
893920
}
894921

@@ -899,6 +926,5 @@ PreservedAnalyses GPUSanPass::run(Module &M, ModuleAnalysisManager &AM) {
899926
if (!Lowerer.instrument())
900927
return PreservedAnalyses::all();
901928
LLVM_DEBUG(M.dump());
902-
903929
return PreservedAnalyses::none();
904930
}

offload/DeviceRTL/src/Sanitizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "DeviceUtils.h"
1313
#include "Interface.h"
1414
#include "LibC.h"
15+
#include "Mapping.h"
1516
#include "Shared/Environment.h"
1617
#include "Synchronization.h"
1718

offload/include/Shared/Sanitizer.h

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ int ompx_thread_id(int Dim);
2121
int64_t __san_get_location_value();
2222
}
2323

24+
// Attribute token meant to be placed inside a `[[...]]` attribute list;
// it expands to `gnu::always_inline`.
#define INLINE gnu::always_inline
25+
2426
enum class AllocationKind { LOCAL, GLOBAL, LAST = GLOBAL };
2527

2628
template <AllocationKind AK> struct ASTypes {
@@ -182,7 +184,7 @@ struct SanitizerTrapInfoTy {
182184
int64_t CallId;
183185
/// }
184186

185-
[[clang::disable_sanitizer_instrumentation]] void
187+
[[clang::disable_sanitizer_instrumentation, INLINE]] void
186188
setCoordinates(int64_t SourceId) {
187189
for (int32_t Dim = 0; Dim < 3; ++Dim) {
188190
BlockId[Dim] = ompx_block_id(Dim);
@@ -193,7 +195,7 @@ struct SanitizerTrapInfoTy {
193195
}
194196

195197
template <enum AllocationKind AK>
196-
[[clang::disable_sanitizer_instrumentation, gnu::always_inline]] void
198+
[[clang::disable_sanitizer_instrumentation, INLINE]] void
197199
allocationError(ErrorCodeTy EC, _AS_PTR(void, AK) Start, uint64_t Length,
198200
int64_t Id, int64_t Tag, uint64_t Slot, int64_t SourceId) {
199201
AllocationStart = (void *)Start;
@@ -208,7 +210,7 @@ struct SanitizerTrapInfoTy {
208210
}
209211

210212
template <enum AllocationKind AK>
211-
[[clang::disable_sanitizer_instrumentation, gnu::always_inline]] void
213+
[[clang::disable_sanitizer_instrumentation, INLINE]] void
212214
propagateAccessError(ErrorCodeTy EC, const AllocationTy<AK> &A,
213215
const AllocationPtrTy<AK> &AP, uint64_t Size, int64_t Id,
214216
int64_t SourceId) {
@@ -232,7 +234,7 @@ struct SanitizerTrapInfoTy {
232234
}
233235

234236
template <enum AllocationKind AK>
235-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
237+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
236238
exceedsAllocationLength(_AS_PTR(void, AK) Start, uint64_t Length,
237239
int64_t AllocationId, uint64_t Slot,
238240
int64_t SourceId) {
@@ -242,7 +244,7 @@ struct SanitizerTrapInfoTy {
242244
}
243245

244246
template <enum AllocationKind AK>
245-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
247+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
246248
exceedsAllocationSlots(_AS_PTR(void, AK) Start, uint64_t Length,
247249
int64_t AllocationId, uint64_t Slot,
248250
int64_t SourceId) {
@@ -252,7 +254,7 @@ struct SanitizerTrapInfoTy {
252254
}
253255

254256
template <enum AllocationKind AK>
255-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
257+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
256258
pointerOutsideAllocation(_AS_PTR(void, AK) Start, uint64_t Length,
257259
int64_t AllocationId, uint64_t Slot, uint64_t PC) {
258260
allocationError<AK>(PointerOutsideAllocation, Start, Length, AllocationId,
@@ -261,36 +263,36 @@ struct SanitizerTrapInfoTy {
261263
}
262264

263265
/// Report an out-of-bounds access: forwards the allocation \p A, the pointer
/// \p AP, the access \p Size, \p AccessId and \p SourceId to
/// propagateAccessError with the OutOfBounds error code, then traps.
/// Declared noreturn; control does not come back from __builtin_trap().
template <enum AllocationKind AK>
264-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
266+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
265267
outOfBoundAccess(const AllocationTy<AK> A, const AllocationPtrTy<AK> AP,
266268
uint64_t Size, int64_t AccessId, int64_t SourceId) {
267269
propagateAccessError(OutOfBounds, A, AP, Size, AccessId, SourceId);
268270
__builtin_trap();
269271
}
270272

271273
/// Report a use-after-scope access: forwards the allocation \p A, the pointer
/// \p AP, the access \p Size, \p AccessId and \p SourceId to
/// propagateAccessError with the UseAfterScope error code, then traps.
/// Declared noreturn; control does not come back from __builtin_trap().
template <enum AllocationKind AK>
272-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
274+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
273275
useAfterScope(const AllocationTy<AK> A, const AllocationPtrTy<AK> AP,
274276
uint64_t Size, int64_t AccessId, int64_t SourceId) {
275277
propagateAccessError(UseAfterScope, A, AP, Size, AccessId, SourceId);
276278
__builtin_trap();
277279
}
278280

279281
/// Report a use-after-free access: forwards the allocation \p A, the pointer
/// \p AP, the access \p Size, \p AccessId and \p SourceId to
/// propagateAccessError with the UseAfterFree error code, then traps.
/// Declared noreturn; control does not come back from __builtin_trap().
template <enum AllocationKind AK>
280-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
282+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
281283
useAfterFree(const AllocationTy<AK> A, const AllocationPtrTy<AK> AP,
282284
uint64_t Size, int64_t AccessId, int64_t SourceId) {
283285
propagateAccessError(UseAfterFree, A, AP, Size, AccessId, SourceId);
284286
__builtin_trap();
285287
}
286288

287289
template <enum AllocationKind AK>
288-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
290+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
289291
accessError(const AllocationPtrTy<AK> AP, int64_t Size, int64_t AccessId,
290292
int64_t SourceId);
291293

292294
template <enum AllocationKind AK>
293-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
295+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
294296
garbagePointer(const AllocationPtrTy<AK> AP, void *P, int64_t SourceId) {
295297
ErrorCode = GarbagePointer;
296298
AllocationStart = P;
@@ -304,7 +306,7 @@ struct SanitizerTrapInfoTy {
304306
}
305307

306308
template <enum AllocationKind AK>
307-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
309+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
308310
memoryLeak(const AllocationTy<AK> A, uint64_t Slot) {
309311
allocationError<AK>(MemoryLeak, A.Start, A.Length, A.Id, A.Tag, Slot,
310312
/*SourceId=*/-1);
@@ -341,7 +343,7 @@ getAllocation(const AllocationPtrTy<AK> AP, int64_t AccessId = 0) {
341343
}
342344

343345
template <enum AllocationKind AK>
344-
[[clang::disable_sanitizer_instrumentation, noreturn, gnu::noinline]] void
346+
[[clang::disable_sanitizer_instrumentation, noreturn, INLINE, gnu::cold]] void
345347
SanitizerTrapInfoTy::accessError(const AllocationPtrTy<AK> AP, int64_t Size,
346348
int64_t AccessId, int64_t SourceId) {
347349
auto &A = getAllocationArray<AK>().Arr[AP.AllocationId];

offload/include/Shared/Types.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
#ifndef OMPTARGET_SHARED_TYPES_H
1414
#define OMPTARGET_SHARED_TYPES_H
1515

16-
#ifdef OMPTARGET_DEVICE_RUNTIME
17-
#include "DeviceTypes.h"
18-
#else
16+
#ifndef OMPTARGET_DEVICE_RUNTIME
1917
#include <cstdint>
18+
#else
19+
#include "DeviceTypes.h"
2020
#endif
2121

2222
#endif // OMPTARGET_SHARED_TYPES_H

0 commit comments

Comments
 (0)