Skip to content

Commit fec5377

Browse files
committed
Add allocas for arguments passed in by ref.
Initially the PR was adding alloca because of the problem with using Argument as location. Although that problem has been resolved and I tried to use them, the debug info that comes out is very poor and mostly ususable. This commit now adds alloca and all the case so that code is same with and without debug information enabled.
1 parent 9b75f26 commit fec5377

File tree

4 files changed

+83
-15
lines changed

4 files changed

+83
-15
lines changed

flang/test/Integration/amdgpu/debug-declare-target-function-var.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ function add(a, b) result(ret)
1313
end
1414

1515
!CHECK: define float @add_({{.*}}){{.*}}!dbg ![[SP:[0-9]+]] {
16-
!CHECK: #dbg_declare({{.*}}, ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr), DIOpDeref(ptr)), !{{.*}})
17-
!CHECK: #dbg_declare({{.*}}, ![[B:[0-9]+]], !DIExpression(DIOpArg(0, ptr), DIOpDeref(ptr)), !{{.*}})
16+
!CHECK: #dbg_declare({{.*}}, ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
17+
!CHECK: #dbg_declare({{.*}}, ![[B:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
1818
!CHECK: #dbg_declare({{.*}}, ![[RET:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr)), !{{.*}})
1919
!CHECK: }
2020
!CHECK: ![[SP]] = {{.*}}!DISubprogram(name: "add"{{.*}})

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,17 @@
3131
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
3232
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
3333
#include "llvm/IR/Constants.h"
34+
#include "llvm/IR/DebugInfo.h"
3435
#include "llvm/IR/DebugInfoMetadata.h"
3536
#include "llvm/IR/DerivedTypes.h"
3637
#include "llvm/IR/IRBuilder.h"
38+
#include "llvm/IR/InstIterator.h"
39+
#include "llvm/IR/IntrinsicInst.h"
3740
#include "llvm/IR/ReplaceConstant.h"
3841
#include "llvm/Support/FileSystem.h"
3942
#include "llvm/TargetParser/Triple.h"
4043
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
4144
#include "llvm/Transforms/Utils/ModuleUtils.h"
42-
#include "llvm/IR/InstIterator.h"
43-
#include "llvm/IR/IntrinsicInst.h"
4445

4546
#include <any>
4647
#include <cstdint>
@@ -5464,8 +5465,10 @@ static void updateDebugInfoForDeclareTargetVariables(
54645465
}
54655466
}
54665467

5467-
// This function adds DIOp based expressions in declare target function to
5468-
// generate valid debug info for AMDGPU.
5468+
// This function handle any adjustments needed in declare target function to
5469+
// generate valid debug info for AMDGPU. It does 2 main things:
5470+
// 1. Add alloca for arguments passed by reference.
5471+
// 2. Add DIOp based expressions
54695472

54705473
static void updateDebugInfoForDeclareTargetFunctions(
54715474
llvm::Function *Fn, LLVM::ModuleTranslation &moduleTranslation) {
@@ -5475,23 +5478,80 @@ static void updateDebugInfoForDeclareTargetFunctions(
54755478
if (!llvm::Triple(M.getTargetTriple()).isAMDGPU())
54765479
return;
54775480

5478-
auto UpdateDebugRecord = [&](auto *DR) {
5481+
llvm::IRBuilderBase &builder = ompBuilder->Builder;
5482+
llvm::OpenMPIRBuilder::InsertPointTy curInsert = builder.saveIP();
5483+
unsigned int allocaAS = M.getDataLayout().getAllocaAddrSpace();
5484+
unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
5485+
5486+
builder.SetInsertPoint(Fn->getEntryBlock().getFirstInsertionPt());
5487+
5488+
llvm::Type *PtrTy = builder.getPtrTy(defaultAS);
5489+
llvm::Type *AllocaPtrTy = builder.getPtrTy(allocaAS);
5490+
llvm::DIExprBuilder EB(Fn->getContext());
5491+
EB.append<llvm::DIOp::Arg>(0u, AllocaPtrTy);
5492+
EB.append<llvm::DIOp::Deref>(PtrTy);
5493+
EB.append<llvm::DIOp::Deref>(PtrTy);
5494+
llvm::DIExpression *Expr = EB.intoExpression();
5495+
5496+
// flang does not generate allocas for the arguments that are passed by ref.
5497+
// When the Argument is the location, the quality of the debug information is
5498+
// poor. The variables are defines on very few addresses and show up as
5499+
// optimized in most places. One of the reason is the interaction of DI-Op
5500+
// based ops and regular ones.
5501+
// Generating alloca seems like the best thing which is done in the loop
5502+
// below. The users are updated accordingly.
5503+
for (auto &Arg : Fn->args()) {
5504+
if (Arg.getType()->isPointerTy()) {
5505+
llvm::Value *V = builder.CreateAlloca(Arg.getType(), allocaAS, nullptr);
5506+
if (allocaAS != defaultAS)
5507+
V = ompBuilder->Builder.CreateAddrSpaceCast(
5508+
V, builder.getPtrTy(defaultAS));
5509+
llvm::StoreInst *Store = builder.CreateStore(&Arg, V);
5510+
llvm::Value *Load = builder.CreateLoad(Arg.getType(), V);
5511+
llvm::SmallVector<llvm::DbgVariableIntrinsic *> DbgUsers;
5512+
llvm::SmallVector<llvm::DbgVariableRecord *> DPUsers;
5513+
llvm::findDbgUsers(DbgUsers, &Arg, &DPUsers);
5514+
for (auto *DVI : DbgUsers) {
5515+
DVI->replaceVariableLocationOp(&Arg, V);
5516+
DVI->setExpression(Expr);
5517+
}
5518+
for (auto *DVR : DPUsers) {
5519+
DVR->replaceVariableLocationOp(&Arg, V);
5520+
DVR->setExpression(Expr);
5521+
}
5522+
Arg.replaceUsesWithIf(Load, [&](const llvm::Use &U) -> bool {
5523+
// We dont want to replace Arg from the store we created above.
5524+
if (const auto *SI = dyn_cast<llvm::StoreInst>(U.getUser()))
5525+
return SI != Store;
5526+
return true;
5527+
});
5528+
}
5529+
}
5530+
builder.restoreIP(curInsert);
5531+
5532+
auto AddExpression = [&](auto *DR) {
5533+
llvm::DIExpression *Old = DR->getExpression();
5534+
// Skip if an expression is already present.
5535+
if ((Old != nullptr) && (Old->getNumElements() != 0))
5536+
return;
54795537
for (auto Loc : DR->location_ops()) {
5480-
llvm::DIExprBuilder ExprBuilder(Fn->getContext());
54815538
llvm::Type *Ty = Loc->getType();
54825539
if (auto *Ref = dyn_cast<llvm::AddrSpaceCastInst>(Loc))
54835540
Ty = Ref->getPointerOperand()->getType();
5484-
ExprBuilder.append<llvm::DIOp::Arg>(0u, Ty);
5485-
ExprBuilder.append<llvm::DIOp::Deref>(Loc->getType());
5486-
DR->setExpression(ExprBuilder.intoExpression());
5541+
llvm::DIExprBuilder EB(Fn->getContext());
5542+
EB.append<llvm::DIOp::Arg>(0u, Ty);
5543+
EB.append<llvm::DIOp::Deref>(Loc->getType());
5544+
DR->setExpression(EB.intoExpression());
5545+
break;
54875546
}
54885547
};
5548+
54895549
for (llvm::Instruction &I : instructions(Fn)) {
54905550
if (auto *DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&I))
5491-
UpdateDebugRecord(DDI);
5551+
AddExpression(DDI);
54925552

54935553
for (llvm::DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
5494-
UpdateDebugRecord(&DVR);
5554+
AddExpression(&DVR);
54955555
}
54965556
}
54975557

mlir/test/Target/LLVMIR/omptarget-decl-target-fn-debug-amdgpu.mlir

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
2323
#loc3 = loc(fused<#sp>[#loc1])
2424

2525
// CHECK: define{{.*}}@add(ptr %[[ARG:[0-9]+]]){{.*}}!dbg ![[SP:[0-9]+]] {
26-
// CHECK: #dbg_declare(ptr %[[ARG]], ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr), DIOpDeref(ptr)), !{{.*}})
26+
// CHECK: %[[AL:[0-9]+]] = alloca{{.*}}
27+
// CHECK: %[[CAST:[0-9]+]] = addrspacecast ptr addrspace(5) %[[AL]]
28+
// CHECK: store ptr %[[ARG]], ptr %[[CAST]]{{.*}}
29+
// CHECK: load ptr, ptr %[[CAST]]{{.*}}
30+
// CHECK: #dbg_declare(ptr %[[CAST]], ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
2731
// CHECK: }
2832
// CHECK: ![[SP]] = {{.*}}!DISubprogram(name: "add"{{.*}})
2933
// CHECK: ![[A]] = !DILocalVariable(name: "a", arg: 1, scope: ![[SP]]{{.*}})

mlir/test/Target/LLVMIR/omptarget-wsloop.mlir

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
3838
// CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]])
3939
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
4040
// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
41+
// CHECK: %[[AL:[0-9]+]] = alloca{{.*}}
42+
// CHECK: %[[CAST:[0-9]+]] = addrspacecast ptr addrspace(5) %[[AL]]
43+
// CHECK: store ptr %[[ARG0]], ptr %[[CAST]]{{.*}}
44+
// CHECK: %[[LOAD:[0-9]+]] = load ptr, ptr %[[CAST]]{{.*}}
4145
// CHECK: %[[GEP:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG_ASCAST]], i32 0, i32 0
42-
// CHECK: store ptr %[[ARG0]], ptr %[[GEP]], align 8
46+
// CHECK: store ptr %[[LOAD]], ptr %[[GEP]], align 8
4347
// CHECK: %[[NUM_THREADS:.*]] = call i32 @omp_get_num_threads()
4448
// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), ptr @[[LOOP_BODY_FN:.*]], ptr %[[STRUCTARG_ASCAST]], i32 9, i32 %[[NUM_THREADS]], i32 0)
4549

0 commit comments

Comments
 (0)