Skip to content

Commit e00728b

Browse files
authored
[flang][OpenMP][debug] Adjust debug info for declare target functions. (llvm#647)
The PR makes the following changes to improve the debug information for declare target functions. 1. Add allocas for the arguments that are passed by pointer. 2. Add DIOp-based expressions to debug records.
2 parents 5702a82 + c782f23 commit e00728b

File tree

4 files changed

+170
-3
lines changed

4 files changed

+170
-3
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-is-target-device -debug-info-kind=standalone %s -o - | FileCheck %s
2+
3+
function add(a, b) result(ret)
4+
real ret
5+
real a
6+
real b
7+
!$omp declare target
8+
if (a > b) then
9+
ret = a;
10+
else
11+
ret = b;
12+
end if
13+
end
14+
15+
!CHECK: define float @add_({{.*}}){{.*}}!dbg ![[SP:[0-9]+]] {
16+
!CHECK: #dbg_declare({{.*}}, ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
17+
!CHECK: #dbg_declare({{.*}}, ![[B:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
18+
!CHECK: #dbg_declare({{.*}}, ![[RET:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr)), !{{.*}})
19+
!CHECK: }
20+
!CHECK: ![[SP]] = {{.*}}!DISubprogram(name: "add"{{.*}})
21+
!CHECK: ![[A]] = !DILocalVariable(name: "a", arg: 1, scope: ![[SP]]{{.*}})
22+
!CHECK: ![[B]] = !DILocalVariable(name: "b", arg: 2, scope: ![[SP]]{{.*}})
23+
!CHECK: ![[RET]] = !DILocalVariable(name: "ret", scope: ![[SP]]{{.*}})

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,12 @@
3131
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
3232
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
3333
#include "llvm/IR/Constants.h"
34+
#include "llvm/IR/DebugInfo.h"
3435
#include "llvm/IR/DebugInfoMetadata.h"
3536
#include "llvm/IR/DerivedTypes.h"
3637
#include "llvm/IR/IRBuilder.h"
38+
#include "llvm/IR/InstIterator.h"
39+
#include "llvm/IR/IntrinsicInst.h"
3740
#include "llvm/IR/ReplaceConstant.h"
3841
#include "llvm/Support/FileSystem.h"
3942
#include "llvm/TargetParser/Triple.h"
@@ -5462,6 +5465,106 @@ static void updateDebugInfoForDeclareTargetVariables(
54625465
}
54635466
}
54645467

5468+
/// Gives every pointer-typed argument of the declare target function \p Fn a
/// stack slot and points the argument's debug users at that slot.
///
/// flang does not generate allocas for arguments that are passed by reference.
/// When the llvm::Argument itself is the debug location, the quality of the
/// debug information is poor: the variables are defined on very few addresses
/// and show up as optimized out in most places. One of the reasons is the
/// interaction between DIOp-based operations and regular ones. Spilling the
/// argument into an alloca and describing that slot instead gives better
/// results.
///
/// For each pointer argument, the following is emitted at the first insertion
/// point of the entry block: an alloca in the alloca address space, an
/// addrspacecast to the default address space (only when the two differ), a
/// store of the argument into the slot, and a load of it back. All debug users
/// of the argument are redirected to the slot with the expression
/// `DIOpArg(0, <alloca ptr>), DIOpDeref(<ptr>), DIOpDeref(<ptr>)`, and every
/// other use of the argument, except the spill store itself, is replaced by
/// the reloaded value.
///
/// The function does nothing unless the module targets AMDGPU and \p Fn has a
/// body. The builder's insertion point is saved on entry and restored on exit.
static void addAllocasForDeclareTargetFunctionPointerArgs(
    llvm::Function *Fn, LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  llvm::Module &module = ompBuilder->M;

  // Only AMDGPU device code with an actual function body is rewritten.
  const bool targetsAMDGPU =
      llvm::Triple(module.getTargetTriple()).isAMDGPU();
  if (!targetsAMDGPU || Fn->empty())
    return;

  llvm::IRBuilderBase &builder = ompBuilder->Builder;
  const llvm::OpenMPIRBuilder::InsertPointTy savedIP = builder.saveIP();

  const llvm::DataLayout &dataLayout = module.getDataLayout();
  const unsigned int allocaAS = dataLayout.getAllocaAddrSpace();
  const unsigned int defaultAS = dataLayout.getProgramAddressSpace();

  // All new instructions go to the top of the entry block.
  builder.SetInsertPoint(Fn->getEntryBlock().getFirstInsertionPt());

  llvm::Type *defaultPtrTy = builder.getPtrTy(defaultAS);
  llvm::Type *allocaPtrTy = builder.getPtrTy(allocaAS);

  // One expression is shared by the debug users of every spilled argument.
  llvm::DIExprBuilder exprBuilder(Fn->getContext());
  exprBuilder.append<llvm::DIOp::Arg>(0u, allocaPtrTy);
  exprBuilder.append<llvm::DIOp::Deref>(defaultPtrTy);
  exprBuilder.append<llvm::DIOp::Deref>(defaultPtrTy);
  llvm::DIExpression *slotExpr = exprBuilder.intoExpression();

  for (llvm::Argument &arg : Fn->args()) {
    llvm::Type *argTy = arg.getType();
    if (!argTy->isPointerTy())
      continue;

    // Spill the incoming pointer to a stack slot and read it back.
    llvm::Value *slot = builder.CreateAlloca(argTy, allocaAS, nullptr);
    if (allocaAS != defaultAS)
      slot = builder.CreateAddrSpaceCast(slot, defaultPtrTy);
    llvm::StoreInst *spill = builder.CreateStore(&arg, slot);
    llvm::Value *reload = builder.CreateLoad(argTy, slot);

    // Redirect both intrinsic-style and record-style debug users to the slot.
    llvm::SmallVector<llvm::DbgVariableIntrinsic *> intrinsicUsers;
    llvm::SmallVector<llvm::DbgVariableRecord *> recordUsers;
    llvm::findDbgUsers(intrinsicUsers, &arg, &recordUsers);

    auto retargetToSlot = [&](auto *dbgUser) {
      dbgUser->replaceVariableLocationOp(&arg, slot);
      dbgUser->setExpression(slotExpr);
    };
    for (llvm::DbgVariableIntrinsic *intrinsicUser : intrinsicUsers)
      retargetToSlot(intrinsicUser);
    for (llvm::DbgVariableRecord *recordUser : recordUsers)
      retargetToSlot(recordUser);

    // Every remaining use reads the reloaded value. The spill store created
    // above must keep using the original argument.
    arg.replaceUsesWithIf(reload, [spill](const llvm::Use &use) -> bool {
      return use.getUser() != spill;
    });
  }

  builder.restoreIP(savedIP);
}
5530+
5531+
// This function Add DIOp based expressions to the debug records in the
5532+
// declare target functions.
5533+
5534+
static void updateDebugInfoForDeclareTargetFunctions(
5535+
llvm::Function *Fn, LLVM::ModuleTranslation &moduleTranslation) {
5536+
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5537+
llvm::Module &M = ompBuilder->M;
5538+
5539+
if (!llvm::Triple(M.getTargetTriple()).isAMDGPU())
5540+
return;
5541+
5542+
auto AddExpression = [&](auto *DR) {
5543+
llvm::DIExpression *Old = DR->getExpression();
5544+
// Skip if an expression is already present.
5545+
if ((Old != nullptr) && (Old->getNumElements() != 0))
5546+
return;
5547+
for (auto Loc : DR->location_ops()) {
5548+
llvm::Type *Ty = Loc->getType();
5549+
if (auto *Ref = dyn_cast<llvm::AddrSpaceCastInst>(Loc))
5550+
Ty = Ref->getPointerOperand()->getType();
5551+
llvm::DIExprBuilder EB(Fn->getContext());
5552+
EB.append<llvm::DIOp::Arg>(0u, Ty);
5553+
EB.append<llvm::DIOp::Deref>(Loc->getType());
5554+
DR->setExpression(EB.intoExpression());
5555+
break;
5556+
}
5557+
};
5558+
5559+
for (llvm::Instruction &I : instructions(Fn)) {
5560+
if (auto *DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&I))
5561+
AddExpression(DDI);
5562+
5563+
for (llvm::DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
5564+
AddExpression(&DVR);
5565+
}
5566+
}
5567+
54655568
static LogicalResult
54665569
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
54675570
LLVM::ModuleTranslation &moduleTranslation) {
@@ -5481,11 +5584,15 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
54815584
omp::DeclareTargetDeviceType declareType =
54825585
attribute.getDeviceType().getValue();
54835586

5587+
llvm::Function *llvmFunc =
5588+
moduleTranslation.lookupFunction(funcOp.getName());
54845589
if (declareType == omp::DeclareTargetDeviceType::host) {
5485-
llvm::Function *llvmFunc =
5486-
moduleTranslation.lookupFunction(funcOp.getName());
54875590
llvmFunc->dropAllReferences();
54885591
llvmFunc->eraseFromParent();
5592+
} else {
5593+
addAllocasForDeclareTargetFunctionPointerArgs(llvmFunc,
5594+
moduleTranslation);
5595+
updateDebugInfoForDeclareTargetFunctions(llvmFunc, moduleTranslation);
54895596
}
54905597
}
54915598
return success();
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
#file = #llvm.di_file<"target.f90" in "">
4+
#cu = #llvm.di_compile_unit<id = distinct[0]<>,
5+
sourceLanguage = DW_LANG_Fortran95, file = #file, isOptimized = false,
6+
emissionKind = LineTablesOnly>
7+
#sp_ty = #llvm.di_subroutine_type<callingConvention = DW_CC_normal>
8+
#sp = #llvm.di_subprogram<id = distinct[1]<>, compileUnit = #cu, scope = #file,
9+
name = "add", file = #file, subprogramFlags = "Definition", type = #sp_ty>
10+
#ty = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
11+
#var_a = #llvm.di_local_variable<scope = #sp, name = "a", file = #file, line = 22, arg = 1, type = #ty>
12+
13+
14+
module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : ui64>} {
15+
llvm.func @add(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
16+
llvm.intr.dbg.declare #var_a = %arg0 : !llvm.ptr loc(#loc2)
17+
llvm.return
18+
} loc(#loc3)
19+
}
20+
21+
#loc1 = loc("target.f90":1:1)
22+
#loc2 = loc("target.f90":46:3)
23+
#loc3 = loc(fused<#sp>[#loc1])
24+
25+
// CHECK: define{{.*}}@add(ptr %[[ARG:[0-9]+]]){{.*}}!dbg ![[SP:[0-9]+]] {
26+
// CHECK: %[[AL:[0-9]+]] = alloca{{.*}}
27+
// CHECK: %[[CAST:[0-9]+]] = addrspacecast ptr addrspace(5) %[[AL]]
28+
// CHECK: store ptr %[[ARG]], ptr %[[CAST]]{{.*}}
29+
// CHECK: load ptr, ptr %[[CAST]]{{.*}}
30+
// CHECK: #dbg_declare(ptr %[[CAST]], ![[A:[0-9]+]], !DIExpression(DIOpArg(0, ptr addrspace(5)), DIOpDeref(ptr), DIOpDeref(ptr)), !{{.*}})
31+
// CHECK: }
32+
// CHECK: ![[SP]] = {{.*}}!DISubprogram(name: "add"{{.*}})
33+
// CHECK: ![[A]] = !DILocalVariable(name: "a", arg: 1, scope: ![[SP]]{{.*}})

mlir/test/Target/LLVMIR/omptarget-wsloop.mlir

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
3838
// CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]])
3939
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
4040
// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
41+
// CHECK: %[[AL:[0-9]+]] = alloca{{.*}}
42+
// CHECK: %[[CAST:[0-9]+]] = addrspacecast ptr addrspace(5) %[[AL]]
43+
// CHECK: store ptr %[[ARG0]], ptr %[[CAST]]{{.*}}
44+
// CHECK: %[[LOAD:[0-9]+]] = load ptr, ptr %[[CAST]]{{.*}}
4145
// CHECK: %[[GEP:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG_ASCAST]], i32 0, i32 0
42-
// CHECK: store ptr %[[ARG0]], ptr %[[GEP]], align 8
46+
// CHECK: store ptr %[[LOAD]], ptr %[[GEP]], align 8
4347
// CHECK: %[[NUM_THREADS:.*]] = call i32 @omp_get_num_threads()
4448
// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), ptr @[[LOOP_BODY_FN:.*]], ptr %[[STRUCTARG_ASCAST]], i32 9, i32 %[[NUM_THREADS]], i32 0)
4549

0 commit comments

Comments
 (0)