31
31
#include " llvm/Frontend/OpenMP/OMPDeviceConstants.h"
32
32
#include " llvm/Frontend/OpenMP/OMPIRBuilder.h"
33
33
#include " llvm/IR/Constants.h"
34
+ #include " llvm/IR/DebugInfo.h"
34
35
#include " llvm/IR/DebugInfoMetadata.h"
35
36
#include " llvm/IR/DerivedTypes.h"
36
37
#include " llvm/IR/IRBuilder.h"
38
+ #include " llvm/IR/InstIterator.h"
39
+ #include " llvm/IR/IntrinsicInst.h"
37
40
#include " llvm/IR/ReplaceConstant.h"
38
41
#include " llvm/Support/FileSystem.h"
39
42
#include " llvm/TargetParser/Triple.h"
@@ -5462,6 +5465,106 @@ static void updateDebugInfoForDeclareTargetVariables(
5462
5465
}
5463
5466
}
5464
5467
5468
+ static void addAllocasForDeclareTargetFunctionPointerArgs (
5469
+ llvm::Function *Fn, LLVM::ModuleTranslation &moduleTranslation) {
5470
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder ();
5471
+ llvm::Module &M = ompBuilder->M ;
5472
+
5473
+ if (!llvm::Triple (M.getTargetTriple ()).isAMDGPU ())
5474
+ return ;
5475
+
5476
+ if (Fn->empty ())
5477
+ return ;
5478
+
5479
+ llvm::IRBuilderBase &builder = ompBuilder->Builder ;
5480
+ llvm::OpenMPIRBuilder::InsertPointTy curInsert = builder.saveIP ();
5481
+ unsigned int allocaAS = M.getDataLayout ().getAllocaAddrSpace ();
5482
+ unsigned int defaultAS = M.getDataLayout ().getProgramAddressSpace ();
5483
+
5484
+ builder.SetInsertPoint (Fn->getEntryBlock ().getFirstInsertionPt ());
5485
+
5486
+ llvm::Type *PtrTy = builder.getPtrTy (defaultAS);
5487
+ llvm::Type *AllocaPtrTy = builder.getPtrTy (allocaAS);
5488
+ llvm::DIExprBuilder EB (Fn->getContext ());
5489
+ EB.append <llvm::DIOp::Arg>(0u , AllocaPtrTy);
5490
+ EB.append <llvm::DIOp::Deref>(PtrTy);
5491
+ EB.append <llvm::DIOp::Deref>(PtrTy);
5492
+ llvm::DIExpression *Expr = EB.intoExpression ();
5493
+
5494
+ // flang does not generate allocas for the arguments that are passed by ref.
5495
+ // When the Argument is the location, the quality of the debug information is
5496
+ // poor. The variables are defines on very few addresses and show up as
5497
+ // optimized in most places. One of the reason is the interaction of DI-Op
5498
+ // based ops and regular ones.
5499
+ // Generating alloca seems like the best thing which is done in the loop
5500
+ // below. The users are updated accordingly.
5501
+ for (auto &Arg : Fn->args ()) {
5502
+ if (Arg.getType ()->isPointerTy ()) {
5503
+ llvm::Value *V = builder.CreateAlloca (Arg.getType (), allocaAS, nullptr );
5504
+ if (allocaAS != defaultAS)
5505
+ V = ompBuilder->Builder .CreateAddrSpaceCast (
5506
+ V, builder.getPtrTy (defaultAS));
5507
+ llvm::StoreInst *Store = builder.CreateStore (&Arg, V);
5508
+ llvm::Value *Load = builder.CreateLoad (Arg.getType (), V);
5509
+ llvm::SmallVector<llvm::DbgVariableIntrinsic *> DbgUsers;
5510
+ llvm::SmallVector<llvm::DbgVariableRecord *> DPUsers;
5511
+ llvm::findDbgUsers (DbgUsers, &Arg, &DPUsers);
5512
+ for (auto *DVI : DbgUsers) {
5513
+ DVI->replaceVariableLocationOp (&Arg, V);
5514
+ DVI->setExpression (Expr);
5515
+ }
5516
+ for (auto *DVR : DPUsers) {
5517
+ DVR->replaceVariableLocationOp (&Arg, V);
5518
+ DVR->setExpression (Expr);
5519
+ }
5520
+ Arg.replaceUsesWithIf (Load, [&](const llvm::Use &U) -> bool {
5521
+ // We dont want to replace Arg from the store we created above.
5522
+ if (const auto *SI = dyn_cast<llvm::StoreInst>(U.getUser ()))
5523
+ return SI != Store;
5524
+ return true ;
5525
+ });
5526
+ }
5527
+ }
5528
+ builder.restoreIP (curInsert);
5529
+ }
5530
+
5531
+ // This function adds DIOp-based expressions to the debug records in the
5532
+ // declare target functions.
5533
+
5534
+ static void updateDebugInfoForDeclareTargetFunctions (
5535
+ llvm::Function *Fn, LLVM::ModuleTranslation &moduleTranslation) {
5536
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder ();
5537
+ llvm::Module &M = ompBuilder->M ;
5538
+
5539
+ if (!llvm::Triple (M.getTargetTriple ()).isAMDGPU ())
5540
+ return ;
5541
+
5542
+ auto AddExpression = [&](auto *DR) {
5543
+ llvm::DIExpression *Old = DR->getExpression ();
5544
+ // Skip if an expression is already present.
5545
+ if ((Old != nullptr ) && (Old->getNumElements () != 0 ))
5546
+ return ;
5547
+ for (auto Loc : DR->location_ops ()) {
5548
+ llvm::Type *Ty = Loc->getType ();
5549
+ if (auto *Ref = dyn_cast<llvm::AddrSpaceCastInst>(Loc))
5550
+ Ty = Ref->getPointerOperand ()->getType ();
5551
+ llvm::DIExprBuilder EB (Fn->getContext ());
5552
+ EB.append <llvm::DIOp::Arg>(0u , Ty);
5553
+ EB.append <llvm::DIOp::Deref>(Loc->getType ());
5554
+ DR->setExpression (EB.intoExpression ());
5555
+ break ;
5556
+ }
5557
+ };
5558
+
5559
+ for (llvm::Instruction &I : instructions (Fn)) {
5560
+ if (auto *DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&I))
5561
+ AddExpression (DDI);
5562
+
5563
+ for (llvm::DbgVariableRecord &DVR : filterDbgVars (I.getDbgRecordRange ()))
5564
+ AddExpression (&DVR);
5565
+ }
5566
+ }
5567
+
5465
5568
static LogicalResult
5466
5569
convertDeclareTargetAttr (Operation *op, mlir::omp::DeclareTargetAttr attribute,
5467
5570
LLVM::ModuleTranslation &moduleTranslation) {
@@ -5481,11 +5584,15 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
5481
5584
omp::DeclareTargetDeviceType declareType =
5482
5585
attribute.getDeviceType ().getValue ();
5483
5586
5587
+ llvm::Function *llvmFunc =
5588
+ moduleTranslation.lookupFunction (funcOp.getName ());
5484
5589
if (declareType == omp::DeclareTargetDeviceType::host) {
5485
- llvm::Function *llvmFunc =
5486
- moduleTranslation.lookupFunction (funcOp.getName ());
5487
5590
llvmFunc->dropAllReferences ();
5488
5591
llvmFunc->eraseFromParent ();
5592
+ } else {
5593
+ addAllocasForDeclareTargetFunctionPointerArgs (llvmFunc,
5594
+ moduleTranslation);
5595
+ updateDebugInfoForDeclareTargetFunctions (llvmFunc, moduleTranslation);
5489
5596
}
5490
5597
}
5491
5598
return success ();
0 commit comments