Skip to content

Commit a1d2098

Browse files
committed
Merge branch 'main' into riscv_remove_zext_from_bool_for_atomic_load
2 parents 9537656 + f12fb2f commit a1d2098

File tree

18 files changed

+307
-307
lines changed

18 files changed

+307
-307
lines changed

flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,14 +147,15 @@ struct GPULaunchKernelConversion
147147
stream = adaptor.getAsyncDependencies().front();
148148
}
149149

150-
rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
151-
op, funcTy, cufLaunchClusterKernel,
150+
rewriter.create<mlir::LLVM::CallOp>(
151+
loc, funcTy, cufLaunchClusterKernel,
152152
mlir::ValueRange{kernelPtr, adaptor.getClusterSizeX(),
153153
adaptor.getClusterSizeY(), adaptor.getClusterSizeZ(),
154154
adaptor.getGridSizeX(), adaptor.getGridSizeY(),
155155
adaptor.getGridSizeZ(), adaptor.getBlockSizeX(),
156156
adaptor.getBlockSizeY(), adaptor.getBlockSizeZ(),
157157
stream, dynamicMemorySize, kernelArgs, nullPtr});
158+
rewriter.eraseOp(op);
158159
} else {
159160
auto procAttr =
160161
op->getAttrOfType<cuf::ProcAttributeAttr>(cuf::getProcAttrName());
@@ -189,13 +190,14 @@ struct GPULaunchKernelConversion
189190
stream = adaptor.getAsyncDependencies().front();
190191
}
191192

192-
rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
193-
op, funcTy, cufLaunchKernel,
193+
rewriter.create<mlir::LLVM::CallOp>(
194+
loc, funcTy, cufLaunchKernel,
194195
mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(),
195196
adaptor.getGridSizeY(), adaptor.getGridSizeZ(),
196197
adaptor.getBlockSizeX(), adaptor.getBlockSizeY(),
197198
adaptor.getBlockSizeZ(), stream, dynamicMemorySize,
198199
kernelArgs, nullPtr});
200+
rewriter.eraseOp(op);
199201
}
200202

201203
return mlir::success();

flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,27 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
229229
// CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
230230
// CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
231231
// CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
232+
233+
// -----
234+
235+
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
236+
llvm.func @_FortranACUFLaunchClusterKernel(!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
237+
llvm.func @_QMmod1Psub1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>} {
238+
llvm.return
239+
}
240+
llvm.func @_QQmain() attributes {fir.bindc_name = "test"} {
241+
%0 = llvm.mlir.constant(1 : index) : i64
242+
%1 = llvm.mlir.constant(2 : index) : i64
243+
%2 = llvm.mlir.constant(0 : i32) : i32
244+
%3 = llvm.mlir.constant(10 : index) : i64
245+
%stream = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
246+
%token = cuf.stream_cast %stream : !llvm.ptr
247+
%4 = gpu.launch_func async [%token] @cuda_device_mod::@_QMmod1Psub1 blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64 dynamic_shared_memory_size %2 {cuf.proc_attr = #cuf.cuda_proc<global>}
248+
llvm.return
249+
}
250+
gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">]
251+
}
252+
253+
// CHECK-LABEL: llvm.func @_QQmain()
254+
// CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
255+
// CHECK: llvm.call @_FortranACUFLaunchKernel(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}})

llvm/include/llvm/Analysis/DXILResource.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,19 @@ class DXILResourceMap {
457457
unsigned FirstCBuffer = 0;
458458
unsigned FirstSampler = 0;
459459

460-
/// Populate the map given the resource binding calls in the given module.
460+
/// Populate all the resource instance data.
461461
void populate(Module &M, DXILResourceTypeMap &DRTM);
462+
/// Populate the map given the resource binding calls in the given module.
463+
void populateResourceInfos(Module &M, DXILResourceTypeMap &DRTM);
464+
/// Analyze and populate the directions of the resource counters.
465+
void populateCounterDirections(Module &M);
466+
467+
/// Resolves a resource handle into a vector of ResourceInfos that
468+
/// represent the possible unique creations of the handle. Certain cases are
469+
/// ambiguous so multiple creation instructions may be returned. The resulting
470+
/// ResourceInfo can be used to deduplicate unique handles that
471+
/// reference the same resource
472+
SmallVector<dxil::ResourceInfo *> findByUse(const Value *Key);
462473

463474
public:
464475
using iterator = SmallVector<dxil::ResourceInfo>::iterator;
@@ -476,13 +487,6 @@ class DXILResourceMap {
476487
return Pos == CallMap.end() ? Infos.end() : (Infos.begin() + Pos->second);
477488
}
478489

479-
/// Resolves a resource handle into a vector of ResourceInfos that
480-
/// represent the possible unique creations of the handle. Certain cases are
481-
/// ambiguous so multiple creation instructions may be returned. The resulting
482-
/// ResourceInfo can be used to deduplicate unique handles that
483-
/// reference the same resource
484-
SmallVector<dxil::ResourceInfo> findByUse(const Value *Key) const;
485-
486490
const_iterator find(const CallInst *Key) const {
487491
auto Pos = CallMap.find(Key);
488492
return Pos == CallMap.end() ? Infos.end() : (Infos.begin() + Pos->second);

llvm/lib/Analysis/DXILResource.cpp

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,12 @@ bool DXILResourceTypeMap::invalidate(Module &M, const PreservedAnalyses &PA,
697697
}
698698

699699
//===----------------------------------------------------------------------===//
700+
static bool isUpdateCounterIntrinsic(Function &F) {
701+
return F.getIntrinsicID() == Intrinsic::dx_resource_updatecounter;
702+
}
700703

701-
void DXILResourceMap::populate(Module &M, DXILResourceTypeMap &DRTM) {
704+
void DXILResourceMap::populateResourceInfos(Module &M,
705+
DXILResourceTypeMap &DRTM) {
702706
SmallVector<std::tuple<CallInst *, ResourceInfo, ResourceTypeInfo>> CIToInfos;
703707

704708
for (Function &F : M.functions()) {
@@ -777,6 +781,48 @@ void DXILResourceMap::populate(Module &M, DXILResourceTypeMap &DRTM) {
777781
}
778782
}
779783

784+
void DXILResourceMap::populateCounterDirections(Module &M) {
785+
for (Function &F : M.functions()) {
786+
if (!isUpdateCounterIntrinsic(F))
787+
continue;
788+
789+
LLVM_DEBUG(dbgs() << "Update Counter Function: " << F.getName() << "\n");
790+
791+
for (const User *U : F.users()) {
792+
const CallInst *CI = dyn_cast<CallInst>(U);
793+
assert(CI && "Users of dx_resource_updateCounter must be call instrs");
794+
795+
// Determine if the use is an increment or decrement
796+
Value *CountArg = CI->getArgOperand(1);
797+
ConstantInt *CountValue = cast<ConstantInt>(CountArg);
798+
int64_t CountLiteral = CountValue->getSExtValue();
799+
800+
// 0 is an unknown direction and shouldn't result in an insert
801+
if (CountLiteral == 0)
802+
continue;
803+
804+
ResourceCounterDirection Direction = ResourceCounterDirection::Decrement;
805+
if (CountLiteral > 0)
806+
Direction = ResourceCounterDirection::Increment;
807+
808+
// Collect all potential creation points for the handle arg
809+
Value *HandleArg = CI->getArgOperand(0);
810+
SmallVector<ResourceInfo *> RBInfos = findByUse(HandleArg);
811+
for (ResourceInfo *RBInfo : RBInfos) {
812+
if (RBInfo->CounterDirection == ResourceCounterDirection::Unknown)
813+
RBInfo->CounterDirection = Direction;
814+
else if (RBInfo->CounterDirection != Direction)
815+
RBInfo->CounterDirection = ResourceCounterDirection::Invalid;
816+
}
817+
}
818+
}
819+
}
820+
821+
void DXILResourceMap::populate(Module &M, DXILResourceTypeMap &DRTM) {
822+
populateResourceInfos(M, DRTM);
823+
populateCounterDirections(M);
824+
}
825+
780826
void DXILResourceMap::print(raw_ostream &OS, DXILResourceTypeMap &DRTM,
781827
const DataLayout &DL) const {
782828
for (unsigned I = 0, E = Infos.size(); I != E; ++I) {
@@ -793,10 +839,9 @@ void DXILResourceMap::print(raw_ostream &OS, DXILResourceTypeMap &DRTM,
793839
}
794840
}
795841

796-
SmallVector<dxil::ResourceInfo>
797-
DXILResourceMap::findByUse(const Value *Key) const {
842+
SmallVector<dxil::ResourceInfo *> DXILResourceMap::findByUse(const Value *Key) {
798843
if (const PHINode *Phi = dyn_cast<PHINode>(Key)) {
799-
SmallVector<dxil::ResourceInfo> Children;
844+
SmallVector<dxil::ResourceInfo *> Children;
800845
for (const Value *V : Phi->operands()) {
801846
Children.append(findByUse(V));
802847
}
@@ -810,9 +855,9 @@ DXILResourceMap::findByUse(const Value *Key) const {
810855
switch (CI->getIntrinsicID()) {
811856
// Found the create, return the binding
812857
case Intrinsic::dx_resource_handlefrombinding: {
813-
const auto *It = find(CI);
814-
assert(It != Infos.end() && "HandleFromBinding must be in resource map");
815-
return {*It};
858+
auto Pos = CallMap.find(CI);
859+
assert(Pos != CallMap.end() && "HandleFromBinding must be in resource map");
860+
return {&Infos[Pos->second]};
816861
}
817862
default:
818863
break;
@@ -821,7 +866,7 @@ DXILResourceMap::findByUse(const Value *Key) const {
821866
// Check if any of the parameters are the resource we are following. If so
822867
// keep searching. If none of them are, return an empty list.
823868
const Type *UseType = CI->getType();
824-
SmallVector<dxil::ResourceInfo> Children;
869+
SmallVector<dxil::ResourceInfo *> Children;
825870
for (const Value *V : CI->args()) {
826871
if (V->getType() != UseType)
827872
continue;

llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -782,8 +782,6 @@ DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
782782
assert(!LexicalBlockDIEs.count(DS) &&
783783
"Concrete out-of-line DIE for this scope exists!");
784784
LexicalBlockDIEs[DS] = ScopeDIE;
785-
} else {
786-
InlinedLocalScopeDIEs[DS].push_back(ScopeDIE);
787785
}
788786

789787
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
@@ -1493,19 +1491,6 @@ void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
14931491
getDwarfDebug().addAccelName(*this, CUNode->getNameTableKind(), Name, *Die);
14941492
}
14951493

1496-
void DwarfCompileUnit::attachLexicalScopesAbstractOrigins() {
1497-
auto AttachAO = [&](const DILocalScope *LS, DIE *ScopeDIE) {
1498-
if (auto *AbsLSDie = getAbstractScopeDIEs().lookup(LS))
1499-
addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *AbsLSDie);
1500-
};
1501-
1502-
for (auto [LScope, ScopeDIE] : LexicalBlockDIEs)
1503-
AttachAO(LScope, ScopeDIE);
1504-
for (auto &[LScope, ScopeDIEs] : InlinedLocalScopeDIEs)
1505-
for (auto *ScopeDIE : ScopeDIEs)
1506-
AttachAO(LScope, ScopeDIE);
1507-
}
1508-
15091494
DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
15101495
auto &AbstractEntities = getAbstractEntities();
15111496
auto I = AbstractEntities.find(Node);

llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,6 @@ class DwarfCompileUnit final : public DwarfUnit {
8282
// List of abstract local scopes (either DISubprogram or DILexicalBlock).
8383
DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
8484

85-
// List of inlined lexical block scopes that belong to subprograms within this
86-
// CU.
87-
DenseMap<const DILocalScope *, SmallVector<DIE *, 2>> InlinedLocalScopeDIEs;
88-
8985
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
9086

9187
/// DWO ID for correlating skeleton and split units.
@@ -303,7 +299,6 @@ class DwarfCompileUnit final : public DwarfUnit {
303299

304300
void finishSubprogramDefinition(const DISubprogram *SP);
305301
void finishEntityDefinition(const DbgEntity *Entity);
306-
void attachLexicalScopesAbstractOrigins();
307302

308303
/// Find abstract variable associated with Var.
309304
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1262,7 +1262,6 @@ void DwarfDebug::finalizeModuleInfo() {
12621262
auto &TheCU = *P.second;
12631263
if (TheCU.getCUNode()->isDebugDirectivesOnly())
12641264
continue;
1265-
TheCU.attachLexicalScopesAbstractOrigins();
12661265
// Emit DW_AT_containing_type attribute to connect types with their
12671266
// vtable holding type.
12681267
TheCU.constructContainingTypeDIEs();

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4382,6 +4382,43 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
43824382
Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
43834383
break;
43844384
}
4385+
case ISD::ATOMIC_LOAD: {
4386+
// If we are looking at the loaded value.
4387+
if (Op.getResNo() == 0) {
4388+
auto *AT = cast<AtomicSDNode>(Op);
4389+
unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
4390+
KnownBits KnownScalarMemory(ScalarMemorySize);
4391+
if (const MDNode *MD = AT->getRanges())
4392+
computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);
4393+
4394+
switch (AT->getExtensionType()) {
4395+
case ISD::ZEXTLOAD:
4396+
Known = KnownScalarMemory.zext(BitWidth);
4397+
break;
4398+
case ISD::SEXTLOAD:
4399+
Known = KnownScalarMemory.sext(BitWidth);
4400+
break;
4401+
case ISD::EXTLOAD:
4402+
switch (TLI->getExtendForAtomicOps()) {
4403+
case ISD::ZERO_EXTEND:
4404+
Known = KnownScalarMemory.zext(BitWidth);
4405+
break;
4406+
case ISD::SIGN_EXTEND:
4407+
Known = KnownScalarMemory.sext(BitWidth);
4408+
break;
4409+
default:
4410+
Known = KnownScalarMemory.anyext(BitWidth);
4411+
break;
4412+
}
4413+
break;
4414+
case ISD::NON_EXTLOAD:
4415+
Known = KnownScalarMemory;
4416+
break;
4417+
}
4418+
assert(Known.getBitWidth() == BitWidth);
4419+
}
4420+
break;
4421+
}
43854422
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
43864423
if (Op.getResNo() == 1) {
43874424
// The boolean result conforms to getBooleanContents.
@@ -4407,21 +4444,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
44074444
case ISD::ATOMIC_LOAD_MIN:
44084445
case ISD::ATOMIC_LOAD_MAX:
44094446
case ISD::ATOMIC_LOAD_UMIN:
4410-
case ISD::ATOMIC_LOAD_UMAX:
4411-
case ISD::ATOMIC_LOAD: {
4447+
case ISD::ATOMIC_LOAD_UMAX: {
44124448
// If we are looking at the loaded value.
44134449
if (Op.getResNo() == 0) {
44144450
auto *AT = cast<AtomicSDNode>(Op);
44154451
unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();
44164452

4417-
// For atomic_load, prefer to use the extension type.
4418-
if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
4419-
if (AT->getExtensionType() == ISD::ZEXTLOAD)
4420-
Known.Zero.setBitsFrom(MemBits);
4421-
else if (AT->getExtensionType() != ISD::SEXTLOAD &&
4422-
TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4423-
Known.Zero.setBitsFrom(MemBits);
4424-
} else if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4453+
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
44254454
Known.Zero.setBitsFrom(MemBits);
44264455
}
44274456
break;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5150,9 +5150,10 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
51505150

51515151
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
51525152

5153+
const MDNode *Ranges = getRangeMetadata(I);
51535154
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
51545155
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
5155-
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
5156+
I.getAlign(), AAMDNodes(), Ranges, SSID, Order);
51565157

51575158
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
51585159

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9661,8 +9661,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
96619661
if (auto *UncountableExitingBlock =
96629662
Legal->getUncountableEarlyExitingBlock()) {
96639663
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9664-
*PSE.getSE(), OrigLoop, UncountableExitingBlock,
9665-
RecipeBuilder, Range);
9664+
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9665+
Range);
96669666
}
96679667
DenseMap<VPValue *, VPValue *> IVEndValues;
96689668
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);

0 commit comments

Comments
 (0)