Skip to content

Commit bc7e452

Browse files
committed
backend changes
1 parent 5c53494 commit bc7e452

File tree

4 files changed

+82
-13
lines changed

4 files changed

+82
-13
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
2+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
3+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
4+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
5+
program test
6+
implicit none
7+
integer :: A, threads
8+
threads = 128
9+
A = 0
10+
!$omp target parallel num_threads(threads)
11+
!$omp atomic
12+
A = A + 1
13+
!$omp end target parallel
14+
end program test
15+
16+
!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
17+
!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
18+
!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
19+
!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3282,7 +3282,9 @@ class OpenMPIRBuilder {
32823282
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
32833283
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
32843284
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3285-
bool IsXBinopExpr);
3285+
bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
3286+
bool IsAmdgpuNoFineGrainedMemory,
3287+
bool IsAmdgpuNoRemoteMemory);
32863288

32873289
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
32883290
///
@@ -3355,7 +3357,10 @@ class OpenMPIRBuilder {
33553357
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(
33563358
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33573359
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3358-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
3360+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3361+
bool IsAmdgpuIgnoreDenormalMode = false,
3362+
bool IsAmdgpuNoFineGrainedMemory = false,
3363+
bool IsAmdgpuNoRemoteMemory = false);
33593364

33603365
/// Emit atomic update for constructs: --- Only Scalar data types
33613366
/// V = X; X = X BinOp Expr ,
@@ -3390,7 +3395,10 @@ class OpenMPIRBuilder {
33903395
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33913396
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
33923397
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3393-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr);
3398+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3399+
bool IsAmdgpuIgnoreDenormalMode = false,
3400+
bool IsAmdgpuNoFineGrainedMemory = false,
3401+
bool IsAmdgpuNoRemoteMemory = false);
33943402

33953403
/// Emit atomic compare for constructs: --- Only scalar data types
33963404
/// cond-expr-stmt:

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8960,7 +8960,9 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
89608960
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
89618961
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
89628962
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8963-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
8963+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
8964+
bool IsAmdgpuIgnoreDenormalMode, bool IsNoFineGrainedMemory,
8965+
bool IsNoRemoteMemory) {
89648966
assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
89658967
if (!updateToLocation(Loc))
89668968
return Loc.IP;
@@ -8980,7 +8982,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
89808982

89818983
Expected<std::pair<Value *, Value *>> AtomicResult =
89828984
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8983-
X.IsVolatile, IsXBinopExpr);
8985+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode,
8986+
IsNoFineGrainedMemory, IsNoRemoteMemory);
89848987
if (!AtomicResult)
89858988
return AtomicResult.takeError();
89868989
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
@@ -9027,7 +9030,9 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
90279030
Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
90289031
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
90299032
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
9030-
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
9033+
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
9034+
bool IsAmdgpuIgnoreDenormalMode, bool IsAmdgpuNoFineGrainedMemory,
9035+
bool IsAmdgpuNoRemoteMemory) {
90319036
// TODO: handle the case where XElemTy is not byte-sized or not a power of 2
90329037
// or a complex datatype.
90339038
bool emitRMWOp = false;
@@ -9050,7 +9055,18 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
90509055

90519056
std::pair<Value *, Value *> Res;
90529057
if (emitRMWOp) {
9053-
Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
9058+
AtomicRMWInst *atomicRMWInst =
9059+
Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
9060+
if (IsAmdgpuIgnoreDenormalMode)
9061+
atomicRMWInst->setMetadata("amdgpu.ignore.denormal.mode",
9062+
llvm::MDNode::get(Builder.getContext(), {}));
9063+
if (IsAmdgpuNoFineGrainedMemory)
9064+
atomicRMWInst->setMetadata("amdgpu.no.fine.grained.memory",
9065+
llvm::MDNode::get(Builder.getContext(), {}));
9066+
if (IsAmdgpuNoRemoteMemory)
9067+
atomicRMWInst->setMetadata("amdgpu.no.remote.memory",
9068+
llvm::MDNode::get(Builder.getContext(), {}));
9069+
Res.first = atomicRMWInst;
90549070
// not needed except in case of postfix captures. Generate anyway for
90559071
// consistency with the else part. Will be removed with any DCE pass.
90569072
// AtomicRMWInst::Xchg does not have a corresponding instruction.
@@ -9182,7 +9198,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
91829198
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
91839199
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
91849200
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
9185-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
9201+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
9202+
bool IsAmdgpuIgnoreDenormalMode, bool IsAmdgpuNoFineGrainedMemory,
9203+
bool IsAmdgpuNoRemoteMemory) {
91869204
if (!updateToLocation(Loc))
91879205
return Loc.IP;
91889206

@@ -9203,7 +9221,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
92039221
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
92049222
Expected<std::pair<Value *, Value *>> AtomicResult =
92059223
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
9206-
X.IsVolatile, IsXBinopExpr);
9224+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode,
9225+
IsAmdgpuNoFineGrainedMemory, IsAmdgpuNoRemoteMemory);
92079226
if (!AtomicResult)
92089227
return AtomicResult.takeError();
92099228
Value *CapturedVal =

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3242,13 +3242,21 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
32423242
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
32433243
};
32443244

3245+
mlir::omp::AtomicControlAttr atomicControlAttr =
3246+
opInst.getAtomicControlAttr();
3247+
bool isAmdgpuIgnoreDenormalMode =
3248+
atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3249+
bool isAmdgpuNoFineGrainedMemory =
3250+
!atomicControlAttr.getAmdgpuFineGrainedMemory();
3251+
bool isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
32453252
// Handle ambiguous alloca, if any.
32463253
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
32473254
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
32483255
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3249-
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
3250-
atomicOrdering, binop, updateFn,
3251-
isXBinopExpr);
3256+
ompBuilder->createAtomicUpdate(
3257+
ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop,
3258+
updateFn, isXBinopExpr, isAmdgpuIgnoreDenormalMode,
3259+
isAmdgpuNoFineGrainedMemory, isAmdgpuNoRemoteMemory);
32523260

32533261
if (failed(handleError(afterIP, *opInst)))
32543262
return failure();
@@ -3270,6 +3278,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
32703278
llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
32713279

32723280
omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
3281+
32733282
omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
32743283

32753284
assert((atomicUpdateOp || atomicWriteOp) &&
@@ -3337,13 +3346,27 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
33373346
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
33383347
};
33393348

3349+
bool isAmdgpuIgnoreDenormalMode = false;
3350+
bool isAmdgpuNoFineGrainedMemory = true;
3351+
bool isAmdgpuNoRemoteMemory = true;
3352+
if (atomicUpdateOp) {
3353+
mlir::omp::AtomicControlAttr atomicControlAttr =
3354+
atomicUpdateOp.getAtomicControlAttr();
3355+
isAmdgpuIgnoreDenormalMode =
3356+
atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3357+
isAmdgpuNoFineGrainedMemory =
3358+
!atomicControlAttr.getAmdgpuFineGrainedMemory();
3359+
isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
3360+
}
33403361
// Handle ambiguous alloca, if any.
33413362
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
33423363
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
33433364
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
33443365
ompBuilder->createAtomicCapture(
33453366
ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
3346-
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
3367+
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
3368+
isAmdgpuIgnoreDenormalMode, isAmdgpuNoFineGrainedMemory,
3369+
isAmdgpuNoRemoteMemory);
33473370

33483371
if (failed(handleError(afterIP, *atomicCaptureOp)))
33493372
return failure();

0 commit comments

Comments
 (0)