Skip to content

Commit 49cc2dc

Browse files
committed
R2: Addressing review comments
1 parent 70547b5 commit 49cc2dc

File tree

3 files changed

+71
-23
lines changed

3 files changed

+71
-23
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3205,6 +3205,23 @@ llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
32053205
.Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
32063206
}
32073207

3208+
/// Reads the optional atomic-control attribute of an `omp.atomic.update`
/// operation into three boolean out-parameters.
///
/// All three flags are first reset to `false`. They are overwritten with the
/// attribute's values only when \p atomicUpdateOp is non-null and actually
/// carries the atomic-control attribute; otherwise the `false` defaults stand.
///
/// \param atomicUpdateOp        operation to inspect; may be a null handle.
/// \param isIgnoreDenormalMode  receives the attribute's ignore-denormal-mode
///                              flag.
/// \param isFineGrainedMemory   receives the attribute's fine-grained-memory
///                              flag.
/// \param isRemoteMemory        receives the attribute's remote-memory flag.
void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp,
                               bool &isIgnoreDenormalMode,
                               bool &isFineGrainedMemory,
                               bool &isRemoteMemory) {
  // Start from the defaults so callers may pass uninitialized locals.
  isIgnoreDenormalMode = isFineGrainedMemory = isRemoteMemory = false;

  // The null check must come first: the attribute lookup dereferences the op.
  const bool hasAtomicControl =
      atomicUpdateOp &&
      atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName());
  if (!hasAtomicControl)
    return;

  mlir::omp::AtomicControlAttr controlAttr =
      atomicUpdateOp.getAtomicControlAttr();
  isRemoteMemory = controlAttr.getRemoteMemory();
  isFineGrainedMemory = controlAttr.getFineGrainedMemory();
  isIgnoreDenormalMode = controlAttr.getIgnoreDenormalMode();
}
3224+
32083225
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
32093226
static LogicalResult
32103227
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
@@ -3269,17 +3286,11 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
32693286
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
32703287
};
32713288

3272-
bool isIgnoreDenormalMode = false;
3273-
bool isFineGrainedMemory = false;
3274-
bool isRemoteMemory = false;
3275-
if (opInst->hasAttr(opInst.getAtomicControlAttrName())) {
3276-
mlir::omp::AtomicControlAttr atomicControlAttr =
3277-
opInst.getAtomicControlAttr();
3278-
isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3279-
isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3280-
isRemoteMemory = atomicControlAttr.getRemoteMemory();
3281-
}
3282-
3289+
bool isIgnoreDenormalMode;
3290+
bool isFineGrainedMemory;
3291+
bool isRemoteMemory;
3292+
extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
3293+
isRemoteMemory);
32833294
// Handle ambiguous alloca, if any.
32843295
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
32853296
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -3376,18 +3387,11 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
33763387
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
33773388
};
33783389

3379-
bool isIgnoreDenormalMode = false;
3380-
bool isFineGrainedMemory = false;
3381-
bool isRemoteMemory = false;
3382-
if (atomicUpdateOp &&
3383-
atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
3384-
mlir::omp::AtomicControlAttr atomicControlAttr =
3385-
atomicUpdateOp.getAtomicControlAttr();
3386-
isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3387-
isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3388-
isRemoteMemory = atomicControlAttr.getRemoteMemory();
3389-
}
3390-
3390+
bool isIgnoreDenormalMode;
3391+
bool isFineGrainedMemory;
3392+
bool isRemoteMemory;
3393+
extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
3394+
isFineGrainedMemory, isRemoteMemory);
33913395
// Handle ambiguous alloca, if any.
33923396
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
33933397
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
// The omp.atomic.update inside the omp.atomic.capture region below is tagged
// with atomic_control<fine_grained_memory = true>; the translated LLVM IR is
// expected to contain an atomicrmw add carrying !amdgpu.no.remote.memory
// metadata.
3+
// CHECK: atomicrmw add ptr %loadgep_, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
4+
5+
module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<1> = dense<64> : vector<4xi64>, !llvm.ptr<2> = dense<32> : vector<4xi64>, !llvm.ptr<3> = dense<32> : vector<4xi64>, !llvm.ptr<4> = dense<64> : vector<4xi64>, !llvm.ptr<5> = dense<32> : vector<4xi64>, !llvm.ptr<6> = dense<32> : vector<4xi64>, !llvm.ptr<7> = dense<[160, 256, 256, 32]> : vector<4xi64>, !llvm.ptr<8> = dense<[128, 128, 128, 48]> : vector<4xi64>, !llvm.ptr<9> = dense<[192, 256, 256, 32]> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.legal_int_widths" = array<i32: 32, 64>, "dlti.stack_alignment" = 32 : i64, "dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, fir.atomic_fine_grained_memory, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "generic-hsa", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<openmp_device_version = 31>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target_triples = [], omp.version = #omp.version<version = 31>} {
6+
llvm.func @_QQmain() attributes {fir.bindc_name = "TEST", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "generic-hsa"} {
7+
%0 = llvm.mlir.constant(1 : i64) : i64
8+
%1 = llvm.alloca %0 x i32 {bindc_name = "threads"} : (i64) -> !llvm.ptr<5>
9+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
10+
%3 = llvm.mlir.constant(1 : i64) : i64
11+
%4 = llvm.alloca %3 x i32 {bindc_name = "capture"} : (i64) -> !llvm.ptr<5>
12+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
13+
%6 = llvm.mlir.constant(1 : i64) : i64
14+
%7 = llvm.alloca %6 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
15+
%8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
16+
%9 = llvm.mlir.constant(0 : i32) : i32
17+
%10 = llvm.mlir.constant(128 : i32) : i32
18+
%11 = llvm.mlir.constant(1 : i64) : i64
19+
%12 = llvm.mlir.constant(1 : i64) : i64
20+
%13 = llvm.mlir.constant(1 : i64) : i64
21+
llvm.store %10, %2 : i32, !llvm.ptr
22+
llvm.store %9, %8 : i32, !llvm.ptr
23+
%14 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"}
24+
%15 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "capture"}
25+
%16 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"}
26+
omp.target map_entries(%14 -> %arg0, %15 -> %arg1, %16 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
27+
%17 = llvm.mlir.constant(1 : i32) : i32
28+
%18 = llvm.load %arg0 : !llvm.ptr -> i32
29+
omp.parallel num_threads(%18 : i32) {
30+
omp.atomic.capture {
31+
omp.atomic.read %arg1 = %arg2 : !llvm.ptr, !llvm.ptr, i32
32+
omp.atomic.update %arg2 : !llvm.ptr {
33+
^bb0(%arg3: i32):
34+
%19 = llvm.add %arg3, %17 : i32
35+
omp.yield(%19 : i32)
36+
} {atomic_control = #omp.atomic_control<fine_grained_memory = true>}
37+
}
38+
omp.terminator
39+
}
40+
omp.terminator
41+
}
42+
llvm.return
43+
}
44+
}

0 commit comments

Comments
 (0)