-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[MLIR][OpenMP] Host lowering of standalone distribute #127817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
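@llvm/pr-subscribers-mlir @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes: This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). Full diff: https://github.com/llvm/llvm-project/pull/127817.diff 3 Files Affected: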
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a5ff3eff6439f..c8221a9f9854a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (op.getDevice())
result = todo("device");
};
+ auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+ if (op.getDistScheduleChunkSize())
+ result = todo("dist_schedule with chunk_size");
+ };
auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
if (!op.getHasDeviceAddrVars().empty())
result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
LogicalResult result = success();
llvm::TypeSwitch<Operation &>(op)
+ .Case([&](omp::DistributeOp op) {
+ if (op.isComposite() &&
+ isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+ result = op.emitError() << "not yet implemented: "
+ "composite omp.distribute + omp.wsloop";
+ checkAllocate(op, result);
+ checkDistSchedule(op, result);
+ checkOrder(op, result);
+ checkPrivate(op, result);
+ })
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
.Case([&](omp::SectionsOp op) {
checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
return success();
}
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ auto distributeOp = cast<omp::DistributeOp>(opInst);
+ if (failed(checkImplementationStatus(opInst)))
+ return failure();
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ auto bodyGenCB = [&](InsertPointTy allocaIP,
+ InsertPointTy codeGenIP) -> llvm::Error {
+ // DistributeOp has only one region associated with it.
+ builder.restoreIP(codeGenIP);
+
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::Expected<llvm::BasicBlock *> regionBlock =
+ convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+ builder, moduleTranslation);
+ if (!regionBlock)
+ return regionBlock.takeError();
+ builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+ // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+ // Static schedule is the default.
+ auto schedule = omp::ClauseScheduleKind::Static;
+ bool isOrdered = false;
+ std::optional<omp::ScheduleModifier> scheduleMod;
+ bool isSimd = false;
+ llvm::omp::WorksharingLoopType workshareLoopType =
+ llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+ bool loopNeedsBarrier = false;
+ llvm::Value *chunk = nullptr;
+
+ llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+ ompBuilder->applyWorkshareLoop(
+ ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+ convertToScheduleKind(schedule), chunk, isSimd,
+ scheduleMod == omp::ScheduleModifier::monotonic,
+ scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+ workshareLoopType);
+
+ if (!wsloopIP)
+ return wsloopIP.takeError();
+ return llvm::Error::success();
+ };
+
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+ ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+ if (failed(handleError(afterIP, opInst)))
+ return failure();
+
+ builder.restoreIP(*afterIP);
+ return success();
+}
+
/// Lowers the FlagsAttr which is applied to the module on the device
/// pass when offloading, this attribute contains OpenMP RTL globals that can
/// be passed as flags to the frontend, otherwise they are set to default
@@ -4685,6 +4760,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
.Case([&](omp::TargetOp) {
return convertOmpTarget(*op, builder, moduleTranslation);
})
+ .Case([&](omp::DistributeOp) {
+ return convertOmpDistribute(*op, builder, moduleTranslation);
+ })
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index cf18c07dd605b..a5a490e527d79 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
// -----
module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+ %0 = llvm.mlir.constant(42 : index) : i64
+ %1 = llvm.mlir.constant(10 : index) : i64
+ %2 = llvm.mlir.constant(1 : index) : i64
+ omp.distribute {
+ omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK: call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT: br label %[[EXIT:.*]]
+// CHECK: [[EXIT]]:
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTLINED]]({{.*}})
+// CHECK: %[[LASTITER:.*]] = alloca i32
+// CHECK: %[[LB:.*]] = alloca i64
+// CHECK: %[[UB:.*]] = alloca i64
+// CHECK: %[[STRIDE:.*]] = alloca i64
+// CHECK: br label %[[BODY:.*]]
+// CHECK: [[BODY]]:
+// CHECK-NEXT: br label %[[REGION:.*]]
+// CHECK: [[REGION]]:
+// CHECK-NEXT: br label %[[PREHEADER:.*]]
+// CHECK: [[PREHEADER]]:
+// CHECK: store i64 0, ptr %[[LB]]
+// CHECK: store i64 31, ptr %[[UB]]
+// CHECK: store i64 1, ptr %[[STRIDE]]
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index e97b5e54e6415..71dbc061c3104 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
// -----
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
- // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+ omp.parallel {
+ // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute order(concurrent) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
- omp.distribute {
+ omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
|
tblah
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
a79b7a2 to
40d140e
Compare
128819a to
55089ba
Compare
40d140e to
26638a2
Compare
55089ba to
654c63e
Compare
00cc8b6 to
6c88935
Compare
This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). Co-authored-by: Dominik Adamski <[email protected]>
654c63e to
e780d29
Compare
This patch adds MLIR to LLVM IR translation support for standalone
`omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`).