[MLIR][OpenMP] Host lowering of standalone distribute #127817

skatrak · 2025-02-19T15:50:08Z

This patch adds MLIR to LLVM IR translation support for standalone omp.distribute operations, as well as distribute simd through ignoring SIMD information (similarly to do/for simd).

llvmbot · 2025-02-19T15:51:02Z

@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-openmp
@llvm/pr-subscribers-mlir-llvm

@llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)

Changes

This patch adds MLIR to LLVM IR translation support for standalone omp.distribute operations, as well as distribute simd through ignoring SIMD information (similarly to do/for simd).

Full diff: https://github.com/llvm/llvm-project/pull/127817.diff

3 Files Affected:

(modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+78)
(modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+37)
(modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+63-3)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a5ff3eff6439f..c8221a9f9854a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDevice())
       result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+    if (op.getDistScheduleChunkSize())
+      result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
     if (!op.getHasDeviceAddrVars().empty())
       result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch<Operation &>(op)
+      .Case([&](omp::DistributeOp op) {
+        if (op.isComposite() &&
+            isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+          result = op.emitError() << "not yet implemented: "
+                                     "composite omp.distribute + omp.wsloop";
+        checkAllocate(op, result);
+        checkDistSchedule(op, result);
+        checkOrder(op, result);
+        checkPrivate(op, result);
+      })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast<omp::DistributeOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+                       InsertPointTy codeGenIP) -> llvm::Error {
+    // DistributeOp has only one region associated with it.
+    builder.restoreIP(codeGenIP);
+
+    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+    llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+    llvm::Expected<llvm::BasicBlock *> regionBlock =
+        convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+                            builder, moduleTranslation);
+    if (!regionBlock)
+      return regionBlock.takeError();
+    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+    // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+    // Static schedule is the default.
+    auto schedule = omp::ClauseScheduleKind::Static;
+    bool isOrdered = false;
+    std::optional<omp::ScheduleModifier> scheduleMod;
+    bool isSimd = false;
+    llvm::omp::WorksharingLoopType workshareLoopType =
+        llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+    bool loopNeedsBarrier = false;
+    llvm::Value *chunk = nullptr;
+
+    llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation);
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (!wsloopIP)
+      return wsloopIP.takeError();
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+  return success();
+}
+
 /// Lowers the FlagsAttr which is applied to the module on the device
 /// pass when offloading, this attribute contains OpenMP RTL globals that can
 /// be passed as flags to the frontend, otherwise they are set to default
@@ -4685,6 +4760,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::TargetOp) {
             return convertOmpTarget(*op, builder, moduleTranslation);
           })
+          .Case([&](omp::DistributeOp) {
+            return convertOmpDistribute(*op, builder, moduleTranslation);
+          })
           .Case([&](omp::LoopNestOp) {
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index cf18c07dd605b..a5a490e527d79 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
 // -----
 
 module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+  %0 = llvm.mlir.constant(42 : index) : i64
+  %1 = llvm.mlir.constant(10 : index) : i64
+  %2 = llvm.mlir.constant(1 : index) : i64
+  omp.distribute {
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK:         call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT:    br label %[[EXIT:.*]]
+// CHECK:       [[EXIT]]:
+// CHECK:         ret void
+
+// CHECK:       define internal void @[[OUTLINED]]({{.*}})
+// CHECK:         %[[LASTITER:.*]] = alloca i32
+// CHECK:         %[[LB:.*]] = alloca i64
+// CHECK:         %[[UB:.*]] = alloca i64
+// CHECK:         %[[STRIDE:.*]] = alloca i64
+// CHECK:         br label %[[BODY:.*]]
+// CHECK:       [[BODY]]:
+// CHECK-NEXT:    br label %[[REGION:.*]]
+// CHECK:       [[REGION]]:
+// CHECK-NEXT:    br label %[[PREHEADER:.*]]
+// CHECK:       [[PREHEADER]]:
+// CHECK:         store i64 0, ptr %[[LB]]
+// CHECK:         store i64 31, ptr %[[UB]]
+// CHECK:         store i64 1, ptr %[[STRIDE]]
+// CHECK:         %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK:         call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index e97b5e54e6415..71dbc061c3104 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
 
 // -----
 
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel {
+    // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+    omp.distribute {
+      omp.wsloop {
+        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+          omp.yield
+        }
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute order(concurrent) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
   // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
-  omp.distribute {
+  omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
       omp.yield
     }

skatrak · 2025-02-19T15:57:02Z

PR stack:

tblah

LGTM, thanks

This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). Co-authored-by: Dominik Adamski <[email protected]>

skatrak requested review from DominikAdamski, Meinersbur, jsjodin, kiranchandramohan and tblah February 19, 2025 15:50

llvmbot added mlir:llvm mlir mlir:openmp flang:openmp labels Feb 19, 2025

This was referenced Feb 19, 2025

[MLIR][OpenMP] Normalize lowering of omp.loop_nest #127217

Merged

[OpenMPIRBuilder] Add support for distribute constructs #127816

Merged

tblah approved these changes Feb 19, 2025

View reviewed changes

skatrak force-pushed the users/skatrak/target-spmd-02-ompirbuilder-distribute branch from a79b7a2 to 40d140e Compare February 21, 2025 10:06

skatrak force-pushed the users/skatrak/target-spmd-03-mlir-distribute branch 2 times, most recently from 128819a to 55089ba Compare February 21, 2025 10:19

skatrak force-pushed the users/skatrak/target-spmd-02-ompirbuilder-distribute branch from 40d140e to 26638a2 Compare February 24, 2025 12:52

skatrak force-pushed the users/skatrak/target-spmd-03-mlir-distribute branch from 55089ba to 654c63e Compare February 24, 2025 12:53

skatrak force-pushed the users/skatrak/target-spmd-02-ompirbuilder-distribute branch 2 times, most recently from 00cc8b6 to 6c88935 Compare February 24, 2025 14:47

Base automatically changed from users/skatrak/target-spmd-02-ompirbuilder-distribute to main February 24, 2025 16:18

skatrak force-pushed the users/skatrak/target-spmd-03-mlir-distribute branch from 654c63e to e780d29 Compare February 24, 2025 16:19

skatrak merged commit 7ff87af into main Feb 25, 2025
11 checks passed

skatrak deleted the users/skatrak/target-spmd-03-mlir-distribute branch February 25, 2025 10:28

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[MLIR][OpenMP] Host lowering of standalone distribute #127817

[MLIR][OpenMP] Host lowering of standalone distribute #127817

Uh oh!

skatrak commented Feb 19, 2025

Uh oh!

llvmbot commented Feb 19, 2025 •

edited

Loading

Uh oh!

skatrak commented Feb 19, 2025

Uh oh!

tblah left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[MLIR][OpenMP] Host lowering of standalone distribute #127817

[MLIR][OpenMP] Host lowering of standalone distribute #127817

Uh oh!

Conversation

skatrak commented Feb 19, 2025

Uh oh!

llvmbot commented Feb 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

skatrak commented Feb 19, 2025

Uh oh!

tblah left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Feb 19, 2025 •

edited

Loading