llvm
diff --git a/‎include/polygeist/PolygeistOps.td
Lines changed: 1 addition & 0 deletions b/‎include/polygeist/PolygeistOps.td
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/polygeist/Ops.cpp
Lines changed: 90 additions & 0 deletions b/‎lib/polygeist/Ops.cpp
Lines changed: 90 additions & 0 deletions
diff --git a/‎lib/polygeist/Passes/ConvertPolygeistToLLVM.cpp
Lines changed: 32 additions & 4 deletions b/‎lib/polygeist/Passes/ConvertPolygeistToLLVM.cpp
Lines changed: 32 additions & 4 deletions
diff --git a/‎lib/polygeist/Passes/ParallelLower.cpp
Lines changed: 131 additions & 9 deletions b/‎lib/polygeist/Passes/ParallelLower.cpp
Lines changed: 131 additions & 9 deletions
@@ -118,6 +118,7 @@ def GetFuncOp : Polygeist_Op<"get_func",
   let arguments = (ins FlatSymbolRefAttr:$name);
   let results = (outs LLVM_AnyPointer : $result);
   let assemblyFormat = "$name `:` type($result) attr-dict";
+  let hasCanonicalizer = 1;
 }
 
 def TrivialUseOp : Polygeist_Op<"trivialuse"> {
 
@@ -5432,6 +5432,96 @@ void TypeAlignOp::getCanonicalizationPatterns(RewritePatternSet &results,
 // GetFuncOp
 //===----------------------------------------------------------------------===//
 
+/// Try to replace an indirect `llvm.call` whose callee pointer originates from
+/// a `polygeist.get_func` with a direct `func.call` to the referenced symbol.
+///
+/// The callee operand is traced back through `llvm.bitcast`,
+/// `polygeist.memref2pointer` and `polygeist.pointer2memref`. Arguments whose
+/// type differs from the callee's parameter type are accepted only when the
+/// argument is an LLVM pointer and the parameter is a memref, in which case a
+/// `polygeist.pointer2memref` is inserted. A result is accepted only when the
+/// call yields an LLVM pointer and the callee returns a memref; it is converted
+/// back with `polygeist.memref2pointer`.
+///
+/// \param op       the call to rewrite; it is neither erased nor replaced here.
+/// \param rewriter builder used to create the new operations; the caller is
+///                 responsible for positioning its insertion point.
+/// \param vals     receives the replacement values for the results of `op`.
+/// \returns success() if the direct call was created, failure() otherwise. No
+///          operation is created on failure.
+LogicalResult fixupGetFunc(LLVM::CallOp op, OpBuilder &rewriter,
+                           SmallVectorImpl<Value> &vals) {
+  // Direct calls already name their callee; only indirect calls are handled.
+  if (op.getCallee())
+    return failure();
+
+  Value pval = op.getOperand(0);
+
+  // Function type as seen at the call site.
+  auto callPtrTy = pval.getType().dyn_cast<LLVM::LLVMPointerType>();
+  if (!callPtrTy)
+    return failure();
+  auto FT =
+      callPtrTy.getElementType().dyn_cast_or_null<LLVM::LLVMFunctionType>();
+  if (!FT || FT.isVarArg())
+    return failure();
+
+  // Look through pointer/memref casts to find the value that produced the
+  // callee pointer.
+  while (true) {
+    if (auto bc = pval.getDefiningOp<LLVM::BitcastOp>())
+      pval = bc.getOperand();
+    else if (auto mt = pval.getDefiningOp<Memref2PointerOp>())
+      pval = mt.getOperand();
+    else if (auto mt = pval.getDefiningOp<Pointer2MemrefOp>())
+      pval = mt.getOperand();
+    else
+      break;
+  }
+
+  auto gfn = pval.getDefiningOp<GetFuncOp>();
+  if (!gfn)
+    return failure();
+
+  // Function type as declared by the underlying value. The casts stripped
+  // above may have started from a pointer to something that is not a function
+  // type at all, so bail out instead of asserting in that case.
+  Type calleeElemTy;
+  if (auto MT = pval.getType().dyn_cast<MemRefType>())
+    calleeElemTy = MT.getElementType();
+  else if (auto PT = pval.getType().dyn_cast<LLVM::LLVMPointerType>())
+    calleeElemTy = PT.getElementType();
+  auto FT2 = calleeElemTy.dyn_cast_or_null<LLVM::LLVMFunctionType>();
+  if (!FT2)
+    return failure();
+
+  if (FT2.getParams().size() != FT.getParams().size())
+    return failure();
+
+  SmallVector<Value> args(op.getOperands());
+  args.erase(args.begin());
+  auto params = FT2.getParams();
+  if (args.size() != params.size())
+    return failure();
+
+  // Validate every argument and the result before creating any operation so
+  // that a failed match leaves the IR untouched.
+  for (size_t i = 0, e = args.size(); i != e; ++i) {
+    if (params[i] == args[i].getType())
+      continue;
+    if (!params[i].isa<MemRefType>() ||
+        !args[i].getType().isa<LLVM::LLVMPointerType>())
+      return failure();
+  }
+
+  const bool hasResult = !op.getResultTypes().empty();
+  if (hasResult && (!op.getResultTypes()[0].isa<LLVM::LLVMPointerType>() ||
+                    !FT2.getReturnType().isa<MemRefType>()))
+    return failure();
+
+  // Everything matched: materialise the pointer -> memref argument casts.
+  for (size_t i = 0, e = args.size(); i != e; ++i) {
+    if (params[i] != args[i].getType())
+      args[i] = rewriter.create<polygeist::Pointer2MemrefOp>(
+          op.getLoc(), params[i], args[i]);
+  }
+
+  // The direct call must produce the callee's own return type (a memref, per
+  // the check above); it is converted back to the pointer type expected by the
+  // users of the original call below.
+  SmallVector<Type, 1> callResultTypes;
+  if (hasResult)
+    callResultTypes.push_back(FT2.getReturnType());
+
+  auto call = rewriter.create<func::CallOp>(op.getLoc(), gfn.getNameAttr(),
+                                            callResultTypes, args);
+  for (Value r : call.getResults()) {
+    if (r.getType() != FT.getReturnType())
+      r = rewriter.create<polygeist::Memref2PointerOp>(op.getLoc(),
+                                                       FT.getReturnType(), r);
+    vals.push_back(r);
+  }
+  return success();
+}
+
+/// Rewrite pattern on `llvm.call`: when `fixupGetFunc` succeeds, the call is
+/// replaced by the values that helper produced.
+class GetFuncFix final : public OpRewritePattern<LLVM::CallOp> {
+public:
+  using OpRewritePattern<LLVM::CallOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(LLVM::CallOp op,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<Value> replacements;
+    if (failed(fixupGetFunc(op, rewriter, replacements)))
+      return failure();
+
+    // Swap the original call for the values computed by the helper.
+    rewriter.replaceOp(op, replacements);
+    return success();
+  }
+};
+
+/// Registers the canonicalization patterns associated with `GetFuncOp`;
+/// currently this is only `GetFuncFix`.
+void GetFuncOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                            MLIRContext *context) {
+  results.add<GetFuncFix>(context);
+}
+
 LogicalResult GetFuncOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   // TODO: Verify that the result type is same as the type of the referenced
   // func.func op.
 
@@ -152,9 +152,14 @@ struct Memref2PointerOpLowering
                   ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
 
+    auto LPT = op.getType().cast<LLVM::LLVMPointerType>();
+    auto space0 = op.getSource().getType().getMemorySpaceAsInt();
     if (transformed.getSource().getType().isa<LLVM::LLVMPointerType>()) {
-      auto ptr = rewriter.create<LLVM::BitcastOp>(loc, op.getType(),
-                                                  transformed.getSource());
+      mlir::Value ptr = rewriter.create<LLVM::BitcastOp>(
+          loc, LLVM::LLVMPointerType::get(LPT.getElementType(), space0),
+          transformed.getSource());
+      if (space0 != LPT.getAddressSpace())
+        ptr = rewriter.create<LLVM::AddrSpaceCastOp>(loc, LPT, ptr);
       rewriter.replaceOp(op, {ptr});
       return success();
     }
@@ -169,7 +174,10 @@ struct Memref2PointerOpLowering
     Value ptr = targetMemRef.alignedPtr(rewriter, loc);
     Value idxs[] = {baseOffset};
     ptr = rewriter.create<LLVM::GEPOp>(loc, ptr.getType(), ptr, idxs);
-    ptr = rewriter.create<LLVM::BitcastOp>(loc, op.getType(), ptr);
+    ptr = rewriter.create<LLVM::BitcastOp>(
+        loc, LLVM::LLVMPointerType::get(LPT.getElementType(), space0), ptr);
+    if (space0 != LPT.getAddressSpace())
+      ptr = rewriter.create<LLVM::AddrSpaceCastOp>(loc, LPT, ptr);
 
     rewriter.replaceOp(op, {ptr});
     return success();
@@ -997,6 +1005,25 @@ struct CLoadOpLowering : public CLoadStoreOpLowering<memref::LoadOp> {
   }
 };
 
+/// Pattern for lowering `memref.atomic_rmw` to `llvm.atomicrmw` using the
+/// address computed by the shared `CLoadStoreOpLowering` helper.
+struct CAtomicRMWOpLowering : public CLoadStoreOpLowering<memref::AtomicRMWOp> {
+  using CLoadStoreOpLowering<memref::AtomicRMWOp>::CLoadStoreOpLowering;
+
+  LogicalResult
+  matchAndRewrite(memref::AtomicRMWOp atomicOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // Only the RMW kinds recognised by matchSimpleAtomicOp are handled here.
+    auto binOp = matchSimpleAtomicOp(atomicOp);
+    if (!binOp)
+      return failure();
+
+    // Give up if no address could be produced for the accessed element.
+    auto address = getAddress(atomicOp, adaptor, rewriter);
+    if (!address)
+      return failure();
+
+    rewriter.replaceOpWithNewOp<LLVM::AtomicRMWOp>(
+        atomicOp, atomicOp.getType(), *binOp, address, adaptor.getValue(),
+        LLVM::AtomicOrdering::acq_rel);
+    return success();
+  }
+};
+
 /// Pattern for lowering a memory store.
 struct CStoreOpLowering : public CLoadStoreOpLowering<memref::StoreOp> {
 public:
@@ -1284,7 +1311,8 @@ populateCStyleMemRefLoweringPatterns(RewritePatternSet &patterns,
                                      LLVMTypeConverter &typeConverter) {
   patterns.add<CAllocaOpLowering, CAllocOpLowering, CDeallocOpLowering,
                GetGlobalOpLowering, GlobalOpLowering, CLoadOpLowering,
-               CStoreOpLowering, AllocaScopeOpLowering>(typeConverter);
+               CStoreOpLowering, AllocaScopeOpLowering, CAtomicRMWOpLowering>(
+      typeConverter);
 }
 
 /// Appends the patterns lowering operations from the Func dialect to the LLVM
 
@@ -27,6 +27,7 @@
 #include "mlir/Transforms/Passes.h"
 #include "polygeist/Ops.h"
 #include "polygeist/Passes/Passes.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <algorithm>
 #include <mutex>
@@ -198,18 +199,23 @@ mlir::LLVM::LLVMFuncOp GetOrCreateFreeFunction(ModuleOp module) {
                                           lnk);
 }
 
+LogicalResult fixupGetFunc(LLVM::CallOp, OpBuilder &rewriter,
+                           SmallVectorImpl<Value> &);
+
 void ParallelLower::runOnOperation() {
   // The inliner should only be run on operations that define a symbol table,
   // as the callgraph will need to resolve references.
 
   SymbolTableCollection symbolTable;
   symbolTable.getSymbolTable(getOperation());
+  SymbolUserMap symbolUserMap(symbolTable, getOperation());
 
   getOperation()->walk([&](CallOp bidx) {
     if (bidx.getCallee() == "cudaThreadSynchronize")
       bidx.erase();
   });
 
+  std::function<void(LLVM::CallOp)> LLVMcallInliner;
   std::function<void(CallOp)> callInliner = [&](CallOp caller) {
     // Build the inliner interface.
     AlwaysInlinerInterface interface(&getContext());
@@ -230,10 +236,72 @@ void ParallelLower::runOnOperation() {
       return;
     if (targetRegion->empty())
       return;
-    SmallVector<CallOp> ops;
-    callableOp.walk([&](CallOp caller) { ops.push_back(caller); });
-    for (auto op : ops)
-      callInliner(op);
+    {
+      SmallVector<CallOp> ops;
+      callableOp.walk([&](CallOp caller) { ops.push_back(caller); });
+      for (auto op : ops)
+        callInliner(op);
+    }
+    {
+      SmallVector<LLVM::CallOp> ops;
+      callableOp.walk([&](LLVM::CallOp caller) { ops.push_back(caller); });
+      for (auto op : ops)
+        LLVMcallInliner(op);
+    }
+    OpBuilder b(caller);
+    auto allocScope = b.create<memref::AllocaScopeOp>(caller.getLoc(),
+                                                      caller.getResultTypes());
+    allocScope.getRegion().push_back(new Block());
+    b.setInsertionPointToStart(&allocScope.getRegion().front());
+    auto exOp = b.create<scf::ExecuteRegionOp>(caller.getLoc(),
+                                               caller.getResultTypes());
+    Block *blk = new Block();
+    exOp.getRegion().push_back(blk);
+    caller->moveBefore(blk, blk->begin());
+    caller.replaceAllUsesWith(allocScope.getResults());
+    b.setInsertionPointToEnd(blk);
+    b.create<scf::YieldOp>(caller.getLoc(), caller.getResults());
+    if (inlineCall(interface, caller, callableOp, targetRegion,
+                   /*shouldCloneInlinedRegion=*/true)
+            .succeeded()) {
+      caller.erase();
+    }
+    b.setInsertionPointToEnd(&allocScope.getRegion().front());
+    b.create<memref::AllocaScopeReturnOp>(allocScope.getLoc(),
+                                          exOp.getResults());
+  };
+  LLVMcallInliner = [&](LLVM::CallOp caller) {
+    // Build the inliner interface.
+    AlwaysInlinerInterface interface(&getContext());
+
+    auto callable = caller.getCallableForCallee();
+    CallableOpInterface callableOp;
+    if (SymbolRefAttr symRef = callable.dyn_cast<SymbolRefAttr>()) {
+      if (!symRef.isa<FlatSymbolRefAttr>())
+        return;
+      auto *symbolOp =
+          symbolTable.lookupNearestSymbolFrom(getOperation(), symRef);
+      callableOp = dyn_cast_or_null<CallableOpInterface>(symbolOp);
+    } else {
+      return;
+    }
+    Region *targetRegion = callableOp.getCallableRegion();
+    if (!targetRegion)
+      return;
+    if (targetRegion->empty())
+      return;
+    {
+      SmallVector<CallOp> ops;
+      callableOp.walk([&](CallOp caller) { ops.push_back(caller); });
+      for (auto op : ops)
+        callInliner(op);
+    }
+    {
+      SmallVector<LLVM::CallOp> ops;
+      callableOp.walk([&](LLVM::CallOp caller) { ops.push_back(caller); });
+      for (auto op : ops)
+        LLVMcallInliner(op);
+    }
     OpBuilder b(caller);
     auto allocScope = b.create<memref::AllocaScopeOp>(caller.getLoc(),
                                                       caller.getResultTypes());
@@ -256,6 +324,7 @@ void ParallelLower::runOnOperation() {
     b.create<memref::AllocaScopeReturnOp>(allocScope.getLoc(),
                                           exOp.getResults());
   };
+
   {
     SmallVector<CallOp> dimsToInline;
     getOperation()->walk([&](CallOp bidx) {
@@ -268,15 +337,68 @@ void ParallelLower::runOnOperation() {
   }
 
   // Only supports single block functions at the moment.
+
+  SmallVector<std::pair<Operation *, size_t>> outlineOps;
+  getOperation().walk([&](gpu::LaunchOp launchOp) {
+    launchOp.walk([&](LLVM::CallOp caller) {
+      if (!caller.getCallee()) {
+        outlineOps.push_back(std::make_pair(caller, (size_t)0));
+      }
+    });
+  });
+  SetVector<FunctionOpInterface> toinl;
+  while (outlineOps.size()) {
+    auto opv = outlineOps.back();
+    auto op = std::get<0>(opv);
+    auto idx = std::get<1>(opv);
+    outlineOps.pop_back();
+    if (Value fn = op->getOperand(idx)) {
+      if (auto fn2 = fn.getDefiningOp<polygeist::Memref2PointerOp>())
+        fn = fn2.getOperand();
+      if (auto ba = fn.dyn_cast<BlockArgument>()) {
+        if (auto F =
+                dyn_cast<FunctionOpInterface>(ba.getOwner()->getParentOp())) {
+          if (toinl.count(F))
+            continue;
+          toinl.insert(F);
+          for (Operation *m : symbolUserMap.getUsers(F)) {
+            outlineOps.push_back(std::make_pair(m, (size_t)ba.getArgNumber()));
+          }
+        }
+      }
+    }
+  }
+  for (auto F : toinl) {
+    for (Operation *m : symbolUserMap.getUsers(F)) {
+      callInliner(cast<CallOp>(m));
+    }
+  }
+  getOperation().walk([&](LLVM::CallOp caller) {
+    OpBuilder builder(caller);
+    SmallVector<Value> vals;
+    if (fixupGetFunc(caller, builder, vals).failed())
+      return;
+    if (vals.size())
+      caller.getResult().replaceAllUsesWith(vals[0]);
+    caller.erase();
+  });
+
   SmallVector<gpu::LaunchOp> toHandle;
   getOperation().walk(
       [&](gpu::LaunchOp launchOp) { toHandle.push_back(launchOp); });
-
   for (gpu::LaunchOp launchOp : toHandle) {
-    SmallVector<CallOp> ops;
-    launchOp.walk([&](CallOp caller) { ops.push_back(caller); });
-    for (auto op : ops)
-      callInliner(op);
+    {
+      SmallVector<CallOp> ops;
+      launchOp.walk([&](CallOp caller) { ops.push_back(caller); });
+      for (auto op : ops)
+        callInliner(op);
+    }
+    {
+      SmallVector<LLVM::CallOp> lops;
+      launchOp.walk([&](LLVM::CallOp caller) { lops.push_back(caller); });
+      for (auto op : lops)
+        LLVMcallInliner(op);
+    }
 
     mlir::IRRewriter builder(launchOp.getContext());
     auto loc = launchOp.getLoc();
Original file line number	Diff line number	Diff line change
`@@ -118,6 +118,7 @@ def GetFuncOp : Polygeist_Op<"get_func",`
`118`	`118`	`let arguments = (ins FlatSymbolRefAttr:$name);`
`119`	`119`	`let results = (outs LLVM_AnyPointer : $result);`
`120`	`120`	let assemblyFormat = "$name `:` type($result) attr-dict";
	`121`	`+ let hasCanonicalizer = 1;`
`121`	`122`	`}`
`122`	`123`
`123`	`124`	`def TrivialUseOp : Polygeist_Op<"trivialuse"> {`