Skip to content

Commit 50d4f21

Browse files
committed
Bump LLVM Version
(merge 65fa61d)
1 parent 887bbd5 commit 50d4f21

File tree

189 files changed

+3685
-3525
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

189 files changed

+3685
-3525
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: MLIR-GPU Test CI
1+
name: llvm_project_build
22

33
on:
44
push:
@@ -24,7 +24,7 @@ jobs:
2424
- compiler: clang
2525
cxxcompiler: g++
2626

27-
timeout-minutes: 240
27+
timeout-minutes: 360
2828
steps:
2929
- uses: actions/checkout@v3
3030
with:
@@ -46,13 +46,6 @@ jobs:
4646
- name: add dependencies
4747
run: sudo apt-get install -y ninja-build #cmake binutils-gold binutils binutils-dev ${{ matrix.compiler }} ${{ matrix.linker-pkg }}
4848

49-
#- name: setup cymbl
50-
# run: |
51-
# cd /
52-
# sudo wget --no-verbose https://github.com/cymbl/cymbl.github.io/releases/download/0.0.1/LLVM-11.0.0git-Linux.sh
53-
# printf "y\nn\n" | sudo bash LLVM-11.0.0git-Linux.sh
54-
# printf "{\"refreshToken\":\"%s\"}" "${{ secrets.SuperSecret }}" > ~/.cymblconfig
55-
5649
- name: MLIR build
5750
if: steps.cache-mlir.outputs.cache-hit != 'true'
5851
run: |

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,3 +80,8 @@ pythonenv*
8080
/clang/utils/analyzer/projects/*/RefScanBuildResults
8181
# automodapi puts generated documentation files here.
8282
/lldb/docs/python_api/
83+
84+
85+
# tmp output from tests
86+
*.exec1
87+
*.out1

include/polygeist/BarrierUtils.h

Lines changed: 25 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -51,15 +51,14 @@ allocateTemporaryBuffer(mlir::OpBuilder &rewriter, mlir::Value value,
5151
mlir::ValueRange iterationCounts, bool alloca = true,
5252
mlir::DataLayout *DLI = nullptr) {
5353
using namespace mlir;
54-
SmallVector<int64_t> bufferSize(iterationCounts.size(),
55-
ShapedType::kDynamicSize);
54+
SmallVector<int64_t> bufferSize(iterationCounts.size(), ShapedType::kDynamic);
5655
mlir::Type ty = value.getType();
5756
if (alloca)
5857
if (auto allocaOp = value.getDefiningOp<memref::AllocaOp>()) {
5958
auto mt = allocaOp.getType();
6059
bool hasDynamicSize = false;
6160
for (auto s : mt.getShape()) {
62-
if (s == ShapedType::kDynamicSize) {
61+
if (s == ShapedType::kDynamic) {
6362
hasDynamicSize = true;
6463
break;
6564
}
@@ -84,10 +83,12 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::AllocaOp>(
8483
auto sz = val.getArraySize();
8584
assert(DLI);
8685
for (auto iter : iterationCounts) {
87-
sz =
88-
rewriter.create<arith::MulIOp>(value.getLoc(), sz,
89-
rewriter.create<arith::IndexCastOp>(
90-
value.getLoc(), sz.getType(), iter));
86+
sz = cast<TypedValue<IntegerType>>(
87+
rewriter
88+
.create<arith::MulIOp>(value.getLoc(), sz,
89+
rewriter.create<arith::IndexCastOp>(
90+
value.getLoc(), sz.getType(), iter))
91+
.getResult());
9192
}
9293
return rewriter.create<LLVM::AllocaOp>(value.getLoc(), val.getType(), sz);
9394
}
@@ -100,18 +101,24 @@ mlir::Value allocateTemporaryBuffer<mlir::LLVM::CallOp>(
100101
auto val = value.getDefiningOp<LLVM::AllocaOp>();
101102
auto sz = val.getArraySize();
102103
assert(DLI);
103-
sz = rewriter.create<arith::MulIOp>(
104-
value.getLoc(), sz,
105-
rewriter.create<arith::ConstantIntOp>(
106-
value.getLoc(),
107-
DLI->getTypeSize(
108-
val.getType().cast<LLVM::LLVMPointerType>().getElementType()),
109-
sz.getType().cast<IntegerType>().getWidth()));
104+
sz = cast<TypedValue<IntegerType>>(
105+
rewriter
106+
.create<arith::MulIOp>(
107+
value.getLoc(), sz,
108+
rewriter.create<arith::ConstantIntOp>(
109+
value.getLoc(),
110+
DLI->getTypeSize(val.getType()
111+
.cast<LLVM::LLVMPointerType>()
112+
.getElementType()),
113+
sz.getType().cast<IntegerType>().getWidth()))
114+
.getResult());
110115
for (auto iter : iterationCounts) {
111-
sz =
112-
rewriter.create<arith::MulIOp>(value.getLoc(), sz,
113-
rewriter.create<arith::IndexCastOp>(
114-
value.getLoc(), sz.getType(), iter));
116+
sz = cast<TypedValue<IntegerType>>(
117+
rewriter
118+
.create<arith::MulIOp>(value.getLoc(), sz,
119+
rewriter.create<arith::IndexCastOp>(
120+
value.getLoc(), sz.getType(), iter))
121+
.getResult());
115122
}
116123
auto m = val->getParentOfType<ModuleOp>();
117124
return callMalloc(rewriter, m, value.getLoc(), sz);

include/polygeist/Ops.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,8 +81,9 @@ class BarrierElim final
8181
}
8282

8383
Operation *op = barrier;
84-
if (NotTopLevel && isa<mlir::scf::ParallelOp, mlir::AffineParallelOp>(
85-
barrier->getParentOp()))
84+
if (NotTopLevel &&
85+
isa<mlir::scf::ParallelOp, mlir::affine::AffineParallelOp>(
86+
barrier->getParentOp()))
8687
return failure();
8788

8889
{

include/polygeist/Passes/Passes.h

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,11 @@
22
#define POLYGEIST_DIALECT_POLYGEIST_PASSES_H
33

44
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
5+
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
6+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
57
#include "mlir/Pass/Pass.h"
8+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
9+
#include "polygeist/Dialect.h"
610
#include <memory>
711

812
enum PolygeistAlternativesMode { PAM_Static, PAM_PGO_Profile, PAM_PGO_Opt };
@@ -19,7 +23,7 @@ class RewritePatternSet;
1923
class DominanceInfo;
2024
namespace polygeist {
2125
std::unique_ptr<Pass> createParallelLICMPass();
22-
std::unique_ptr<Pass> createMem2RegPass();
26+
std::unique_ptr<Pass> createPolygeistMem2RegPass();
2327
std::unique_ptr<Pass> createLoopRestructurePass();
2428
std::unique_ptr<Pass> createInnerSerializationPass();
2529
std::unique_ptr<Pass> createSerializationPass();
@@ -50,8 +54,14 @@ createConvertParallelToGPUPass1(std::string arch = "sm_60");
5054
std::unique_ptr<Pass>
5155
createConvertParallelToGPUPass2(bool emitGPUKernelLaunchBounds = true);
5256
std::unique_ptr<Pass> createMergeGPUModulesPass();
57+
std::unique_ptr<Pass> createConvertToOpaquePtrPass();
5358
std::unique_ptr<Pass> createLowerAlternativesPass();
5459
std::unique_ptr<Pass> createCollectKernelStatisticsPass();
60+
std::unique_ptr<Pass> createPolygeistCanonicalizePass();
61+
std::unique_ptr<Pass>
62+
createPolygeistCanonicalizePass(const GreedyRewriteConfig &config,
63+
ArrayRef<std::string> disabledPatterns,
64+
ArrayRef<std::string> enabledPatterns);
5565
std::unique_ptr<Pass> createGpuSerializeToCubinPass(
5666
StringRef arch, StringRef features, int llvmOptLevel, int ptxasOptLevel,
5767
std::string ptxasPath, std::string libDevicePath, bool outputIntermediate);
@@ -81,10 +91,26 @@ namespace arith {
8191
class ArithDialect;
8292
} // end namespace arith
8393

94+
namespace omp {
95+
class OpenMPDialect;
96+
} // end namespace omp
97+
98+
namespace polygeist {
99+
class PolygeistDialect;
100+
} // end namespace polygeist
101+
84102
namespace scf {
85103
class SCFDialect;
86104
} // end namespace scf
87105

106+
namespace cf {
107+
class ControlFlowDialect;
108+
} // end namespace cf
109+
110+
namespace math {
111+
class MathDialect;
112+
} // end namespace math
113+
88114
namespace memref {
89115
class MemRefDialect;
90116
} // end namespace memref
@@ -93,7 +119,10 @@ namespace func {
93119
class FuncDialect;
94120
}
95121

122+
namespace affine {
96123
class AffineDialect;
124+
}
125+
97126
namespace LLVM {
98127
class LLVMDialect;
99128
}

include/polygeist/Passes/Passes.td

Lines changed: 89 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,25 @@
1-
#ifndef POlYGEIST_PASSES
1+
#ifndef POLYGEIST_PASSES
22
#define POLYGEIST_PASSES
33

44
include "mlir/Pass/PassBase.td"
5+
include "mlir/Rewrite/PassUtil.td"
56

67
def AffineCFG : Pass<"affine-cfg"> {
78
let summary = "Replace scf.if and similar with affine.if";
89
let constructor = "mlir::polygeist::replaceAffineCFGPass()";
910
}
1011

11-
def Mem2Reg : Pass<"mem2reg"> {
12+
def PolygeistMem2Reg : Pass<"polygeist-mem2reg"> {
1213
let summary = "Replace scf.if and similar with affine.if";
13-
let constructor = "mlir::polygeist::createMem2RegPass()";
14+
let constructor = "mlir::polygeist::createPolygeistMem2RegPass()";
1415
}
1516

1617
def SCFParallelLoopUnroll : Pass<"scf-parallel-loop-unroll"> {
1718
let summary = "Unroll and interleave scf parallel loops";
18-
let dependentDialects =
19-
["::mlir::scf::SCFDialect"];
19+
let dependentDialects = [
20+
"scf::SCFDialect",
21+
"arith::ArithDialect",
22+
];
2023
let constructor = "mlir::polygeist::createSCFParallelLoopUnrollPass()";
2124
let options = [
2225
Option<"unrollFactor", "unrollFactor", "int", /*default=*/"2", "Unroll factor">
@@ -37,8 +40,10 @@ def LowerAlternatives : Pass<"lower-alternatives", "mlir::ModuleOp"> {
3740

3841
def ConvertCudaRTtoCPU : Pass<"convert-cudart-to-cpu", "mlir::ModuleOp"> {
3942
let summary = "Lower cudart functions to cpu versions";
40-
let dependentDialects =
41-
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
43+
let dependentDialects = [
44+
"memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect",
45+
"cf::ControlFlowDialect",
46+
];
4247
let constructor = "mlir::polygeist::createConvertCudaRTtoCPUPass()";
4348
}
4449

@@ -64,8 +69,14 @@ def ConvertCudaRTtoHipRT : Pass<"convert-cudart-to-gpu", "mlir::ModuleOp"> {
6469

6570
def ParallelLower : Pass<"parallel-lower", "mlir::ModuleOp"> {
6671
let summary = "Lower gpu launch op to parallel ops";
67-
let dependentDialects =
68-
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
72+
let dependentDialects = [
73+
"scf::SCFDialect",
74+
"polygeist::PolygeistDialect",
75+
"cf::ControlFlowDialect",
76+
"memref::MemRefDialect",
77+
"func::FuncDialect",
78+
"LLVM::LLVMDialect",
79+
];
6980
let constructor = "mlir::polygeist::createParallelLowerPass()";
7081
}
7182

@@ -87,7 +98,7 @@ def SCFCPUify : Pass<"cpuify"> {
8798
def ConvertParallelToGPU1 : Pass<"convert-parallel-to-gpu1"> {
8899
let summary = "Convert parallel loops to gpu";
89100
let constructor = "mlir::polygeist::createConvertParallelToGPUPass1()";
90-
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect"];
101+
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect", "gpu::GPUDialect"];
91102
let options = [
92103
Option<"arch", "arch", "std::string", /*default=*/"\"sm_60\"", "Target GPU architecture">
93104
];
@@ -96,7 +107,13 @@ def ConvertParallelToGPU1 : Pass<"convert-parallel-to-gpu1"> {
96107
def ConvertParallelToGPU2 : Pass<"convert-parallel-to-gpu2"> {
97108
let summary = "Convert parallel loops to gpu";
98109
let constructor = "mlir::polygeist::createConvertParallelToGPUPass2()";
99-
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect"];
110+
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "memref::MemRefDialect", "gpu::GPUDialect"];
111+
}
112+
113+
def ConvertToOpaquePtrPass : Pass<"convert-to-opaque-ptr"> {
114+
let summary = "Convert typed llvm pointers to opaque";
115+
let constructor = "mlir::polygeist::createConvertToOpaquePtrPass()";
116+
let dependentDialects = ["LLVM::LLVMDialect"];
100117
}
101118

102119
def MergeGPUModulesPass : Pass<"merge-gpu-modules", "mlir::ModuleOp"> {
@@ -111,6 +128,7 @@ def InnerSerialization : Pass<"inner-serialize"> {
111128
let dependentDialects =
112129
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
113130
}
131+
114132
def Serialization : Pass<"serialize"> {
115133
let summary = "remove scf.barrier";
116134
let constructor = "mlir::polygeist::createSerializationPass()";
@@ -127,18 +145,28 @@ def SCFBarrierRemovalContinuation : InterfacePass<"barrier-removal-continuation"
127145
def SCFRaiseToAffine : Pass<"raise-scf-to-affine"> {
128146
let summary = "Raise SCF to affine";
129147
let constructor = "mlir::polygeist::createRaiseSCFToAffinePass()";
130-
let dependentDialects = ["AffineDialect"];
148+
let dependentDialects = [
149+
"affine::AffineDialect",
150+
"scf::SCFDialect",
151+
];
131152
}
132153

133154
def SCFCanonicalizeFor : Pass<"canonicalize-scf-for"> {
134155
let summary = "Run some additional canonicalization for scf::for";
135156
let constructor = "mlir::polygeist::createCanonicalizeForPass()";
157+
let dependentDialects = [
158+
"scf::SCFDialect",
159+
"math::MathDialect",
160+
];
136161
}
137162

138163
def ForBreakToWhile : Pass<"for-break-to-while"> {
139164
let summary = "Rewrite scf.for(scf.if) to scf.while";
140165
let constructor = "mlir::polygeist::createForBreakToWhilePass()";
141-
let dependentDialects = ["arith::ArithDialect"];
166+
let dependentDialects = [
167+
"arith::ArithDialect",
168+
"cf::ControlFlowDialect",
169+
];
142170
}
143171

144172
def ParallelLICM : Pass<"parallel-licm"> {
@@ -149,11 +177,48 @@ def ParallelLICM : Pass<"parallel-licm"> {
149177
def OpenMPOptPass : Pass<"openmp-opt"> {
150178
let summary = "Optimize OpenMP";
151179
let constructor = "mlir::polygeist::createOpenMPOptPass()";
180+
let dependentDialects = [
181+
"memref::MemRefDialect",
182+
"omp::OpenMPDialect",
183+
"LLVM::LLVMDialect",
184+
];
185+
}
186+
187+
def PolygeistCanonicalize : Pass<"canonicalize-polygeist"> {
188+
let constructor = "mlir::polygeist::createPolygeistCanonicalizePass()";
189+
let dependentDialects = [
190+
"func::FuncDialect",
191+
"LLVM::LLVMDialect",
192+
"memref::MemRefDialect",
193+
"gpu::GPUDialect",
194+
"arith::ArithDialect",
195+
"cf::ControlFlowDialect",
196+
"scf::SCFDialect",
197+
"polygeist::PolygeistDialect",
198+
];
199+
let options = [
200+
Option<"topDownProcessingEnabled", "top-down", "bool",
201+
/*default=*/"true",
202+
"Seed the worklist in general top-down order">,
203+
Option<"enableRegionSimplification", "region-simplify", "bool",
204+
/*default=*/"true",
205+
"Perform control flow optimizations to the region tree">,
206+
Option<"maxIterations", "max-iterations", "int64_t",
207+
/*default=*/"10",
208+
"Max. iterations between applying patterns / simplifying regions">,
209+
Option<"maxNumRewrites", "max-num-rewrites", "int64_t", /*default=*/"-1",
210+
"Max. number of pattern rewrites within an iteration">,
211+
Option<"testConvergence", "test-convergence", "bool", /*default=*/"false",
212+
"Test only: Fail pass on non-convergence to detect cyclic pattern">
213+
] # RewritePassUtils.options;
152214
}
153215

154216
def LoopRestructure : Pass<"loop-restructure"> {
155217
let constructor = "mlir::polygeist::createLoopRestructurePass()";
156-
let dependentDialects = ["::mlir::scf::SCFDialect"];
218+
let dependentDialects = [
219+
"scf::SCFDialect",
220+
"polygeist::PolygeistDialect",
221+
];
157222
}
158223

159224
def RemoveTrivialUse : Pass<"trivialuse"> {
@@ -188,7 +253,16 @@ def ConvertPolygeistToLLVM : Pass<"convert-polygeist-to-llvm", "mlir::ModuleOp">
188253
LLVM IR types.
189254
}];
190255
let constructor = "mlir::polygeist::createConvertPolygeistToLLVMPass()";
191-
let dependentDialects = ["LLVM::LLVMDialect"];
256+
let dependentDialects = [
257+
"polygeist::PolygeistDialect",
258+
"func::FuncDialect",
259+
"LLVM::LLVMDialect",
260+
"memref::MemRefDialect",
261+
"gpu::GPUDialect",
262+
"arith::ArithDialect",
263+
"cf::ControlFlowDialect",
264+
"scf::SCFDialect",
265+
];
192266
let options = [
193267
Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool",
194268
/*default=*/"false",

0 commit comments

Comments
 (0)