Skip to content

Commit 938b062

Browse files
authored
[CPU] Refresh CPU pipeline verification. (#21541)
After switching all the pipelines to IREE::CPU::LoweringConfigAttr, we no longer need to use TilingConfig, because the lowering config itself has descriptive enums. The revision makes sure that all the tiling sizes are set properly in the multi tiling expert. For convolution, it still requires three levels of tiling, but it does not use TilingConfig now. --------- Signed-off-by: hanhanW <[email protected]>
1 parent eeda7ca commit 938b062

File tree

4 files changed

+138
-144
lines changed

4 files changed

+138
-144
lines changed

compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSelectLoweringStrategy.cpp

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,24 +41,16 @@ class LLVMCPUSelectLoweringStrategyPass
4141

4242
/// Verify that valid configuration is set for all ops within the funcOp.
4343
template <typename F>
44-
static LogicalResult
45-
verifyLoweringConfiguration(FunctionOpInterface funcOp,
46-
IREE::Codegen::TranslationInfoAttr translationInfo,
47-
F verificationFn) {
44+
static LogicalResult verifyLoweringConfiguration(FunctionOpInterface funcOp,
45+
F verificationFn) {
4846
auto walkResult = funcOp.walk([&](Operation *op) -> WalkResult {
4947
if (isa<IREE::LinalgExt::CustomOp>(op)) {
5048
return WalkResult::advance();
5149
}
52-
IREE::Codegen::LoweringConfigAttrInterface loweringConfig =
53-
getLoweringConfig(op);
54-
if (!loweringConfig || !loweringConfig.hasWorkgroupTilingLevel())
50+
auto loweringConfig = getLoweringConfig<IREE::CPU::LoweringConfigAttr>(op);
51+
if (!loweringConfig)
5552
return WalkResult::advance();
56-
std::unique_ptr<TilingConfig> tilingConfig =
57-
TilingConfig::create(loweringConfig);
58-
if (!tilingConfig)
59-
return WalkResult::interrupt();
60-
return verificationFn(op, *tilingConfig, translationInfo,
61-
ArrayRef<int64_t>{});
53+
return verificationFn(op, loweringConfig);
6254
});
6355
return failure(walkResult.wasInterrupted());
6456
}
@@ -82,12 +74,12 @@ void LLVMCPUSelectLoweringStrategyPass::runOnOperation() {
8274
switch (translationInfo.getDispatchLoweringPassPipeline()) {
8375
case IREE::Codegen::DispatchLoweringPassPipeline::CPUDoubleTilingExpert:
8476
verificationStatus = verifyLoweringConfiguration(
85-
funcOp, translationInfo, verifyDoubleTilingExpertPassPipelineConfig);
77+
funcOp, verifyMultiTilingExpertPassPipelineConfig);
8678
break;
8779
case IREE::Codegen::DispatchLoweringPassPipeline::
8880
CPUConvTileAndDecomposeExpert:
8981
verificationStatus = verifyLoweringConfiguration(
90-
funcOp, translationInfo, verifyConvTileAndDecomposeExpertConfig);
82+
funcOp, verifyConvTileAndDecomposeExpertConfig);
9183
break;
9284
default:
9385
break;

compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp

Lines changed: 109 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "mlir/Dialect/Func/IR/FuncOps.h"
3232
#include "mlir/Dialect/Linalg/Passes.h"
3333
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
34+
#include "mlir/Dialect/Utils/IndexingUtils.h"
35+
#include "mlir/IR/BuiltinTypeInterfaces.h"
3436
#include "mlir/Pass/PassManager.h"
3537
#include "mlir/Transforms/Passes.h"
3638

@@ -150,126 +152,129 @@ addTileAndDistributePasses(OpPassManager &funcPassManager,
150152
//===---------------------------------------------------------------------===//
151153

152154
static bool isValidInterchange(ArrayRef<int64_t> interchange, int numLoops) {
153-
if (interchange.empty())
155+
if (interchange.empty()) {
154156
return true;
155-
llvm::SmallDenseSet<int64_t> s;
156-
s.insert(interchange.begin(), interchange.end());
157-
for (int i = 0; i < numLoops; ++i) {
158-
if (!s.contains(i))
159-
return false;
160157
}
161-
return true;
158+
return isPermutationVector(interchange) && interchange.size() == numLoops;
162159
}
163160

164-
// TODO(hanchung): Refresh the verifier after all the pipelines use
165-
// IREE::CPU::LoweringConfigAttr.
166-
LogicalResult verifyDoubleTilingExpertPassPipelineConfig(
167-
Operation *op, TilingConfig &tilingConfig,
168-
IREE::Codegen::TranslationInfoAttr translationInfo,
169-
ArrayRef<int64_t> workgroupSize) {
170-
if (!workgroupSize.empty()) {
171-
return op->emitOpError(
172-
"expected workgroup size to be empty for CPU pipelines");
173-
}
174-
175-
// Verify that the translation info is using the right pipeline.
176-
if (translationInfo.getDispatchLoweringPassPipeline() !=
177-
IREE::Codegen::DispatchLoweringPassPipeline::CPUDoubleTilingExpert) {
178-
return op->emitOpError("expected pipeline in translation_info to be ")
179-
<< stringifyEnum(IREE::Codegen::DispatchLoweringPassPipeline::
180-
CPUDoubleTilingExpert);
181-
}
161+
LogicalResult verifyMultiTilingExpertPassPipelineConfig(
162+
Operation *op, IREE::CPU::LoweringConfigAttr loweringConfig) {
182163

183-
if (tilingConfig.getNumTilingLevels() == 6) {
184-
// TODO: update verification.
164+
auto interfaceOp = dyn_cast_or_null<TilingInterface>(op);
165+
if (!interfaceOp) {
185166
return success();
186167
}
187168

188-
if (tilingConfig.getNumTilingLevels() != 4) {
189-
return op->emitOpError("expected four tiling levels, got ")
190-
<< tilingConfig.getNumTilingLevels();
169+
// Collects parallel loops.
170+
llvm::SmallDenseSet<unsigned> pLoopsSet;
171+
for (auto [index, iteratorType] :
172+
llvm::enumerate(interfaceOp.getLoopIteratorTypes())) {
173+
if (iteratorType == utils::IteratorType::parallel) {
174+
pLoopsSet.insert(index);
175+
}
191176
}
192177

193-
auto interfaceOp = dyn_cast_or_null<TilingInterface>(op);
194-
if (interfaceOp) {
195-
llvm::SmallDenseSet<unsigned> pLoopsSet;
196-
for (auto [index, iteratorType] :
197-
llvm::enumerate(interfaceOp.getLoopIteratorTypes())) {
198-
if (iteratorType == utils::IteratorType::parallel) {
199-
pLoopsSet.insert(index);
200-
}
178+
for (int i = 0, e = IREE::CPU::TilingLevel::MaxNumTileLevels; i < e; ++i) {
179+
if (!loweringConfig.hasTilingLevel(i)) {
180+
continue;
201181
}
202182

203-
SmallVector<int64_t> secondLevelTileSizes;
204-
std::tie(secondLevelTileSizes, std::ignore) =
205-
tilingConfig.getVectorCommonParallelSizes();
206-
for (auto [index, tileSize] : llvm::enumerate(secondLevelTileSizes)) {
207-
if (tileSize != 0 && !pLoopsSet.contains(index)) {
208-
return op->emitOpError(
209-
"expected only parallel dims to be set in the second tiling "
210-
"level, got ")
211-
<< index << "-th tile size set";
183+
auto level = static_cast<IREE::CPU::TilingLevel>(i);
184+
auto tilingLevelAttr = cast<IREE::Codegen::LoweringConfigTilingLevelAttr>(
185+
loweringConfig.getTilingLevelAttr(level));
186+
switch (level) {
187+
case IREE::CPU::TilingLevel::DistributionTiles:
188+
case IREE::CPU::TilingLevel::CacheParallelTiles:
189+
case IREE::CPU::TilingLevel::VectorCommonParallelTiles:
190+
case IREE::CPU::TilingLevel::VectorInnerParallelTiles: {
191+
for (auto [index, tileSize] :
192+
llvm::enumerate(tilingLevelAttr.getSizes())) {
193+
if (tileSize != 0 && !pLoopsSet.contains(index)) {
194+
return op->emitOpError(
195+
"expected only parallel dims to be set in the ")
196+
<< IREE::CPU::getTilingLevelName(level)
197+
<< " tiling level, but tile size at index (" << index
198+
<< ") was also set";
199+
}
212200
}
201+
break;
213202
}
214-
215-
SmallVector<int64_t> thirdLevelTileSizes;
216-
std::tie(thirdLevelTileSizes, std::ignore) =
217-
tilingConfig.getVectorReductionSizes();
218-
for (auto [index, tileSize] : llvm::enumerate(thirdLevelTileSizes)) {
219-
if (tileSize != 0 && pLoopsSet.contains(index)) {
220-
return op->emitOpError(
221-
"expected only reduction dims to be set in the third tiling "
222-
"level, got ")
223-
<< index << "-th tile size set";
203+
case IREE::CPU::TilingLevel::CacheReductionTiles:
204+
case IREE::CPU::TilingLevel::VectorReductionTiles: {
205+
for (auto [index, tileSize] :
206+
llvm::enumerate(tilingLevelAttr.getSizes())) {
207+
if (tileSize != 0 && pLoopsSet.contains(index)) {
208+
return op->emitOpError(
209+
"expected only reduction dims to be set in the ")
210+
<< IREE::CPU::getTilingLevelName(level)
211+
<< " tiling level, but tile size at index (" << index
212+
<< ") was also set";
213+
}
224214
}
215+
break;
225216
}
226-
}
217+
case IREE::CPU::TilingLevel::MaxNumTileLevels:
218+
case IREE::CPU::TilingLevel::InvalidLevel:
219+
break;
220+
};
227221

228-
// Verify interchange.
229-
for (int level = 0; level < tilingConfig.getNumTilingLevels(); level++) {
230-
IREE::Codegen::LoweringConfigTilingLevelAttr attr =
231-
tilingConfig.getTilingLevelAttr(level);
232-
ArrayRef<int64_t> interchange = attr.getInterchange();
233-
size_t expectedSize = attr.getSizes().size();
234-
if (!interchange.empty() &&
235-
!isValidInterchange(interchange, expectedSize)) {
222+
ArrayRef<int64_t> interchange = tilingLevelAttr.getInterchange();
223+
size_t expectedSize = tilingLevelAttr.getSizes().size();
224+
if (!isValidInterchange(interchange, expectedSize)) {
236225
return op->emitOpError("expected [0, ")
237-
<< expectedSize << ") to be set exactly once in interchange #"
238-
<< level;
226+
<< expectedSize << ") to be set exactly once in interchange for "
227+
<< IREE::CPU::getTilingLevelName(level) << " tiling level";
239228
}
240229
}
230+
241231
return success();
242232
}
243233

244234
LogicalResult verifyConvTileAndDecomposeExpertConfig(
245-
Operation *op, TilingConfig &tilingConfig,
246-
IREE::Codegen::TranslationInfoAttr translationInfo,
247-
ArrayRef<int64_t> workgroupSize) {
248-
if (!isa<linalg::ConvolutionOpInterface>(op))
249-
return success();
250-
251-
if (tilingConfig.getNumTilingLevels() == 6) {
252-
// TODO: update verification.
235+
Operation *op, IREE::CPU::LoweringConfigAttr loweringConfig) {
236+
if (!isa<linalg::ConvolutionOpInterface>(op)) {
253237
return success();
254238
}
255239

256-
if (tilingConfig.getNumTilingLevels() != 3) {
257-
return op->emitOpError("expected three tiling levels, got ")
258-
<< tilingConfig.getNumTilingLevels();
259-
}
240+
auto getTileSizeAtIndex = [](ArrayRef<int64_t> sizes,
241+
ArrayRef<bool> scalableFlags,
242+
unsigned index) -> std::pair<int64_t, bool> {
243+
return std::make_pair(sizes[index],
244+
index < scalableFlags.size() && scalableFlags[index]);
245+
};
260246

247+
SmallVector<IREE::CPU::TilingLevel> requiredLevels = {
248+
IREE::CPU::DistributionTiles, IREE::CPU::VectorCommonParallelTiles,
249+
IREE::CPU::VectorReductionTiles};
261250
linalg::LinalgOp linalgOp = cast<linalg::LinalgOp>(op);
262-
SmallVector<int64_t> shape = linalgOp.getStaticLoopRanges();
263-
for (auto sizes : tilingConfig.getTileSizes()) {
264-
for (auto [i, size] : llvm::enumerate(sizes)) {
265-
if (size == 1)
266-
shape[i] = 1;
267-
if (shape[i] == -1 || size == 0)
251+
SmallVector<int64_t> shapeAfterTiling = linalgOp.getStaticLoopRanges();
252+
for (auto level : requiredLevels) {
253+
if (!loweringConfig.hasTilingLevel(level)) {
254+
return op->emitOpError("expected ")
255+
<< IREE::CPU::getTilingLevelName(level) << " is set";
256+
}
257+
auto tilingLevelAttr = cast<IREE::Codegen::LoweringConfigTilingLevelAttr>(
258+
loweringConfig.getTilingLevelAttr(level));
259+
for (size_t i = 0, e = tilingLevelAttr.getSizes().size(); i < e; ++i) {
260+
auto [size, scalableFlag] = getTileSizeAtIndex(
261+
tilingLevelAttr.getSizes(), tilingLevelAttr.getScalableFlags(), i);
262+
if (scalableFlag) {
263+
shapeAfterTiling[i] = ShapedType::kDynamic;
264+
continue;
265+
}
266+
if (size == 1) {
267+
shapeAfterTiling[i] = 1;
268+
continue;
269+
}
270+
if (ShapedType::isDynamicShape(shapeAfterTiling[i]) ||
271+
ShapedType::isDynamic(size) || size == 0) {
268272
continue;
269-
if (shape[i] % size != 0) {
270-
shape[i] = -1;
273+
}
274+
if (shapeAfterTiling[i] % size != 0) {
275+
shapeAfterTiling[i] = ShapedType::kDynamic;
271276
} else {
272-
shape[i] = size;
277+
shapeAfterTiling[i] = size;
273278
}
274279
}
275280
}
@@ -281,27 +286,27 @@ LogicalResult verifyConvTileAndDecomposeExpertConfig(
281286
linalg::PoolingNhwcSumOp, linalg::PoolingNhwcMaxOp,
282287
linalg::PoolingNhwcMaxUnsignedOp, linalg::PoolingNhwcMinOp,
283288
linalg::PoolingNhwcMinUnsignedOp>([&](auto) {
284-
// Shape: N, OH, OW, OC, KH, KW, (IC)
285-
khSize = shape[4];
286-
kwSize = shape[5];
287-
ohSize = shape[1];
288-
owSize = shape[2];
289+
// shape: N, OH, OW, OC, KH, KW, (IC)
290+
khSize = shapeAfterTiling[4];
291+
kwSize = shapeAfterTiling[5];
292+
ohSize = shapeAfterTiling[1];
293+
owSize = shapeAfterTiling[2];
289294
return success();
290295
})
291296
.Case<linalg::Conv2DNchwFchwOp>([&](auto) {
292-
// Shape: N, OC, OH, OW, (IC), KH, KW
293-
khSize = shape[5];
294-
kwSize = shape[6];
295-
ohSize = shape[2];
296-
owSize = shape[3];
297+
// shape: N, OC, OH, OW, (IC), KH, KW
298+
khSize = shapeAfterTiling[5];
299+
kwSize = shapeAfterTiling[6];
300+
ohSize = shapeAfterTiling[2];
301+
owSize = shapeAfterTiling[3];
297302
return success();
298303
})
299304
.Case<linalg::PoolingNchwSumOp, linalg::PoolingNchwMaxOp>([&](auto) {
300-
// Shape: N, OC, OH, OW, KH, KW
301-
khSize = shape[4];
302-
kwSize = shape[5];
303-
ohSize = shape[2];
304-
owSize = shape[3];
305+
// shape: N, OC, OH, OW, KH, KW
306+
khSize = shapeAfterTiling[4];
307+
kwSize = shapeAfterTiling[5];
308+
ohSize = shapeAfterTiling[2];
309+
owSize = shapeAfterTiling[3];
305310
return success();
306311
})
307312
.Default([&](auto) { return failure(); });

compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,18 +125,15 @@ void addMultiTilingExpertPassPipeline(OpPassManager &funcPassManager,
125125
void addTensorToVectorsPassPipeline(OpPassManager &funcPassManager,
126126
bool lowerToVectors = true);
127127

128-
// Populates the passes needed to do tiling, decomposing, and vectorizing the
129-
// convolution ops.
128+
/// Verifies that the given `loweringConfig` can decompose convolution ops to
129+
/// lower dim ops. It requires {Distribution, VectorCommonParallel,
130+
/// VectorReduction} tiling levels.
130131
LogicalResult verifyConvTileAndDecomposeExpertConfig(
131-
Operation *op, TilingConfig &tilingConfig,
132-
IREE::Codegen::TranslationInfoAttr translationInfo,
133-
ArrayRef<int64_t> workgroupSize = {});
132+
Operation *op, IREE::CPU::LoweringConfigAttr loweringConfig);
134133

135-
/// Populates the passes needed to do two-level tile + vectorize of linalg ops.
136-
LogicalResult verifyDoubleTilingExpertPassPipelineConfig(
137-
Operation *op, TilingConfig &tilingConfig,
138-
IREE::Codegen::TranslationInfoAttr translationInfo,
139-
ArrayRef<int64_t> workgroupSize = {});
134+
/// Verifies if the tile sizes from `loweringConfig` are valid for each level.
135+
LogicalResult verifyMultiTilingExpertPassPipelineConfig(
136+
Operation *op, IREE::CPU::LoweringConfigAttr loweringConfig);
140137

141138
/// Populates the passes needed to multi level tile and lowering of linalg ops
142139
/// on tensors to vectors operations.

0 commit comments

Comments
 (0)