Skip to content

Commit f96c544

Browse files
author
Xu, Xiaohui1
committed
temporary save
1 parent 1a2a9a1 commit f96c544

File tree

7 files changed

+318
-187
lines changed

7 files changed

+318
-187
lines changed

include/gc/Analysis/VectorBasedFusionAnalysis.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ class GroupOperationFusion : public VectorFusionBase {
123123
// store read and write operations' permutation maps so that loop induction
124124
// vars can be replaced conveniently
125125
DenseMap<Operation *, AffineMap> opPermuationMap;
126+
/// Maps an operation operand to the original value it operated on.
127+
DenseMap<Value, Value> operandOriginalValue;
126128

127129
public:
128130
GroupOperationFusion(func::FuncOp &func, HardWareInfo &info)
@@ -154,6 +156,7 @@ class GroupOperationFusion : public VectorFusionBase {
154156
this->getGroupOpResults() = fusion.getGroupOpResults();
155157
this->getGroupOpInitArgs() = fusion.getGroupOpInitArgs();
156158
this->getOpPermuationMap() = fusion.getOpPermuationMap();
159+
this->getOperandOriginalValue() = fusion.getOperandOriginalValue();
157160
this->getFunction() = fusion.getFunction();
158161
this->getHardwareInfo() = fusion.getHardwareInfo();
159162
this->getTypeHelper() = fusion.getTypeHelper();
@@ -196,6 +199,10 @@ class GroupOperationFusion : public VectorFusionBase {
196199
DenseMap<Operation *, AffineMap> &getOpPermuationMap() noexcept {
197200
return opPermuationMap;
198201
}
202+
203+
/// Returns a mutable reference to the operandOriginalValue map.
DenseMap<Value, Value> &getOperandOriginalValue() noexcept {
204+
return operandOriginalValue;
205+
}
199206
/// set operation groups
200207
void setGroupOpResults(
201208
const SmallVector<

include/gc/Transforms/Utils/VectorUtils.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,55 @@
88

99
#ifndef GC_TRANSFORMS_UTILS_VECTORUTILS_H
1010
#define GC_TRANSFORMS_UTILS_VECTORUTILS_H
11+
#include "mlir/Dialect/Affine/IR/AffineOps.h"
12+
#include "mlir/Dialect/Func/IR/FuncOps.h"
13+
#include "mlir/Dialect/Tensor/IR/Tensor.h"
1114
#include "mlir/Dialect/Vector/IR/VectorOps.h"
1215
#include "mlir/IR/BuiltinTypes.h"
1316
#include "mlir/IR/TypeUtilities.h"
17+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
1418
#include "llvm/ADT/TypeSwitch.h"
19+
#include "llvm/Support/Debug.h"
1520
#include <limits>
1621
#include <stdint.h>
1722
#include <variant>
1823

1924
namespace mlir {
2025
namespace gc {
26+
/// Need to move some operations like extract_slice or insert_slice,
27+
/// because those operations may interfere with our analysis result. e.g.:
28+
/// ```
29+
/// clang-format off
30+
/// %21 = vector.transfer_read %18[%c0, %c0], %cst {in_bounds = [true, true]} :
31+
/// tensor<16x16xf32>, vector<16x16xf32> %22 = arith.addf %21, %20 :
32+
/// vector<16x16xf32> %23 = vector.transfer_write %22, %extracted_slice_12[%c0,
33+
/// %c0] {in_bounds = [true, true]} : vector<16x16xf32>, tensor<16x16xf32>
34+
/// %inserted_slice_13 = tensor.insert_slice %18 into %arg14[%arg13, 0] [16, 16]
35+
/// [1, 1] : tensor<16x16xf32> into tensor<32x16xf32> %extracted_slice_14 =
36+
/// tensor.extract_slice %arg16[%arg13, 0] [16, 16] [1, 1] : tensor<32x16xf32>
37+
/// to tensor<16x16xf32> %24 = vector.transfer_read %cst_0[%c0, %c0], %cst
38+
/// {in_bounds = [true, true]} : tensor<16x16xf32>, vector<16x16xf32> %25 =
39+
/// arith.maximumf %22, %24 : vector<16x16xf32> %26 = vector.transfer_write %25,
40+
/// %extracted_slice_14[%c0, %c0] {in_bounds = [true, true]} :
41+
/// vector<16x16xf32>, tensor<16x16xf32> %inserted_slice_15 =
42+
/// tensor.insert_slice %23 into %arg15[%arg13, 0] [16, 16] [1, 1] :
43+
/// tensor<16x16xf32> into tensor<32x16xf32> %inserted_slice_16 =
44+
/// tensor.insert_slice %26 into %arg16[%arg13, 0] [16, 16] [1, 1] :
45+
/// tensor<16x16xf32> into tensor<32x16xf32> clang-format on
46+
/// ```
47+
/// The maximumf and addf operations can be in the same group, but the
48+
/// extract_slice operation interferes with that.
49+
/// Moving an operation (extract_slice) checks its operands. In order to
50+
/// ensure that it does not affect the correctness of the result, we will only
51+
/// move the moved op after the op that defines its operands. If its
52+
/// operands are all block arguments, we will move it to the beginning of the
53+
/// block.
54+
/// insert_slice ops are simply moved to just before the first operation that
55+
/// uses them.
56+
/// NOTE(review): `conditionalFunc` is presumably the predicate that selects
/// which operations are candidates for moving — confirm against the definition.
void moveSomeInterferenceOperation(
57+
func::FuncOp *func, MLIRContext *ctx,
58+
std::function<bool(Operation *)> &conditionalFunc);
59+
2160
/// build a constant operation of index type
2261
Value makeIndexArithConstantOp(OpBuilder &opBuilder, const Location &loc,
2362
int64_t x);

lib/gc/Analysis/VectorBasedFusionAnalysis.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,9 @@ VectorType TypeHelper::getVectorzedType(Operation *op, uint32_t loopStep) {
375375
}
376376

377377
int TypeHelper::generateValidSteps(int steps, VectorType type) {
378+
// TODO: support odd shapes using masked load/store
379+
if (type.getShape().back() & 1)
380+
return 1;
378381
if (type.getShape().back() >= steps)
379382
return steps;
380383
int evenStep = getNearestVectorStep(type.getShape().back());

0 commit comments

Comments
 (0)