intel
diff --git a/‎include/gc/Analysis/VectorBasedFusionAnalysis.h‎
Lines changed: 7 additions & 0 deletions b/‎include/gc/Analysis/VectorBasedFusionAnalysis.h‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎include/gc/Transforms/Utils/VectorUtils.h‎
Lines changed: 39 additions & 0 deletions b/‎include/gc/Transforms/Utils/VectorUtils.h‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎lib/gc/Analysis/VectorBasedFusionAnalysis.cpp‎
Lines changed: 3 additions & 0 deletions b/‎lib/gc/Analysis/VectorBasedFusionAnalysis.cpp‎
Lines changed: 3 additions & 0 deletions
@@ -123,6 +123,8 @@ class GroupOperationFusion : public VectorFusionBase {
   // store read and write operations permutation maps in order to convenient
   // to replace loop induction var
   DenseMap<Operation *, AffineMap> opPermuationMap;
+  /// records the original value associated with an operation operand
+  DenseMap<Value, Value> operandOriginalValue;
 
 public:
   GroupOperationFusion(func::FuncOp &func, HardWareInfo &info)
@@ -154,6 +156,7 @@ class GroupOperationFusion : public VectorFusionBase {
     this->getGroupOpResults() = fusion.getGroupOpResults();
     this->getGroupOpInitArgs() = fusion.getGroupOpInitArgs();
     this->getOpPermuationMap() = fusion.getOpPermuationMap();
+    this->getOperandOriginalValue() = fusion.getOperandOriginalValue();
     this->getFunction() = fusion.getFunction();
     this->getHardwareInfo() = fusion.getHardwareInfo();
     this->getTypeHelper() = fusion.getTypeHelper();
@@ -196,6 +199,10 @@ class GroupOperationFusion : public VectorFusionBase {
   DenseMap<Operation *, AffineMap> &getOpPermuationMap() noexcept {
     return opPermuationMap;
   }
+
+  DenseMap<Value, Value> &getOperandOriginalValue() noexcept {
+    return operandOriginalValue;
+  }
   /// set operation groups
   void setGroupOpResults(
       const SmallVector<
 
@@ -8,16 +8,55 @@
 
 #ifndef GC_TRANSFORMS_UTILS_VECTORUTILS_H
 #define GC_TRANSFORMS_UTILS_VECTORUTILS_H
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/TypeUtilities.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/Debug.h"
 #include <limits>
 #include <stdint.h>
 #include <variant>
 
 namespace mlir {
 namespace gc {
+/// Some operations, such as extract_slice or insert_slice, need to be moved
+/// because they may interfere with our analysis result. e.g.:
+/// ```
+/// clang-format off
+/// %21 = vector.transfer_read %18[%c0, %c0], %cst {in_bounds = [true, true]} :
+/// tensor<16x16xf32>, vector<16x16xf32> %22 = arith.addf %21, %20 :
+/// vector<16x16xf32> %23 = vector.transfer_write %22, %extracted_slice_12[%c0,
+/// %c0] {in_bounds = [true, true]} : vector<16x16xf32>, tensor<16x16xf32>
+/// %inserted_slice_13 = tensor.insert_slice %18 into %arg14[%arg13, 0] [16, 16]
+/// [1, 1] : tensor<16x16xf32> into tensor<32x16xf32> %extracted_slice_14 =
+/// tensor.extract_slice %arg16[%arg13, 0] [16, 16] [1, 1] : tensor<32x16xf32>
+/// to tensor<16x16xf32> %24 = vector.transfer_read %cst_0[%c0, %c0], %cst
+/// {in_bounds = [true, true]} : tensor<16x16xf32>, vector<16x16xf32> %25 =
+/// arith.maximumf %22, %24 : vector<16x16xf32> %26 = vector.transfer_write %25,
+/// %extracted_slice_14[%c0, %c0] {in_bounds = [true, true]} :
+/// vector<16x16xf32>, tensor<16x16xf32> %inserted_slice_15 =
+/// tensor.insert_slice %23 into %arg15[%arg13, 0] [16, 16] [1, 1] :
+/// tensor<16x16xf32> into tensor<32x16xf32> %inserted_slice_16 =
+/// tensor.insert_slice %26 into %arg16[%arg13, 0] [16, 16] [1, 1] :
+/// tensor<16x16xf32> into tensor<32x16xf32> clang-format on
+/// ```
+/// The maximumf and addf operations can be in the same group, but the
+/// extract_slice operation between them interferes with the grouping.
+/// For a moved operation (extract_slice) we check its operands. In order to
+/// ensure that the move does not affect the correctness of the result, we
+/// only move the op to after the op that its operands belong to. If all of
+/// its operands are block arguments, we move it to the beginning of the
+/// block.
+/// insert_slice operations are simply moved to just before the first operation
+/// that uses them.
+void moveSomeInterferenceOperation(
+    func::FuncOp *func, MLIRContext *ctx,
+    std::function<bool(Operation *)> &conditionalFunc);
+
 /// build a constant operation of index type
 Value makeIndexArithConstantOp(OpBuilder &opBuilder, const Location &loc,
                                int64_t x);
 
@@ -375,6 +375,9 @@ VectorType TypeHelper::getVectorzedType(Operation *op, uint32_t loopStep) {
 }
 
 int TypeHelper::generateValidSteps(int steps, VectorType type) {
+  // TODO: support odd shape using mask load store
+  if (type.getShape().back() & 1)
+    return 1;
   if (type.getShape().back() >= steps)
     return steps;
   int evenStep = getNearestVectorStep(type.getShape().back());
Original file line number	Diff line number	Diff line change
`@@ -375,6 +375,9 @@ VectorType TypeHelper::getVectorzedType(Operation *op, uint32_t loopStep) {`
`375`	`375`	`}`
`376`	`376`
`377`	`377`	`int TypeHelper::generateValidSteps(int steps, VectorType type) {`
	`378`	`+ // TODO: support odd shape using mask load store`
	`379`	`+ if (type.getShape().back() & 1)`
	`380`	`+ return 1;`
`378`	`381`	`if (type.getShape().back() >= steps)`
`379`	`382`	`return steps;`
`380`	`383`	`int evenStep = getNearestVectorStep(type.getShape().back());`