Skip to content

Commit a79147c

Browse files
author
Xu, Xiaohui1
committed
add utils.cpp
1 parent 1f0e3ce commit a79147c

File tree

6 files changed

+309
-245
lines changed

6 files changed

+309
-245
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//===-- VectorUtils.h ----- vector fusion analysis --------------*- C++ -*-===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef GC_TRANSFORMS_UTILS_VECTORUTILS_H
10+
#define GC_TRANSFORMS_UTILS_VECTORUTILS_H
11+
#include "mlir/Dialect/Vector/IR/VectorOps.h"
12+
#include "mlir/IR/BuiltinTypes.h"
13+
#include "mlir/IR/TypeUtilities.h"
14+
#include <limits>
15+
#include <stdint.h>
16+
#include <variant>
17+
18+
namespace mlir {
19+
namespace gc {
20+
/// Bit-level view of an IEEE-754 binary32 value: write one member and read
/// the other to reinterpret a float's raw bit pattern. Used by the
/// float <-> half/bfloat16 conversion helpers declared in this header.
union Float32Bits {
  uint32_t u; // raw bit pattern
  float f;    // floating-point value
};
24+
uint16_t float2half(float floatValue);
25+
float half2float(uint16_t halfValue);
26+
uint16_t float2bfloat(float floatValue);
27+
float bfloat2float(uint16_t bfloatBits);
28+
std::variant<float, int64_t> numeric_limits_minimum(Type type);
29+
std::variant<float, int64_t> numericLimitsMaximum(Type type);
30+
31+
template <typename T = float>
32+
T getInitValForReduce(vector::CombiningKind kind, Type t) {
33+
T result;
34+
Type t1 = getElementTypeOrSelf(t);
35+
36+
switch (kind) {
37+
case vector::CombiningKind::ADD:
38+
if (t1.isIntOrIndex())
39+
result = 0;
40+
else if (isa<FloatType>(t1))
41+
result = 0.0f;
42+
else
43+
llvm_unreachable("invalid value types for ADD reduction");
44+
break;
45+
case vector::CombiningKind::MAXNUMF:
46+
case vector::CombiningKind::MAXIMUMF:
47+
if (not isa<FloatType>(t1))
48+
llvm_unreachable("Expected float values.");
49+
result = std::get<T>(numeric_limits_minimum(t));
50+
break;
51+
case vector::CombiningKind::MINNUMF:
52+
case vector::CombiningKind::MINIMUMF:
53+
if (not isa<FloatType>(t1))
54+
llvm_unreachable("Expected float values.");
55+
result = std::get<T>(numericLimitsMaximum(t));
56+
break;
57+
case vector::CombiningKind::MAXSI:
58+
case vector::CombiningKind::MAXUI:
59+
if (not t1.isIntOrIndex())
60+
llvm_unreachable("Expected int or index values.");
61+
result = std::get<T>(numeric_limits_minimum(t));
62+
break;
63+
case vector::CombiningKind::MINSI:
64+
case vector::CombiningKind::MINUI:
65+
if (not t1.isIntOrIndex())
66+
llvm_unreachable("Expected int or index values.");
67+
result = std::get<T>(numericLimitsMaximum(t));
68+
break;
69+
case vector::CombiningKind::MUL:
70+
if (t1.isIntOrIndex())
71+
result = 1;
72+
else if (isa<FloatType>(t1))
73+
result = 1.f;
74+
else
75+
llvm_unreachable("invalid value types for MUL reduction");
76+
break;
77+
default:
78+
llvm_unreachable("unsupported reduction kind");
79+
};
80+
return result;
81+
}
82+
83+
} // namespace gc
84+
} // namespace mlir
85+
86+
#endif

lib/gc/Analysis/VectorBasedFusionAnalysis.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88
#include "gc/Analysis/VectorBasedFusionAnalysis.h"
9-
#include "mlir/Dialect/Linalg/IR/Linalg.h"
9+
#include "gc/Dialect/Linalgx/Utils.h"
1010

1111
namespace mlir {
1212
namespace gc {
@@ -22,16 +22,16 @@ namespace gc {
2222
arith::TruncFOp, arith::TruncIOp
2323

2424
#define NOT_NEED_TO_PROCESS_OP \
25-
linalg::GenericOp, linalg::BatchReduceMatmulOp, linalg::MatmulOp, \
26-
linalg::BatchMatmulOp, linalg::BatchMatmulTransposeAOp, \
27-
linalg::BatchMatmulTransposeBOp, linalg::MatmulTransposeAOp, \
28-
linalg::MatmulTransposeBOp, linalg::QuantizedBatchMatmulOp, \
29-
linalg::QuantizedMatmulOp, tensor::CollapseShapeOp, \
30-
tensor::ExpandShapeOp, tensor::ExtractSliceOp, tensor::InsertSliceOp, \
31-
microkernel::BrgemmOp
25+
linalg::BatchReduceMatmulOp, linalg::MatmulOp, linalg::BatchMatmulOp, \
26+
linalg::BatchMatmulTransposeAOp, linalg::BatchMatmulTransposeBOp, \
27+
linalg::MatmulTransposeAOp, linalg::MatmulTransposeBOp, \
28+
linalg::QuantizedBatchMatmulOp, linalg::QuantizedMatmulOp, \
29+
tensor::CollapseShapeOp, tensor::ExpandShapeOp, tensor::ExtractSliceOp, \
30+
tensor::InsertSliceOp, microkernel::BrgemmOp
3231

3332
// True for ops the fusion analysis must skip: the matmul family, shape/slice
// tensor ops, microkernel brgemm (NOT_NEED_TO_PROCESS_OP), and any generic
// op recognized as a packed matmul.
static inline bool isNotNeedToProcessOp(Operation *op) {
  if (isa<NOT_NEED_TO_PROCESS_OP>(op))
    return true;
  return linalgx::isAnyGenericPackedMatmulOp(op);
}
3636

3737
static inline bool isSpecialOp(Operation *op) {
@@ -72,7 +72,7 @@ void shapeCastSourceAxis(const ArrayRef<int64_t> &a, const ArrayRef<int64_t> &b,
7272
while (dimB < dimA && j < rankB)
7373
dimB *= b[j++];
7474
if (dimA != dimB) {
75-
assert(false && " Invalid shape cast operation.");
75+
llvm::llvm_unreachable_internal(" Invalid shape cast operation.");
7676
break;
7777
}
7878
if (bAxisBegin != j) {
@@ -87,12 +87,13 @@ void shapeCastSourceAxis(const ArrayRef<int64_t> &a, const ArrayRef<int64_t> &b,
8787
if (j < rankB && all_of(b.slice(j), isOne))
8888
j = rankB;
8989
}
90-
91-
assert(i == rankA && j == rankB && "Invalid shapecast operation.");
90+
if (i != rankA or j != rankB)
91+
llvm_unreachable("Invalid shapecast operation.");
9292
}
9393

9494
bool isScalar(Type type) {
95-
assert(type && "Not a valid type");
95+
if (not type)
96+
llvm_unreachable("Not a valid type");
9697
if (auto vecType = dyn_cast<VectorType>(type))
9798
return false;
9899
if (auto tensorType = dyn_cast<TensorType>(type))
@@ -107,8 +108,8 @@ void getSrcBroadcastDim(const ShapedType &input, const ShapedType &output,
107108
// following auto_broadcast semantics
108109
const size_t input_rank = inputShape.size();
109110
const size_t output_rank = outputShape.size();
110-
assert(output_rank >= input_rank &&
111-
"Incorrect input or output shape for broadcast op.");
111+
if (output_rank < input_rank)
112+
llvm_unreachable("Incorrect input or output shape for broadcast op.");
112113
const size_t offset = output_rank - input_rank;
113114
for (size_t i = 0; i < input_rank; ++i) {
114115
if (inputShape[i] == outputShape[i + offset] ||
@@ -390,13 +391,16 @@ mlir::FailureOr<VectorType> getOperationMaxVectorType(Operation *op) {
390391

391392
/// select nearest even step
392393
int getNearestVectorStep(const int step) {
393-
assert(step > 0);
394+
if (step <= 0)
395+
llvm_unreachable("Wrong step.");
396+
394397
int nbits = 0, n = step;
395398
while (n) {
396399
n = n >> 1;
397400
nbits++;
398401
}
399-
assert(nbits <= 6 || (nbits == 7 && step == 64));
402+
if (nbits > 6 and !(nbits == 7 && step == 64))
403+
llvm_unreachable("wrong nbits appear");
400404
return (1 << (nbits - 1)) == step ? step : (1 << nbits);
401405
}
402406

@@ -488,7 +492,7 @@ VectorType TypeHelper::getVectorzedType(Operation *op, uint32_t loopStep) {
488492
// down into a loop.
489493
mlir::FailureOr<VectorType> baseType = getOperationVectorType(op);
490494
if (failed(baseType)) {
491-
assert(0 && "Failed to get vector type for operation");
495+
llvm_unreachable("Failed to get vector type for operation");
492496
return VectorType();
493497
}
494498
auto vectorizedType = baseType.value();
@@ -518,7 +522,7 @@ int TypeHelper::generateValidSteps(int steps, VectorType type) {
518522
return favx2bits / typebits;
519523

520524
// invalid hardware
521-
assert(false && "Invalid hardware.");
525+
llvm_unreachable("Invalid hardware.");
522526
return -1;
523527
}
524528

@@ -590,7 +594,8 @@ void GroupOperationFusion::updateGroupBigestVectorType(VectorType vectorType) {
590594
}
591595

592596
void GroupOperationFusion::addOperationToGroup(Operation *op) {
593-
assert(op);
597+
if (not op)
598+
llvm_unreachable("Op can't be NULL.");
594599
VectorType vectorType = getOperationMaxVectorType(op).value();
595600
if (isNeedNewGroup(op))
596601
opGroups.emplace_back(std::queue<Operation *>());

0 commit comments

Comments
 (0)