diff --git a/src/Conversion/ONNXToTOSA/DialectBuilder.cpp b/src/Conversion/ONNXToTOSA/DialectBuilder.cpp
index 7bcf73fd7f..b64cc0b752 100644
--- a/src/Conversion/ONNXToTOSA/DialectBuilder.cpp
+++ b/src/Conversion/ONNXToTOSA/DialectBuilder.cpp
@@ -204,7 +204,13 @@ Value TosaBuilder::reshape(Value value, llvm::ArrayRef<int64_t> shape) {
       rewriter(), loc(), newValueType, value, shapeAttr);
 }
 
-Value TosaBuilder::mul(Value &lhs, Value &rhs, int32_t shift) {
+// Convenience overload: materializes `shift` as a tosa.const through
+// createMulShiftConst and forwards to the Value-based overload.
+Value TosaBuilder::mul(Value &lhs, Value &rhs, int8_t shift) {
+  return mul(lhs, rhs, tosa::createMulShiftConst(rewriter(), loc(), shift));
+}
+
+Value TosaBuilder::mul(Value &lhs, Value &rhs, Value shift) {
   if (needsRankBroadcast({lhs, rhs})) {
     llvm::SmallVector<Value, 4> valueVec = equalizeRanks({lhs, rhs});
     lhs = valueVec[0];
@@ -217,6 +223,7 @@ Value TosaBuilder::mul(Value &lhs, Value &rhs, int32_t shift) {
           : RankedTensorType::get(llvm::SmallVector<int64_t, 4>(
                                       lhsType.getRank(), ShapedType::kDynamic),
                 lhsType.getElementType());
+
   return tosa::CreateOpAndInfer<mlir::tosa::MulOp>(
       rewriter(), loc(), newValueType, lhs, rhs, shift);
 }
diff --git a/src/Conversion/ONNXToTOSA/DialectBuilder.hpp b/src/Conversion/ONNXToTOSA/DialectBuilder.hpp
index bb3e9b438b..7788bdb347 100644
--- a/src/Conversion/ONNXToTOSA/DialectBuilder.hpp
+++ b/src/Conversion/ONNXToTOSA/DialectBuilder.hpp
@@ -43,7 +43,8 @@ struct TosaBuilder : DialectBuilder {
       int32_t axis);
   template <typename T>
   mlir::Value binaryOp(mlir::Value &lhs, mlir::Value &rhs);
-  mlir::Value mul(mlir::Value &lhs, mlir::Value &rhs, int32_t shift = 0);
+  mlir::Value mul(mlir::Value &lhs, mlir::Value &rhs, int8_t shift = 0);
+  mlir::Value mul(mlir::Value &lhs, mlir::Value &rhs, mlir::Value shift);
   mlir::Value intdiv(mlir::Value &lhs, mlir::Value &rhs);
 
   mlir::Value transpose(mlir::Value &value, llvm::ArrayRef<int32_t> perm);
diff --git a/src/Conversion/ONNXToTOSA/NN/DequantizeLinear.cpp b/src/Conversion/ONNXToTOSA/NN/DequantizeLinear.cpp
index b0cabf450e..24332ebd19 100644
--- a/src/Conversion/ONNXToTOSA/NN/DequantizeLinear.cpp
+++ b/src/Conversion/ONNXToTOSA/NN/DequantizeLinear.cpp
@@ -95,8 +95,9 @@ class ONNXDequantizeLinearOpLoweringToTOSA
         rewriter, loc, adaptor.getXScale(), axis, resultType.getRank());
     Value scaleFactorCast =
         tosaBuilder.castToNewTensorElementType(scaleFactorConst, arithType);
+    Value shiftConst = tosa::createMulShiftConst(rewriter, loc, 0);
     Value mulOp = tosa::CreateOpAndInfer<mlir::tosa::MulOp>(
-        rewriter, loc, casted.getType(), casted, scaleFactorCast, 0)
+        rewriter, loc, casted.getType(), casted, scaleFactorCast, shiftConst)
                       .getResult();
     Value castOp = tosaBuilder.castToNewTensorElementType(
         mulOp, resultType.getElementType());
diff --git a/src/Conversion/ONNXToTOSA/NN/QuantizeLinear.cpp b/src/Conversion/ONNXToTOSA/NN/QuantizeLinear.cpp
index 38277d7a4a..6ceebe74c8 100644
--- a/src/Conversion/ONNXToTOSA/NN/QuantizeLinear.cpp
+++ b/src/Conversion/ONNXToTOSA/NN/QuantizeLinear.cpp
@@ -91,8 +91,9 @@ class ONNXQuantizeLinearOpLoweringToTOSA
     Value recOp = tosa::CreateOpAndInfer<mlir::tosa::ReciprocalOp>(rewriter,
         loc, expandedScaleFactorConst.getType(), expandedScaleFactorConst)
                       .getResult();
+    Value shiftConst = tosa::createMulShiftConst(rewriter, loc, 0);
     Value scaledResult = tosa::CreateOpAndInfer<mlir::tosa::MulOp>(
-        rewriter, loc, xType, x, recOp, 0)
+        rewriter, loc, xType, x, recOp, shiftConst)
                              .getResult();
 
     // Quantization to i4/i8/16/ is particular since the intermediate result of
diff --git a/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp b/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp
index e5dc056cbd..5eeb23ba31 100644
--- a/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp
+++ b/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp
@@ -149,5 +149,14 @@ mlir::Value expandShape(mlir::PatternRewriter &rewriter, mlir::Location loc,
       loc, resultTy, tensor, newShape);
 }
 
+// Creates the tensor<1xi8> tosa.const used as the shift operand of tosa.mul.
+mlir::Value createMulShiftConst(
+    mlir::PatternRewriter &rewriter, mlir::Location loc, int32_t shift) {
+  assert(-128 <= shift && shift <= 127 && "TOSA shift must fit in i8");
+  auto constType = RankedTensorType::get({1}, rewriter.getI8Type());
+  auto attr = DenseElementsAttr::get(constType, static_cast<int8_t>(shift));
+  return rewriter.create<mlir::tosa::ConstOp>(loc, constType, attr);
+}
+
 } // namespace tosa
 } // namespace onnx_mlir
diff --git a/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.hpp b/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.hpp
index 2dff124e15..03ce652350 100644
--- a/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.hpp
+++ b/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.hpp
@@ -60,6 +60,9 @@ mlir::ElementsAttr getElementsAttrFromConst(mlir::Value &val);
 mlir::Value expandShape(mlir::PatternRewriter &rewriter, mlir::Location loc,
     mlir::Value tensor, size_t axis, size_t rank);
 
+mlir::Value createMulShiftConst(
+    mlir::PatternRewriter &rewriter, mlir::Location loc, int32_t shift);
+
 // Creates a TOSA operation and performs shape inference on the individual
 // op. This allows shape inference during the framework to TOSA lowering.
 template <typename TosaOp, typename... Args>
diff --git a/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir b/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir
index ba21cb0e4e..beb11ea954 100644
--- a/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir
@@ -239,7 +239,8 @@ func.func @test_mul(%arg0: tensor<13x21x1xf32>, %arg1: tensor<13x21x1xf32>) -> t
   "func.return"(%0) : (tensor<13x21x1xf32>) -> ()
 // CHECK-LABEL:  func @test_mul
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<13x21x1xf32>, [[PARAM_1_:%.+]]: tensor<13x21x1xf32>) -> tensor<13x21x1xf32> {
-// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.mul [[PARAM_0_]], [[PARAM_1_]] {shift = 0 : i8} : (tensor<13x21x1xf32>, tensor<13x21x1xf32>) -> tensor<13x21x1xf32>
+// CHECK-NEXT:      [[SHIFT_0_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.mul [[PARAM_0_]], [[PARAM_1_]], [[SHIFT_0_]] : (tensor<13x21x1xf32>, tensor<13x21x1xf32>, tensor<1xi8>) -> tensor<13x21x1xf32>
 }
 
 // -----
@@ -249,7 +250,8 @@ func.func @test_mul_dynamic(%arg0: tensor<?x?x?xf32>, %arg1: tensor<13x?x?xf32>)
   "func.return"(%0) : (tensor<13x?x?xf32>) -> ()
 // CHECK-LABEL:  func @test_mul_dynamic
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<?x?x?xf32>, [[PARAM_1_:%.+]]: tensor<13x?x?xf32>) -> tensor<13x?x?xf32> {
-// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.mul [[PARAM_0_]], [[PARAM_1_]] {shift = 0 : i8} : (tensor<?x?x?xf32>, tensor<13x?x?xf32>) -> tensor<13x?x?xf32>
+// CHECK-NEXT:      [[SHIFT_1_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.mul [[PARAM_0_]], [[PARAM_1_]], [[SHIFT_1_]] : (tensor<?x?x?xf32>, tensor<13x?x?xf32>, tensor<1xi8>) -> tensor<13x?x?xf32>
 }
 
 // -----
@@ -260,7 +262,8 @@ func.func @test_mul_rank_broadcast(%arg0: tensor<13x21x1xf32>, %arg1: tensor<21x
 // CHECK-LABEL:  func @test_mul_rank_broadcast
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<13x21x1xf32>, [[PARAM_1_:%.+]]: tensor<21x1xf32>) -> tensor<13x21x1xf32> {
 // CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.reshape [[PARAM_1_]] {new_shape = array<i64: 1, 21, 1>} : (tensor<21x1xf32>) -> tensor<1x21x1xf32>
-// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<13x21x1xf32>, tensor<1x21x1xf32>) -> tensor<13x21x1xf32>
+// CHECK-NEXT:      [[SHIFT_2_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]], [[SHIFT_2_]] : (tensor<13x21x1xf32>, tensor<1x21x1xf32>, tensor<1xi8>) -> tensor<13x21x1xf32>
 }
 
 // -----
@@ -271,7 +274,8 @@ func.func @test_mul_rank_broadcast2(%arg0: tensor<21x1xf32>, %arg1: tensor<13x21
 // CHECK-LABEL:  func @test_mul_rank_broadcast2
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<21x1xf32>, [[PARAM_1_:%.+]]: tensor<13x21x1xf32>) -> tensor<13x21x1xf32> {
 // CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.reshape [[PARAM_0_]] {new_shape = array<i64: 1, 21, 1>} : (tensor<21x1xf32>) -> tensor<1x21x1xf32>
-// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[VAR_0_]], [[PARAM_1_]] {shift = 0 : i8} : (tensor<1x21x1xf32>, tensor<13x21x1xf32>) -> tensor<13x21x1xf32>
+// CHECK-NEXT:      [[SHIFT_3_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[VAR_0_]], [[PARAM_1_]], [[SHIFT_3_]] : (tensor<1x21x1xf32>, tensor<13x21x1xf32>, tensor<1xi8>) -> tensor<13x21x1xf32>
 }
 
 // -----
@@ -302,7 +306,7 @@ func.func @test_div_dynamic_float(%arg0: tensor<?x?x?xf32>, %arg1: tensor<13x?x?
 // CHECK-LABEL:  func.func @test_div_dynamic_float
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<?x?x?xf32>, [[PARAM_1_:%.+]]: tensor<13x?x?xf32>) -> tensor<13x?x?xf32> {
 // CHECK:           [[VAR_0_:%.+]] = tosa.reciprocal [[PARAM_1_]] : (tensor<13x?x?xf32>) -> tensor<13x?x?xf32>
-// CHECK:           [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<?x?x?xf32>, tensor<13x?x?xf32>) -> tensor<13x?x?xf32>
+// CHECK:           [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]], {{.*}}: (tensor<?x?x?xf32>, tensor<13x?x?xf32>, tensor<1xi8>) -> tensor<13x?x?xf32>
 // CHECK:           return [[VAR_1_]] : tensor<13x?x?xf32>
 // CHECK:         }
 }
@@ -336,29 +340,30 @@ func.func @test_div_decomposed(%arg0: tensor<13x21x1xf32>, %arg1: tensor<13x21x1
 // CHECK-LABEL:  func @test_div_decomposed
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<13x21x1xf32>, [[PARAM_1_:%.+]]: tensor<13x21x1xf32>) -> tensor<13x21x1xf32> {
 // CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.reciprocal [[PARAM_1_]] : (tensor<13x21x1xf32>) -> tensor<13x21x1xf32>
-// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<13x21x1xf32>, tensor<13x21x1xf32>) -> tensor<13x21x1xf32>
+// CHECK-NEXT:      [[SHIFT_4_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_0_]], [[SHIFT_4_]] : (tensor<13x21x1xf32>, tensor<13x21x1xf32>, tensor<1xi8>) -> tensor<13x21x1xf32>
 }
 
 // -----
 
 func.func @test_leaky_relu(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
-  %0 = "onnx.LeakyRelu"(%arg0) {alpha = 0.707330704  : f32} : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
+  %0 = "onnx.LeakyRelu"(%arg0) {alpha = 0.707330704 : f32} : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
   func.return %0 : tensor<13x21x3xf32>
 // CHECK-LABEL: test_leaky_relu
 // CHECK-DAG: %[[VAR0:.*]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xf32>}>
 // CHECK-DAG: %[[VAR1:.*]] = "tosa.const"() <{value = dense<0.707330704> : tensor<1x1x1xf32>}>
-// CHECK-DAG: %[[VAR2:.*]] = tosa.mul %arg0, %[[VAR1]] {shift = 0 : i8}
+// CHECK-DAG: %[[VAR2:.*]] = tosa.mul %arg0, %[[VAR1]]
 // CHECK-DAG: %[[VAR3:.*]] = tosa.greater_equal %arg0, %[[VAR0]]
 // CHECK: %[[VAR6:.*]] = tosa.select %[[VAR3]], %arg0, %[[VAR2]]
 }
 
 func.func @test_leaky_relu_bf16(%arg0: tensor<13x21x3xbf16>) -> tensor<13x21x3xbf16> {
-  %0 = "onnx.LeakyRelu"(%arg0) {alpha = 0.707330704  : f32} : (tensor<13x21x3xbf16>) -> tensor<13x21x3xbf16>
+  %0 = "onnx.LeakyRelu"(%arg0) {alpha = 0.707330704 : f32} : (tensor<13x21x3xbf16>) -> tensor<13x21x3xbf16>
   func.return %0 : tensor<13x21x3xbf16>
 // CHECK-LABEL: test_leaky_relu_bf16
 // CHECK-DAG: %[[VAR0:.*]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xbf16>}>
 // CHECK-DAG: %[[VAR1:.*]] = "tosa.const"() <{value = dense<7.070310e-01> : tensor<1x1x1xbf16>}>
-// CHECK-DAG: %[[VAR2:.*]] = tosa.mul %arg0, %[[VAR1]] {shift = 0 : i8}
+// CHECK-DAG: %[[VAR2:.*]] = tosa.mul %arg0, %[[VAR1]]
 // CHECK-DAG: %[[VAR3:.*]] = tosa.greater_equal %arg0, %[[VAR0]]
 // CHECK: %[[VAR6:.*]] = tosa.select %[[VAR3]], %arg0, %[[VAR2]]
 }
@@ -370,7 +375,7 @@ func.func @test_prelu(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) ->
   func.return %0 : tensor<13x21x3xf32>
 // CHECK-LABEL: test_prelu
 // CHECK-DAG:       [[VAR_0_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xf32>}>
-// CHECK-DAG:       [[VAR_1_:%.+]] = tosa.mul %arg0, %arg1 {shift = 0 : i8}
+// CHECK-DAG:       [[VAR_1_:%.+]] = tosa.mul %arg0, %arg1
 // CHECK:           [[VAR_2_:%.+]] = tosa.greater_equal %arg0, [[VAR_0_]]
 // CHECK:           [[VAR_3_:%.+]] = tosa.select [[VAR_2_]], %arg0, [[VAR_1_]]
 }
@@ -380,7 +385,7 @@ func.func @test_prelu_bf16(%arg0: tensor<13x21x3xbf16>, %arg1: tensor<13x21x3xbf
   func.return %0 : tensor<13x21x3xbf16>
 // CHECK-LABEL: test_prelu_bf16
 // CHECK-DAG:       [[VAR_0_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xbf16>}> : () -> tensor<1x1x1xbf16>
-// CHECK-DAG:       [[VAR_1_:%.+]] = tosa.mul %arg0, %arg1 {shift = 0 : i8}
+// CHECK-DAG:       [[VAR_1_:%.+]] = tosa.mul %arg0, %arg1
 // CHECK:           [[VAR_2_:%.+]] = tosa.greater_equal %arg0, [[VAR_0_]]
 // CHECK:           [[VAR_3_:%.+]] = tosa.select [[VAR_2_]], %arg0, [[VAR_1_]]
 }
@@ -395,11 +400,11 @@ func.func @test_selu_default_value(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<1.050700e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp %arg0
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]] {shift = 0 : i8}
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]]
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.sub [[VAR_4_]], [[VAR_0_]]
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater %arg0, [[VAR_2_]]
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], %arg0, [[VAR_5_]]
-// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]] {shift = 0 : i8}
+// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]]
 // CHECK:           return [[VAR_8_]] : tensor<13x21x3xf32>
 }
 
@@ -411,11 +416,11 @@ func.func @test_selu(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<2.000000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]] {shift = 0 : i8}
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]]
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.sub [[VAR_4_]], [[VAR_0_]]
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater %arg0, [[VAR_2_]]
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], %arg0, [[VAR_5_]]
-// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]] {shift = 0 : i8}
+// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]]
 // CHECK:           return [[VAR_8_]] : tensor<13x21x3xf32>
 }
 
@@ -442,11 +447,11 @@ func.func @test_selu_dynamic(%arg0: tensor<?x4x?xf32>) -> tensor<?x4x?xf32> {
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<2.000000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp [[PARAM_0_]] : (tensor<?x4x?xf32>) -> tensor<?x4x?xf32>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<?x4x?xf32>, tensor<1x1x1xf32>) -> tensor<?x4x?xf32>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_0_]], {{.*}}: (tensor<?x4x?xf32>, tensor<1x1x1xf32>, tensor<1xi8>) -> tensor<?x4x?xf32>
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.sub [[VAR_4_]], [[VAR_0_]] : (tensor<?x4x?xf32>, tensor<1x1x1xf32>) -> tensor<?x4x?xf32>
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater [[PARAM_0_]], [[VAR_2_]] : (tensor<?x4x?xf32>, tensor<1x1x1xf32>) -> tensor<?x4x?xi1>
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], [[PARAM_0_]], [[VAR_5_]] : (tensor<?x4x?xi1>, tensor<?x4x?xf32>, tensor<?x4x?xf32>) -> tensor<?x4x?xf32>
-// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<?x4x?xf32>, tensor<1x1x1xf32>) -> tensor<?x4x?xf32>
+// CHECK:           [[VAR_8_:%.+]] = tosa.mul [[VAR_7_]], [[VAR_1_]], {{.*}}: (tensor<?x4x?xf32>, tensor<1x1x1xf32>, tensor<1xi8>) -> tensor<?x4x?xf32>
 // CHECK:           return [[VAR_8_]] : tensor<?x4x?xf32>
 }
 
@@ -697,7 +702,8 @@ func.func @test_div_decomposed_broadcast(%arg0: tensor<13x21x1xf32>, %arg1: tens
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<13x21x1xf32>, [[PARAM_1_:%.+]]: tensor<1xf32>) -> tensor<13x21x1xf32> {
 // CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.reciprocal [[PARAM_1_]] : (tensor<1xf32>) -> tensor<1xf32>
 // CHECK-NEXT:      [[VAR_1_:%.+]] = tosa.reshape [[VAR_0_]] {new_shape = array<i64: 1, 1, 1>} : (tensor<1xf32>) -> tensor<1x1x1xf32>
-// CHECK-NEXT:      [[VAR_2_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<13x21x1xf32>, tensor<1x1x1xf32>) -> tensor<13x21x1xf32>
+// CHECK-NEXT:      [[SHIFT_5_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT:      [[VAR_2_:%.+]] = tosa.mul [[PARAM_0_]], [[VAR_1_]], [[SHIFT_5_]] : (tensor<13x21x1xf32>, tensor<1x1x1xf32>, tensor<1xi8>) -> tensor<13x21x1xf32>
 }
 
 // -----
@@ -846,7 +852,7 @@ func.func @test_hardsigmoid_default_values_f32(%arg0: tensor<3xf32>) -> tensor<3
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<2.000000e-01> : tensor<1xf32>}> : () -> tensor<1xf32>
 // CHECK:           [[VAR_2_:%.+]] = tosa.add [[PARAM_0_]], [[VAR_0_]] : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
 // CHECK:           [[VAR_3_:%.+]] = tosa.clamp [[VAR_2_]] {max_fp = 5.000000e+00 : f32, max_int = 5 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<3xf32>) -> tensor<3xf32>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<3xf32>
 // CHECK:           return [[VAR_4_]] : tensor<3xf32>
 }
 
@@ -859,7 +865,7 @@ func.func @test_hardsigmoid_default_values_f16(%arg0: tensor<3xf16>) -> tensor<3
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<1.999510e-01> : tensor<1xf16>}> : () -> tensor<1xf16>
 // CHECK:           [[VAR_2_:%.+]] = tosa.add [[PARAM_0_]], [[VAR_0_]] : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
 // CHECK:           [[VAR_3_:%.+]] = tosa.clamp [[VAR_2_]] {max_fp = 5.000000e+00 : f32, max_int = 5 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<3xf16>) -> tensor<3xf16>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<3xf16>, tensor<1xf16>, tensor<1xi8>) -> tensor<3xf16>
 // CHECK:           return [[VAR_4_]] : tensor<3xf16>
 }
 
@@ -873,7 +879,7 @@ func.func @test_hardsigmoid_f32(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<0.166666672> : tensor<1xf32>}> : () -> tensor<1xf32>
 // CHECK:           [[VAR_2_:%.+]] = tosa.add [[PARAM_0_]], [[VAR_0_]] : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
 // CHECK:           [[VAR_3_:%.+]] = tosa.clamp [[VAR_2_]] {max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<3xf32>) -> tensor<3xf32>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<3xf32>
 // CHECK:           return [[VAR_4_]] : tensor<3xf32>
 }
 
@@ -886,7 +892,7 @@ func.func @test_hardsigmoid_f16(%arg0: tensor<3xf16>) -> tensor<3xf16> {
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<1.666260e-01> : tensor<1xf16>}> : () -> tensor<1xf16>
 // CHECK:           [[VAR_2_:%.+]] = tosa.add [[PARAM_0_]], [[VAR_0_]] : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
 // CHECK:           [[VAR_3_:%.+]] = tosa.clamp [[VAR_2_]] {max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<3xf16>) -> tensor<3xf16>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<3xf16>, tensor<1xf16>, tensor<1xi8>) -> tensor<3xf16>
 // CHECK:           return [[VAR_4_]] : tensor<3xf16>
 }
 
@@ -901,7 +907,7 @@ func.func @test_hardsigmoid_dynamic(%arg0: tensor<?x3x?xf16>) -> tensor<?x3x?xf1
 // CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<1.666260e-01> : tensor<1x1x1xf16>}> : () -> tensor<1x1x1xf16>
 // CHECK:           [[VAR_2_:%.+]] = tosa.add [[PARAM_0_]], [[VAR_0_]] : (tensor<?x3x?xf16>, tensor<1x1x1xf16>) -> tensor<?x3x?xf16>
 // CHECK:           [[VAR_3_:%.+]] = tosa.clamp [[VAR_2_]] {max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<?x3x?xf16>) -> tensor<?x3x?xf16>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<?x3x?xf16>, tensor<1x1x1xf16>) -> tensor<?x3x?xf16>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<?x3x?xf16>, tensor<1x1x1xf16>, tensor<1xi8>) -> tensor<?x3x?xf16>
 // CHECK:           return [[VAR_4_]] : tensor<?x3x?xf16>
 }
 
@@ -928,7 +934,7 @@ func.func @test_elu_f32(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp [[PARAM_0_]] : (tensor<3xf32>) -> tensor<3xf32>
 // CHECK:           [[VAR_4_:%.+]] = tosa.sub [[VAR_3_]], [[VAR_0_]] : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
-// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
+// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]], {{.*}}: (tensor<3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<3xf32>
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater_equal [[PARAM_0_]], [[VAR_2_]] : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xi1>
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], [[PARAM_0_]], [[VAR_5_]] : (tensor<3xi1>, tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
 // CHECK:           return [[VAR_7_]]
@@ -944,7 +950,7 @@ func.func @test_elu_f16(%arg0: tensor<3xf16>) -> tensor<3xf16> {
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1xf16>}> : () -> tensor<1xf16>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp [[PARAM_0_]] : (tensor<3xf16>) -> tensor<3xf16>
 // CHECK:           [[VAR_4_:%.+]] = tosa.sub [[VAR_3_]], [[VAR_0_]] : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
-// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
+// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]], {{.*}}: (tensor<3xf16>, tensor<1xf16>, tensor<1xi8>) -> tensor<3xf16>
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater_equal [[PARAM_0_]], [[VAR_2_]] : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xi1>
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], [[PARAM_0_]], [[VAR_5_]] : (tensor<3xi1>, tensor<3xf16>, tensor<3xf16>) -> tensor<3xf16>
 // CHECK:           return [[VAR_7_]]
@@ -962,7 +968,7 @@ func.func @test_elu_unranked(%arg0: tensor<*xf32>) -> tensor<3xf32> {
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.exp [[PARAM_0_]] : (tensor<*xf32>) -> tensor<3xf32>
 // CHECK:           [[VAR_4_:%.+]] = tosa.sub [[VAR_3_]], [[VAR_0_]] : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
-// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<3xf32>, tensor<1xf32>) -> tensor<3xf32>
+// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_1_]], {{.*}}: (tensor<3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<3xf32>
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.greater_equal [[PARAM_0_]], [[VAR_2_]] : (tensor<*xf32>, tensor<1xf32>) -> tensor<*xi1>
 // CHECK:           [[VAR_7_:%.+]] = tosa.select [[VAR_6_]], [[PARAM_0_]], [[VAR_5_]] : (tensor<*xi1>, tensor<*xf32>, tensor<3xf32>) -> tensor<3xf32>
 // CHECK:           return [[VAR_7_]] : tensor<3xf32>
@@ -1154,7 +1160,7 @@ func.func @test_sin_dynamic(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
   "func.return"(%0) : (tensor<*xf32>) -> ()
 // CHECK-LABEL:  func @test_sin_dynamic
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<?x10xf32>) -> tensor<?x10xf32> {
-// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.sin [[PARAM_0_]]  : (tensor<?x10xf32>) -> tensor<?x10xf32>
+// CHECK-NEXT:      [[VAR_0_:%.+]] = tosa.sin [[PARAM_0_]] : (tensor<?x10xf32>) -> tensor<?x10xf32>
 // CHECK-NEXT:      return [[VAR_0_]] : tensor<?x10xf32>
 // CHECK-NEXT:    }
 }
diff --git a/test/mlir/conversion/onnx_to_tosa/Math/Gemm_to_matmul.mlir b/test/mlir/conversion/onnx_to_tosa/Math/Gemm_to_matmul.mlir
index d1c99e06d0..38decf50ed 100644
--- a/test/mlir/conversion/onnx_to_tosa/Math/Gemm_to_matmul.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/Math/Gemm_to_matmul.mlir
@@ -26,7 +26,7 @@ func.func @test_alpha(%arg0: tensor<3x6xf32>, %arg1: tensor<6x4xf32>, %arg2: ten
 // CHECK-DAG:       [[VAR_0_:%.+]] = tosa.reshape [[PARAM_0_]] {new_shape = array<i64: 1, 3, 6>} : (tensor<3x6xf32>) -> tensor<1x3x6xf32>
 // CHECK-DAG:       [[VAR_1_:%.+]] = tosa.reshape [[PARAM_1_]] {new_shape = array<i64: 1, 6, 4>} : (tensor<6x4xf32>) -> tensor<1x6x4xf32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<1.618000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
-// CHECK:           [[VAR_3_:%.+]] = tosa.mul [[VAR_2_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x3x6xf32>) -> tensor<1x3x6xf32>
+// CHECK:           [[VAR_3_:%.+]] = tosa.mul [[VAR_2_]], [[VAR_0_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x3x6xf32>, tensor<1xi8>) -> tensor<1x3x6xf32>
 // CHECK-DAG:       [[VAR_4_:%.+]] = tosa.matmul [[VAR_3_]], [[VAR_1_]] : (tensor<1x3x6xf32>, tensor<1x6x4xf32>) -> tensor<1x3x4xf32>
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.reshape [[PARAM_2_]] {new_shape = array<i64: 1, 3, 4>} : (tensor<3x4xf32>) -> tensor<1x3x4xf32>
 // CHECK:           [[VAR_6_:%.+]] = tosa.add [[VAR_4_]], [[VAR_5_]] : (tensor<1x3x4xf32>, tensor<1x3x4xf32>) -> tensor<1x3x4xf32>
@@ -47,7 +47,7 @@ func.func @test_beta(%arg0: tensor<3x6xf32>, %arg1: tensor<6x6xf32>, %arg2: tens
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<1.349000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.reshape [[PARAM_2_]] {new_shape = array<i64: 1, 3, 6>} : (tensor<3x6xf32>) -> tensor<1x3x6xf32>
 // CHECK-NOT: separator of consecutive DAGs
-// CHECK-DAG:       [[VAR_4_:%.+]] = tosa.mul [[VAR_2_]], [[VAR_3_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x3x6xf32>) -> tensor<1x3x6xf32>
+// CHECK-DAG:       [[VAR_4_:%.+]] = tosa.mul [[VAR_2_]], [[VAR_3_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x3x6xf32>, tensor<1xi8>) -> tensor<1x3x6xf32>
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.matmul [[VAR_0_]], [[VAR_1_]] : (tensor<1x3x6xf32>, tensor<1x6x6xf32>) -> tensor<1x3x6xf32>
 // CHECK:           [[VAR_6_:%.+]] = tosa.add [[VAR_5_]], [[VAR_4_]] : (tensor<1x3x6xf32>, tensor<1x3x6xf32>) -> tensor<1x3x6xf32>
 // CHECK:           [[VAR_7_:%.+]] = tosa.reshape [[VAR_6_]] {new_shape = array<i64: 3, 6>} : (tensor<1x3x6xf32>) -> tensor<3x6xf32>
@@ -87,7 +87,7 @@ func.func @test_transb(%arg0: tensor<3x6xf32>, %arg1: tensor<4x6xf32>, %arg2: te
 // CHECK-NOT: separator of consecutive DAGs
 // CHECK-DAG:       [[VAR_3_:%.+]] = tosa.transpose [[VAR_1_]], [[VAR_2_]] : (tensor<1x4x6xf32>, tensor<3xi32>) -> tensor<1x6x4xf32>
 // CHECK-DAG:       [[VAR_4_:%.+]] = "tosa.const"() <{value = dense<1.184000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
-// CHECK:           [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_0_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x3x6xf32>) -> tensor<1x3x6xf32>
+// CHECK:           [[VAR_5_:%.+]] = tosa.mul [[VAR_4_]], [[VAR_0_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x3x6xf32>, tensor<1xi8>) -> tensor<1x3x6xf32>
 // CHECK-DAG:       [[VAR_6_:%.+]] = tosa.matmul [[VAR_5_]], [[VAR_3_]] : (tensor<1x3x6xf32>, tensor<1x6x4xf32>) -> tensor<1x3x4xf32>
 // CHECK-DAG:       [[VAR_7_:%.+]] = tosa.reshape [[PARAM_2_]] {new_shape = array<i64: 1, 3, 4>} : (tensor<3x4xf32>) -> tensor<1x3x4xf32>
 // CHECK:           [[VAR_8_:%.+]] = tosa.add [[VAR_6_]], [[VAR_7_]] : (tensor<1x3x4xf32>, tensor<1x3x4xf32>) -> tensor<1x3x4xf32>
@@ -127,7 +127,7 @@ func.func @test_no_c_no_trans(%arg0: tensor<1x5xf32>, %arg1: tensor<5x6xf32>) ->
 // CHECK-DAG:       [[VAR_1_:%.+]] = tosa.reshape [[PARAM_0_]] {new_shape = array<i64: 1, 1, 5>} : (tensor<1x5xf32>) -> tensor<1x1x5xf32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = tosa.reshape [[PARAM_1_]] {new_shape = array<i64: 1, 5, 6>} : (tensor<5x6xf32>) -> tensor<1x5x6xf32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = "tosa.const"() <{value = dense<1.349000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
-// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x1x5xf32>) -> tensor<1x1x5xf32>
+// CHECK:           [[VAR_4_:%.+]] = tosa.mul [[VAR_3_]], [[VAR_1_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x1x5xf32>, tensor<1xi8>) -> tensor<1x1x5xf32>
 // CHECK:           [[VAR_5_:%.+]] = tosa.matmul [[VAR_4_]], [[VAR_2_]] : (tensor<1x1x5xf32>, tensor<1x5x6xf32>) -> tensor<1x1x6xf32>
 // CHECK:           [[VAR_6_:%.+]] = tosa.reshape [[VAR_5_]] {new_shape = array<i64: 1, 6>} : (tensor<1x1x6xf32>) -> tensor<1x6xf32>
 // CHECK:           return [[VAR_6_]] : tensor<1x6xf32>
@@ -151,11 +151,11 @@ func.func @test_mixed(%arg0: tensor<11x5xf32>, %arg1: tensor<3x11xf32>, %arg2: t
 // CHECK-DAG:       [[VAR_5_:%.+]] = tosa.transpose [[VAR_1_]], [[VAR_4_]] : (tensor<1x3x11xf32>, tensor<3xi32>) -> tensor<1x11x3xf32>
 // CHECK-DAG:       [[VAR_6_:%.+]] = "tosa.const"() <{value = dense<1.402000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-NOT: separator of consecutive DAGs
-// CHECK-DAG:       [[VAR_7_:%.+]] = tosa.mul [[VAR_6_]], [[VAR_3_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x5x11xf32>) -> tensor<1x5x11xf32>
+// CHECK-DAG:       [[VAR_7_:%.+]] = tosa.mul [[VAR_6_]], [[VAR_3_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x5x11xf32>, tensor<1xi8>) -> tensor<1x5x11xf32>
 // CHECK-DAG:       [[VAR_8_:%.+]] = "tosa.const"() <{value = dense<1.998000e+00> : tensor<1x1x1xf32>}> : () -> tensor<1x1x1xf32>
 // CHECK-DAG:       [[VAR_9_:%.+]] = tosa.reshape [[PARAM_2_]] {new_shape = array<i64: 1, 5, 3>} : (tensor<5x3xf32>) -> tensor<1x5x3xf32>
 // CHECK-NOT: separator of consecutive DAGs
-// CHECK-DAG:       [[VAR_10_:%.+]] = tosa.mul [[VAR_8_]], [[VAR_9_]] {shift = 0 : i8} : (tensor<1x1x1xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
+// CHECK-DAG:       [[VAR_10_:%.+]] = tosa.mul [[VAR_8_]], [[VAR_9_]], {{.*}}: (tensor<1x1x1xf32>, tensor<1x5x3xf32>, tensor<1xi8>) -> tensor<1x5x3xf32>
 // CHECK-DAG:       [[VAR_11_:%.+]] = tosa.matmul [[VAR_7_]], [[VAR_5_]] : (tensor<1x5x11xf32>, tensor<1x11x3xf32>) -> tensor<1x5x3xf32>
 // CHECK:           [[VAR_12_:%.+]] = tosa.add [[VAR_11_]], [[VAR_10_]] : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
 // CHECK:           [[VAR_13_:%.+]] = tosa.reshape [[VAR_12_]] {new_shape = array<i64: 5, 3>} : (tensor<1x5x3xf32>) -> tensor<5x3xf32>
diff --git a/test/mlir/conversion/onnx_to_tosa/Math/ReduceMean.mlir b/test/mlir/conversion/onnx_to_tosa/Math/ReduceMean.mlir
index 71629af893..4d985992f5 100644
--- a/test/mlir/conversion/onnx_to_tosa/Math/ReduceMean.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/Math/ReduceMean.mlir
@@ -9,7 +9,8 @@ return %1 : tensor<2x5x1x1xf32>
 // CHECK:           %[[VAL_1:.*]] = tosa.reduce_sum %[[VAL_0]] {axis = 2 : i32} : (tensor<2x5x9x11xf32>) -> tensor<2x5x1x11xf32>
 // CHECK:           %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 3 : i32} : (tensor<2x5x1x11xf32>) -> tensor<2x5x1x1xf32>
 // CHECK:           %[[VAL_3:.*]] = "tosa.const"() <{value = dense<0.0101010101> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
-// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]] {shift = 0 : i8} : (tensor<2x5x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<2x5x1x1xf32>
+// CHECK:           %[[SHIFT_0:.*]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]], %[[SHIFT_0]] : (tensor<2x5x1x1xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<2x5x1x1xf32>
 // CHECK:           return %[[VAL_4]] : tensor<2x5x1x1xf32>
 }
 
@@ -26,7 +27,8 @@ return %0 : tensor<1x1x1x1xf32>
 // CHECK:           %[[VAL_3:.*]] = tosa.reduce_sum %[[VAL_2]] {axis = 2 : i32} : (tensor<1x1x9x11xf32>) -> tensor<1x1x1x11xf32>
 // CHECK:           %[[VAL_4:.*]] = tosa.reduce_sum %[[VAL_3]] {axis = 3 : i32} : (tensor<1x1x1x11xf32>) -> tensor<1x1x1x1xf32>
 // CHECK:           %[[VAL_5:.*]] = "tosa.const"() <{value = dense<0.00101010106> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
-// CHECK:           %[[VAL_6:.*]] = tosa.mul %[[VAL_4]], %[[VAL_5]] {shift = 0 : i8} : (tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
+// CHECK:           %[[SHIFT_1:.*]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           %[[VAL_6:.*]] = tosa.mul %[[VAL_4]], %[[VAL_5]], %[[SHIFT_1]] : (tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<1x1x1x1xf32>
 // CHECK:           return %[[VAL_6]] : tensor<1x1x1x1xf32>
 }
 
@@ -42,7 +44,8 @@ return %1 : tensor<2x5xf32>
 // CHECK:           %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 3 : i32} : (tensor<2x5x1x11xf32>) -> tensor<2x5x1x1xf32>
 // CHECK:           %[[VAL_3:.*]] = tosa.reshape %[[VAL_2]] {new_shape = array<i64: 2, 5>} : (tensor<2x5x1x1xf32>) -> tensor<2x5xf32>
 // CHECK:           %[[VAL_4:.*]] = "tosa.const"() <{value = dense<0.0101010101> : tensor<1x1xf32>}> : () -> tensor<1x1xf32>
-// CHECK:           %[[VAL_5:.*]] = tosa.mul %[[VAL_3]], %[[VAL_4]] {shift = 0 : i8} : (tensor<2x5xf32>, tensor<1x1xf32>) -> tensor<2x5xf32>
+// CHECK:           %[[SHIFT_2:.*]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           %[[VAL_5:.*]] = tosa.mul %[[VAL_3]], %[[VAL_4]], %[[SHIFT_2]] : (tensor<2x5xf32>, tensor<1x1xf32>, tensor<1xi8>) -> tensor<2x5xf32>
 // CHECK:           return %[[VAL_5]] : tensor<2x5xf32>
 }
 
@@ -57,7 +60,8 @@ return %1 : tensor<2x5x1x1xf32>
 // CHECK:           %[[VAL_1:.*]] = tosa.reduce_sum %[[VAL_0]] {axis = 2 : i32} : (tensor<2x5x9x11xf32>) -> tensor<2x5x1x11xf32>
 // CHECK:           %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 3 : i32} : (tensor<2x5x1x11xf32>) -> tensor<2x5x1x1xf32>
 // CHECK:           %[[VAL_3:.*]] = "tosa.const"() <{value = dense<0.0101010101> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
-// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]] {shift = 0 : i8} : (tensor<2x5x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<2x5x1x1xf32>
+// CHECK:           %[[SHIFT_3:.*]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]], %[[SHIFT_3]] : (tensor<2x5x1x1xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<2x5x1x1xf32>
 // CHECK:           return %[[VAL_4]] : tensor<2x5x1x1xf32>
 }
 
@@ -81,7 +85,8 @@ func.func @test_reducemeanV13(%arg0: tensor<1x32x112x112xf32>) -> tensor<1x32x1x
 // CHECK:           [[VAR_0_:%.+]] = tosa.reduce_sum %arg0 {axis = 2 : i32}
 // CHECK-DAG:       [[VAR_1_:%.+]] = tosa.reduce_sum [[VAR_0_]] {axis = 3 : i32}
 // CHECK-DAG:       [[VAR_2_:%.+]] = "tosa.const"() <{value = dense<7.97193861E-5> : tensor<1x1x1x1xf32>}>
-// CHECK:           [[VAR_3_:%.+]] = tosa.mul [[VAR_1_]], [[VAR_2_]] {shift = 0 : i8} : (tensor<1x32x1x1xf32>, tensor<1x1x1x1xf32>)
+// CHECK:           [[SHIFT_4_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           [[VAR_3_:%.+]] = tosa.mul [[VAR_1_]], [[VAR_2_]], [[SHIFT_4_]] : (tensor<1x32x1x1xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>)
 // CHECK:           return [[VAR_3_]] : tensor<1x32x1x1xf32>
 }
 
diff --git a/test/mlir/conversion/onnx_to_tosa/Math/Softmax.mlir b/test/mlir/conversion/onnx_to_tosa/Math/Softmax.mlir
index d2013c44a7..ef4be1c041 100644
--- a/test/mlir/conversion/onnx_to_tosa/Math/Softmax.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/Math/Softmax.mlir
@@ -9,7 +9,7 @@ func.func @test_softmax_v13(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
 // CHECK: %[[VAL_1:.*]] = tosa.exp %[[SUB]] : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
 // CHECK: %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 2 : i32} : (tensor<13x21x3xf32>) -> tensor<13x21x1xf32>
 // CHECK: %[[VAL_3:.*]] = tosa.reciprocal %[[VAL_2]] : (tensor<13x21x1xf32>) -> tensor<13x21x1xf32>
-// CHECK: %[[VAL_4:.*]] = tosa.mul %[[VAL_1]], %[[VAL_3]] {shift = 0 : i8} : (tensor<13x21x3xf32>, tensor<13x21x1xf32>) -> tensor<13x21x3xf32>
+// CHECK: %[[VAL_4:.*]] = tosa.mul %[[VAL_1]], %[[VAL_3]], {{.*}}: (tensor<13x21x3xf32>, tensor<13x21x1xf32>, tensor<1xi8>) -> tensor<13x21x3xf32>
 }
 
 // -----
@@ -23,7 +23,7 @@ func.func @test_softmax_v13_axis_one(%arg0: tensor<13x21x3xf32>) -> tensor<13x21
 // CHECK: %[[VAL_1:.*]] = tosa.exp %[[SUB]] : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
 // CHECK: %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 1 : i32} : (tensor<13x21x3xf32>) -> tensor<13x1x3xf32>
 // CHECK: %[[VAL_3:.*]] = tosa.reciprocal %[[VAL_2]] : (tensor<13x1x3xf32>) -> tensor<13x1x3xf32>
-// CHECK: %[[VAL_4:.*]] = tosa.mul %[[VAL_1]], %[[VAL_3]] {shift = 0 : i8} : (tensor<13x21x3xf32>, tensor<13x1x3xf32>) -> tensor<13x21x3xf32>
+// CHECK: %[[VAL_4:.*]] = tosa.mul %[[VAL_1]], %[[VAL_3]], {{.*}}: (tensor<13x21x3xf32>, tensor<13x1x3xf32>, tensor<1xi8>) -> tensor<13x21x3xf32>
 }
 
 // -----
@@ -39,7 +39,7 @@ func.func @test_softmax_before_v13(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3
 // CHECK: %[[VAL_2:.*]] = tosa.reduce_sum %[[VAL_1]] {axis = 1 : i32} : (tensor<13x21x3xf32>) -> tensor<13x1x3xf32>
 // CHECK: %[[VAL_3:.*]] = tosa.reduce_sum %[[VAL_2]] {axis = 2 : i32} : (tensor<13x1x3xf32>) -> tensor<13x1x1xf32>
 // CHECK: %[[VAL_4:.*]] = tosa.reciprocal %[[VAL_3]] : (tensor<13x1x1xf32>) -> tensor<13x1x1xf32>
-// CHECK: %[[VAL_5:.*]] = tosa.mul %[[VAL_1]], %[[VAL_4]] {shift = 0 : i8} : (tensor<13x21x3xf32>, tensor<13x1x1xf32>) -> tensor<13x21x3xf32>
+// CHECK: %[[VAL_5:.*]] = tosa.mul %[[VAL_1]], %[[VAL_4]], {{.*}}: (tensor<13x21x3xf32>, tensor<13x1x1xf32>, tensor<1xi8>) -> tensor<13x21x3xf32>
 }
 
 // -----
@@ -57,5 +57,5 @@ func.func @test_softmax_before_v13_axis_zero(%arg0: tensor<13x21x3xf32>) -> tens
 // CHECK: %[[VAL_3:.*]] = tosa.reduce_sum %[[VAL_2]] {axis = 1 : i32} : (tensor<1x21x3xf32>) -> tensor<1x1x3xf32>
 // CHECK: %[[VAL_4:.*]] = tosa.reduce_sum %[[VAL_3]] {axis = 2 : i32} : (tensor<1x1x3xf32>) -> tensor<1x1x1xf32>
 // CHECK: %[[VAL_5:.*]] = tosa.reciprocal %[[VAL_4]] : (tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
-// CHECK: %[[VAL_6:.*]] = tosa.mul %[[VAL_1]], %[[VAL_5]] {shift = 0 : i8} : (tensor<13x21x3xf32>, tensor<1x1x1xf32>) -> tensor<13x21x3xf32>
+// CHECK: %[[VAL_6:.*]] = tosa.mul %[[VAL_1]], %[[VAL_5]], {{.*}}: (tensor<13x21x3xf32>, tensor<1x1x1xf32>, tensor<1xi8>) -> tensor<13x21x3xf32>
 }
diff --git a/test/mlir/conversion/onnx_to_tosa/NN/BatchNorm.mlir b/test/mlir/conversion/onnx_to_tosa/NN/BatchNorm.mlir
index 0c47f22ad4..c59f51946e 100644
--- a/test/mlir/conversion/onnx_to_tosa/NN/BatchNorm.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/NN/BatchNorm.mlir
@@ -21,8 +21,9 @@ func.func @test_batchnorm_f32(%arg0: tensor<100x3x10x10xf32>) -> tensor<100x3x10
 // CHECK-DAG:       [[VAR_9_:%.+]] = tosa.sub [[PARAM_0_]], [[VAR_4_]] : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>) -> tensor<100x3x10x10xf32>
 // CHECK:           [[VAR_10_:%.+]] = tosa.add [[VAR_7_]], [[VAR_8_]] : (tensor<1x3x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x3x1x1xf32>
 // CHECK:           [[VAR_11_:%.+]] = tosa.rsqrt [[VAR_10_]] : (tensor<1x3x1x1xf32>) -> tensor<1x3x1x1xf32>
-// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]] {shift = 0 : i8} : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>) -> tensor<100x3x10x10xf32>
-// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]] {shift = 0 : i8} : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>) -> tensor<100x3x10x10xf32>
+// CHECK:           [[SHIFT_0_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]], [[SHIFT_0_]] : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>, tensor<1xi8>) -> tensor<100x3x10x10xf32>
+// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]], [[SHIFT_0_]] : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>, tensor<1xi8>) -> tensor<100x3x10x10xf32>
 // CHECK:           [[VAR_14_:%.+]] = tosa.add [[VAR_13_]], [[VAR_6_]] : (tensor<100x3x10x10xf32>, tensor<1x3x1x1xf32>) -> tensor<100x3x10x10xf32>
 // CHECK:           return [[VAR_14_]] : tensor<100x3x10x10xf32>
 }
@@ -49,8 +50,9 @@ func.func @test_batchnorm_f16_dynamic(%arg0: tensor<100x3x?x?xf16>) -> tensor<*x
 // CHECK:           [[VAR_9_:%.+]] = tosa.sub [[PARAM_0_]], [[VAR_4_]] : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>) -> tensor<100x3x?x?xf16>
 // CHECK:           [[VAR_10_:%.+]] = tosa.add [[VAR_7_]], [[VAR_8_]] : (tensor<1x3x1x1xf16>, tensor<1x1x1x1xf16>) -> tensor<1x3x1x1xf16>
 // CHECK:           [[VAR_11_:%.+]] = tosa.rsqrt [[VAR_10_]] : (tensor<1x3x1x1xf16>) -> tensor<1x3x1x1xf16>
-// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]] {shift = 0 : i8} : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>) -> tensor<100x3x?x?xf16>
-// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]] {shift = 0 : i8} : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>) -> tensor<100x3x?x?xf16>
+// CHECK:           [[SHIFT_2_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]], [[SHIFT_2_]] : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>, tensor<1xi8>) -> tensor<100x3x?x?xf16>
+// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]], [[SHIFT_2_]] : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>, tensor<1xi8>) -> tensor<100x3x?x?xf16>
 // CHECK:           [[VAR_14_:%.+]] = tosa.add [[VAR_13_]], [[VAR_6_]] : (tensor<100x3x?x?xf16>, tensor<1x3x1x1xf16>) -> tensor<100x3x?x?xf16>
 // CHECK:           return [[VAR_14_]] : tensor<100x3x?x?xf16>
 }
@@ -78,8 +80,9 @@ func.func @test_batchnorm_bf16_dynamic(%arg0: tensor<100x3x?x?xbf16>) -> tensor<
 // CHECK:           [[VAR_9_:%.+]] = tosa.sub [[PARAM_0_]], [[VAR_4_]] : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>) -> tensor<100x3x?x?xbf16>
 // CHECK:           [[VAR_10_:%.+]] = tosa.add [[VAR_7_]], [[VAR_8_]] : (tensor<1x3x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x3x1x1xbf16>
 // CHECK:           [[VAR_11_:%.+]] = tosa.rsqrt [[VAR_10_]] : (tensor<1x3x1x1xbf16>) -> tensor<1x3x1x1xbf16>
-// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]] {shift = 0 : i8} : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>) -> tensor<100x3x?x?xbf16>
-// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]] {shift = 0 : i8} : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>) -> tensor<100x3x?x?xbf16>
+// CHECK:           [[SHIFT_4_:%.+]] = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK:           [[VAR_12_:%.+]] = tosa.mul [[VAR_9_]], [[VAR_11_]], [[SHIFT_4_]] : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>, tensor<1xi8>) -> tensor<100x3x?x?xbf16>
+// CHECK:           [[VAR_13_:%.+]] = tosa.mul [[VAR_12_]], [[VAR_5_]], [[SHIFT_4_]] : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>, tensor<1xi8>) -> tensor<100x3x?x?xbf16>
 // CHECK:           [[VAR_14_:%.+]] = tosa.add [[VAR_13_]], [[VAR_6_]] : (tensor<100x3x?x?xbf16>, tensor<1x3x1x1xbf16>) -> tensor<100x3x?x?xbf16>
 // CHECK:           return [[VAR_14_]] : tensor<100x3x?x?xbf16>
 }
@@ -107,8 +110,9 @@ func.func @test_batchnorm_f64(%arg0: tensor<100x3x10x10xf64>) -> tensor<100x3x10
 // CHECK-NEXT: [[VAR_9_:%.+]]  = tosa.sub %arg0, [[VAR_4_]] : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>) -> tensor<100x3x10x10xf64>
 // CHECK-NEXT: [[VAR_10_:%.+]]  = tosa.add %7, [[VAR_8_]] : (tensor<1x3x1x1xf64>, tensor<1x1x1x1xf64>) -> tensor<1x3x1x1xf64>
 // CHECK-NEXT: [[VAR_11_:%.+]]  = tosa.rsqrt [[VAR_10_]] : (tensor<1x3x1x1xf64>) -> tensor<1x3x1x1xf64>
-// CHECK-NEXT: [[VAR_12_:%.+]]  = tosa.mul [[VAR_9_]], %11 {shift = 0 : i8} : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>) -> tensor<100x3x10x10xf64>
-// CHECK-NEXT: [[VAR_13_:%.+]]  = tosa.mul [[VAR_12_]], %5 {shift = 0 : i8} : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>) -> tensor<100x3x10x10xf64>
+// CHECK-NEXT: [[SHIFT_6_:%.+]]  = "tosa.const"() <{value = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-NEXT: [[VAR_12_:%.+]]  = tosa.mul [[VAR_9_]], %11, [[SHIFT_6_]] : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>, tensor<1xi8>) -> tensor<100x3x10x10xf64>
+// CHECK-NEXT: [[VAR_13_:%.+]]  = tosa.mul [[VAR_12_]], %5, [[SHIFT_6_]] : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>, tensor<1xi8>) -> tensor<100x3x10x10xf64>
 // CHECK-NEXT: [[VAR_14_:%.+]]  = tosa.add [[VAR_13_]], [[VAR_6_]] : (tensor<100x3x10x10xf64>, tensor<1x3x1x1xf64>) -> tensor<100x3x10x10xf64>
 // CHECK-NEXT: return [[VAR_14_]] : tensor<100x3x10x10xf64>
 }
diff --git a/test/mlir/conversion/onnx_to_tosa/NN/DequantizeLinear.mlir b/test/mlir/conversion/onnx_to_tosa/NN/DequantizeLinear.mlir
index be49d4c7fd..66be2344c1 100644
--- a/test/mlir/conversion/onnx_to_tosa/NN/DequantizeLinear.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/NN/DequantizeLinear.mlir
@@ -13,7 +13,7 @@ func.func @test_dequantizeLinear(%arg0 : tensor<32x3x224x224xi8>) -> tensor<32x3
 // CHECK-DAG:    %[[CAST_0:.*]] = tosa.cast %[[ARG_0]] : (tensor<32x3x224x224xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    %[[CASTZP:.*]] = tosa.cast %[[ZP]] : (tensor<1x1x1x1xi8>) -> tensor<1x1x1x1xf32>
 // CHECK-DAG:    %[[SUB:.*]] = tosa.sub %[[CAST_0]], %[[CASTZP]] : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
-// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[SUB]], %[[SCALE]] {shift = 0 : i8} : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
+// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[SUB]], %[[SCALE]], {{.*}}: (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    return %[[MUL]] : tensor<32x3x224x224xf32>
 
 // -----
@@ -33,7 +33,7 @@ func.func @test_dequantizeLinear_f16(%arg0 : tensor<32x3x224x224xi8>) -> tensor<
 // CHECK-DAG:    %[[CASTZP:.*]] = tosa.cast %[[ZP]] : (tensor<1x1x1x1xi8>) -> tensor<1x1x1x1xf32>
 // CHECK-DAG:    %[[SUB:.*]] = tosa.sub %[[CAST_0]], %[[CASTZP]] : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    %[[CASTSCALE:.*]] = tosa.cast %[[SCALE]] : (tensor<1x1x1x1xf16>) -> tensor<1x1x1x1xf32>
-// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[SUB]], %[[CASTSCALE]] {shift = 0 : i8} : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
+// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[SUB]], %[[CASTSCALE]], {{.*}}: (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    %[[CAST:.*]] = tosa.cast %[[MUL]] : (tensor<32x3x224x224xf32>) -> tensor<32x3x224x224xf16>
 // CHECK-DAG:    return %[[CAST]] : tensor<32x3x224x224xf16>
 
@@ -76,7 +76,7 @@ func.func @no_zeropoint(%arg0: tensor<5xi8>, %arg1: tensor<f32>) -> tensor<5xf32
 // CHECK-SAME:                            %[[VAL_1:.*]]: tensor<f32>) -> tensor<5xf32> {
 // CHECK:           %[[VAL_2:.*]] = tosa.cast %[[VAL_0]] : (tensor<5xi8>) -> tensor<5xf32>
 // CHECK:           %[[VAL_3:.*]] = tosa.reshape %[[VAL_1]] {new_shape = array<i64: 1>} : (tensor<f32>) -> tensor<1xf32>
-// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]] {shift = 0 : i8} : (tensor<5xf32>, tensor<1xf32>) -> tensor<5xf32>
+// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]], {{.*}}: (tensor<5xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<5xf32>
 // CHECK:           return %[[VAL_4]] : tensor<5xf32>
 
 // -----
@@ -92,7 +92,7 @@ func.func @f8E4M3FN(%arg0: tensor<5xf8E4M3FN>, %arg1: tensor<f32>) -> tensor<5xf
 // CHECK-SAME:                        %[[VAL_1:.*]]: tensor<f32>) -> tensor<5xf32> {
 // CHECK:           %[[VAL_2:.*]] = tosa.cast %[[VAL_0]] : (tensor<5xf8E4M3FN>) -> tensor<5xf32>
 // CHECK:           %[[VAL_3:.*]] = tosa.reshape %[[VAL_1]] {new_shape = array<i64: 1>} : (tensor<f32>) -> tensor<1xf32>
-// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]] {shift = 0 : i8} : (tensor<5xf32>, tensor<1xf32>) -> tensor<5xf32>
+// CHECK:           %[[VAL_4:.*]] = tosa.mul %[[VAL_2]], %[[VAL_3]], {{.*}}: (tensor<5xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<5xf32>
 // CHECK:           return %[[VAL_4]] : tensor<5xf32>
 
 // -----
diff --git a/test/mlir/conversion/onnx_to_tosa/NN/QuantizeLinear.mlir b/test/mlir/conversion/onnx_to_tosa/NN/QuantizeLinear.mlir
index 0d53ce3d02..ca20bb3ae8 100644
--- a/test/mlir/conversion/onnx_to_tosa/NN/QuantizeLinear.mlir
+++ b/test/mlir/conversion/onnx_to_tosa/NN/QuantizeLinear.mlir
@@ -11,7 +11,7 @@ func.func @test_quantizeLinear(%arg0 : tensor<32x3x224x224xf32>) -> tensor<32x3x
 // CHECK-DAG:    %[[ZP:.*]] = "tosa.const"() <{value = dense<0> : tensor<1x1x1x1xi8>}> : () -> tensor<1x1x1x1xi8>
 // CHECK-DAG:    %[[SCALE:.*]] = "tosa.const"() <{value = dense<3.125000e-02> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
 // CHECK-DAG:    %[[REC:.*]] = tosa.reciprocal %[[SCALE]] : (tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
-// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]] {shift = 0 : i8} : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
+// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]], {{.*}}: (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    %[[MUL_CAST:.*]] = tosa.cast %[[MUL]] : (tensor<32x3x224x224xf32>) -> tensor<32x3x224x224xi32>
 // CHECK-DAG:    %[[ZPCAST:.*]] = tosa.cast %[[ZP]] : (tensor<1x1x1x1xi8>) -> tensor<1x1x1x1xi32>
 // CHECK-DAG:    %[[ADD:.*]] = tosa.add %[[MUL_CAST]], %[[ZPCAST]] : (tensor<32x3x224x224xi32>, tensor<1x1x1x1xi32>) -> tensor<32x3x224x224xi32>
@@ -32,7 +32,7 @@ func.func @test_quantizeLinear_none(%arg0 : tensor<32x3x224x224xf32>) -> tensor<
 // CHECK-SAME:    (%[[ARG_0:.*]]: tensor<32x3x224x224xf32>) -> tensor<32x3x224x224xui8>
 // CHECK-DAG:   %[[SCALE:.*]] = "tosa.const"() <{value = dense<3.125000e-02> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
 // CHECK-DAG:   %[[REC:.*]] = tosa.reciprocal %[[SCALE]] : (tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
-// CHECK-DAG:   %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]] {shift = 0 : i8} : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
+// CHECK-DAG:   %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]], {{.*}}: (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:   %[[MUL_CAST:.*]] = tosa.cast %[[MUL]] : (tensor<32x3x224x224xf32>) -> tensor<32x3x224x224xi32>
 // CHECK-DAG:   %[[CAST:.*]] = tosa.cast %[[MUL_CAST]] : (tensor<32x3x224x224xi32>) -> tensor<32x3x224x224xui8>
 // CHECK-DAG:   return %[[CAST]] : tensor<32x3x224x224xui8>
@@ -51,7 +51,7 @@ func.func @test_quantizeLinear_per_axis(%arg0: tensor<8x2xf32>) -> tensor<8x2xi8
 // CHECK-SAME:                                            %[[VAL_0:.*]]: tensor<8x2xf32>) -> tensor<8x2xi8> {
 // CHECK:           %[[VAL_2:.*]] = "tosa.const"() <{value = dense<{{\[\[}}1.000000e+00, 2.000000e+00]]> : tensor<1x2xf32>}> : () -> tensor<1x2xf32>
 // CHECK:           %[[REC:.*]] = tosa.reciprocal %[[VAL_2]] : (tensor<1x2xf32>) -> tensor<1x2xf32>
-// CHECK:           %[[MUL:.*]] = tosa.mul %[[VAL_0]], %[[REC]] {shift = 0 : i8} : (tensor<8x2xf32>, tensor<1x2xf32>) -> tensor<8x2xf32>
+// CHECK:           %[[MUL:.*]] = tosa.mul %[[VAL_0]], %[[REC]], {{.*}}: (tensor<8x2xf32>, tensor<1x2xf32>, tensor<1xi8>) -> tensor<8x2xf32>
 // CHECK:           %[[MUL_CAST:.*]] = tosa.cast %[[MUL]] : (tensor<8x2xf32>) -> tensor<8x2xi32>
 // CHECK:           %[[ZP:.*]] = "tosa.const"() <{value = dense<{{\[\[}}0, 1]]> : tensor<1x2xi8>}> : () -> tensor<1x2xi8>
 // CHECK:           %[[ZPCAST:.*]] = tosa.cast %[[ZP]] : (tensor<1x2xi8>) -> tensor<1x2xi32>
@@ -87,7 +87,7 @@ func.func @test_quantizeLinear_ui8(%arg0 : tensor<32x3x224x224xf32>) -> tensor<3
 // CHECK-DAG:    %[[ZP:.*]] = "tosa.const"() <{value = dense<0> : tensor<1x1x1x1xui8>}> : () -> tensor<1x1x1x1xui8>
 // CHECK-DAG:    %[[SCALE:.*]] = "tosa.const"() <{value = dense<3.125000e-02> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
 // CHECK-DAG:    %[[REC:.*]] = tosa.reciprocal %[[SCALE]] : (tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
-// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]] {shift = 0 : i8} : (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>) -> tensor<32x3x224x224xf32>
+// CHECK-DAG:    %[[MUL:.*]] = tosa.mul %[[ARG_0]], %[[REC]], {{.*}}: (tensor<32x3x224x224xf32>, tensor<1x1x1x1xf32>, tensor<1xi8>) -> tensor<32x3x224x224xf32>
 // CHECK-DAG:    %[[MUL_CAST:.*]] = tosa.cast %[[MUL]] : (tensor<32x3x224x224xf32>) -> tensor<32x3x224x224xi32>
 // CHECK-DAG:    %[[ZPCAST:.*]] = tosa.cast %[[ZP]] : (tensor<1x1x1x1xui8>) -> tensor<1x1x1x1xi32>
 // CHECK-DAG:    %[[ADD:.*]] = tosa.add %[[MUL_CAST]], %[[ZPCAST]] : (tensor<32x3x224x224xi32>, tensor<1x1x1x1xi32>) -> tensor<32x3x224x224xi32>
diff --git a/utils/clone-mlir.sh b/utils/clone-mlir.sh
index df1501346d..3c48725a7e 100644
--- a/utils/clone-mlir.sh
+++ b/utils/clone-mlir.sh
@@ -1,3 +1,3 @@
 git clone -n https://github.com/xilinx/llvm-aie.git llvm-project
 # Check out a specific branch that is known to work with ONNX-MLIR.
-cd llvm-project && git checkout 776b07b472a12db1a451fb4bfc737e05c0ee0b1c && cd ..
+cd llvm-project && git checkout a0fc10d350b9 && cd ..