onnx · tungld · Feb 14, 2025 · Jan 29, 2025 · Feb 6, 2025 · Feb 6, 2025
diff --git a/docs/BuildOnLinuxOSX.md b/docs/BuildOnLinuxOSX.md
@@ -15,7 +15,7 @@ Firstly, install MLIR (as a part of LLVM-Project):
 ``` bash
 git clone -n https://github.com/llvm/llvm-project.git
 # Check out a specific branch that is known to work with ONNX-MLIR.
-cd llvm-project && git checkout e86910337f98e57f5b9253f7d80d5b916eb1d97e && cd ..
+cd llvm-project && git checkout b270525f730be6e7196667925f5a9bfa153262e9 && cd ..
 ```
 
 [same-as-file]: <> (utils/build-mlir.sh)

diff --git a/docs/BuildOnWindows.md b/docs/BuildOnWindows.md
@@ -52,7 +52,7 @@ Install MLIR (as a part of LLVM-Project):
 ```shell
 git clone -n https://github.com/llvm/llvm-project.git
 # Check out a specific branch that is known to work with ONNX-MLIR.
-cd llvm-project && git checkout e86910337f98e57f5b9253f7d80d5b916eb1d97e && cd ..
+cd llvm-project && git checkout b270525f730be6e7196667925f5a9bfa153262e9 && cd ..
 ```
 
 [same-as-file]: <> (utils/build-mlir.cmd)

diff --git a/src/Conversion/ONNXToTOSA/Math/Conv2D.cpp b/src/Conversion/ONNXToTOSA/Math/Conv2D.cpp
@@ -41,7 +41,7 @@ Value createConvInGroups(PatternRewriter &rewriter, Operation *op,
     const llvm::ArrayRef<int64_t> weightShape, Value &newInput,
     Value &newWeight, Value &bias, const int64_t groups,
     DenseI64ArrayAttr &pads, DenseI64ArrayAttr &strides,
-    DenseI64ArrayAttr &dilations) {
+    DenseI64ArrayAttr &dilations, TypeAttr &accType) {
   // Set up constants outside of loop
   const int64_t sizeOfSliceInput = weightShape[1];
   const int64_t sizeOfSliceKernel = weightShape[0] / groups;
@@ -72,7 +72,7 @@ Value createConvInGroups(PatternRewriter &rewriter, Operation *op,
         mlir::cast<ShapedType>(resultType).getElementType());
     Value tempConv2D = tosa::CreateOpAndInfer<mlir::tosa::Conv2DOp>(rewriter,
         op->getLoc(), newConvOutputType, newSliceInput, newSliceWeight,
-        newSliceBias, pads, strides, dilations);
+        newSliceBias, pads, strides, dilations, accType);
     // Add value to vector
     sliceValues.push_back(tempConv2D);
   }
@@ -147,6 +147,8 @@ class ONNXConvOpLoweringToTOSA : public ConversionPattern {
     DenseI64ArrayAttr newPads =
         rewriter.getDenseI64ArrayAttr({pads[0], pads[2], pads[1], pads[3]});
 
+    TypeAttr accType = mlir::TypeAttr::get(rewriter.getF32Type());
+
     // Handle group parameter by creating multiple convs
     const int64_t group = adaptor.getGroup();
     Value conv2D = NULL;
@@ -157,11 +159,11 @@ class ONNXConvOpLoweringToTOSA : public ConversionPattern {
 
       conv2D = tosa::CreateOpAndInfer<mlir::tosa::Conv2DOp>(rewriter,
           convOp->getLoc(), newConvOutputType, newInput, newWeight, bias,
-          newPads, strides, dilations);
+          newPads, strides, dilations, accType);
     } else {
       conv2D = createConvInGroups(rewriter, convOp, tosaBuilder, resultType,
           weightShape, newInput, newWeight, bias, group, newPads, strides,
-          dilations);
+          dilations, accType);
     }
 
     // Convert output [N,OH,OW,OC] -> [N,OC,OH,OW]

diff --git a/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp b/src/Conversion/ONNXToTOSA/ONNXToTOSALegalizeUtils.cpp
@@ -48,7 +48,7 @@ Value buildOnnxToTosaPaddingConstOp(mlir::PatternRewriter &rewriter,
 
   // Create a new pad vec in the right format
   // ONNX : [b1, b2, b3, b4, e1, e2, e3, e4]
-  // TOSA :[[b1, e1], [b2, e2], [b3, e3], [b4, e4]]
+  // TOSA :[b1, e1, b2, e2, b3, e3, b4, e4]
 
   // Adds any initial or last vals, not included in onnxPads.
   llvm::SmallVector<int64_t, 8> tosaPads{initialVals};
@@ -59,11 +59,9 @@ Value buildOnnxToTosaPaddingConstOp(mlir::PatternRewriter &rewriter,
     tosaPads.push_back(onnxPads[i + dimSize]);
   }
   tosaPads.insert(tosaPads.end(), lastVals.begin(), lastVals.end());
-
-  // TOSA format groups dimensions by 2.
-  const unsigned int numberOfDims = tosaPads.size() / 2;
   TosaBuilder tosaBuilder(rewriter, loc);
-  return tosaBuilder.getConst(tosaPads, {numberOfDims, 2});
+  return tosaBuilder.getConst(
+      tosaPads, {static_cast<int64_t>(tosaPads.size())});
 }
 
 } // namespace tosa

diff --git a/test/backend/inference_backend.py b/test/backend/inference_backend.py
@@ -847,36 +847,48 @@ def get_test_models():
         # "test_training_dropout_zero_ratio_mask_cpu": {STATIC_SHAPE:{}, DYNAMIC_SHAPE:{-1:{-1}}},
         # ==OP== DynamicQuantizeLinear
         # ==MIN== 11
-        "test_dynamicquantizelinear_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
-        "test_dynamicquantizelinear_expanded_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
-        "test_dynamicquantizelinear_max_adjusted_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
-        "test_dynamicquantizelinear_max_adjusted_expanded_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
-        "test_dynamicquantizelinear_min_adjusted_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
-        "test_dynamicquantizelinear_min_adjusted_expanded_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #   CONSTANT_INPUT: {-1},
+        # },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_expanded_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_max_adjusted_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_max_adjusted_expanded_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_min_adjusted_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_dynamicquantizelinear_min_adjusted_expanded_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
         # ==OP== Einsum
         # ==MIN== 12
         # ==LIM== Limited to the types supported by ReduceSum and MatMul (which we decompose to in most cases) which exclude integers with width < 32. `inputs` must have static dimensions.
@@ -2323,11 +2335,13 @@ def get_test_models():
         # ==MIN== 10
         # ==LIM== Does not support per-axis and i8 quantization. Does not support int4 and uint4.
         # "test_quantizelinear_axis_cpu": {STATIC_SHAPE:{}, DYNAMIC_SHAPE:{-1:{-1}}, CONSTANT_INPUT:{-1}},
-        "test_quantizelinear_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_quantizelinear_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
         # ==OP== Range
         # ==MIN== 11
         "test_range_float_type_positive_delta_cpu": {
@@ -2815,11 +2829,13 @@ def get_test_models():
         },
         # ==OP== Round
         # ==MIN== 11
-        "test_round_cpu": {
-            STATIC_SHAPE: {},
-            DYNAMIC_SHAPE: {-1: {-1}},
-            CONSTANT_INPUT: {-1},
-        },
+        # TODO uncomment when issue #3068 is resolved.
+        # "vector.shape_cast operation fails on s390x"
+        # "test_round_cpu": {
+        #    STATIC_SHAPE: {},
+        #    DYNAMIC_SHAPE: {-1: {-1}},
+        #    CONSTANT_INPUT: {-1},
+        # },
         # ==OP== Scan
         # ==MIN== 8
         # ==LIM== Does not support dynamic shapes. Does not support int4 and uint4.

diff --git a/test/mlir/conversion/krnl_to_llvm/krnl_math_function_lowering.mlir b/test/mlir/conversion/krnl_to_llvm/krnl_math_function_lowering.mlir
@@ -16,9 +16,7 @@ func.func @test_krnl_erf_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf32>
 
 // CHECK-LABEL: test_krnl_erf_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ERF_RES:%.+]] = llvm.call @erff([[SCALAR_IN]]) : (f32) -> f32
@@ -41,9 +39,7 @@ func.func @test_krnl_acos_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf32
 
 // CHECK-LABEL: test_krnl_acos_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @acosf([[SCALAR_IN]]) : (f32) -> f32
@@ -66,9 +62,7 @@ func.func @test_krnl_acosh_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf3
 
 // CHECK-LABEL: test_krnl_acosh_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @acoshf([[SCALAR_IN]]) : (f32) -> f32
@@ -91,9 +85,7 @@ func.func @test_krnl_asin_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf32
 
 // CHECK-LABEL: test_krnl_asin_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @asinf([[SCALAR_IN]]) : (f32) -> f32
@@ -116,9 +108,7 @@ func.func @test_krnl_asinh_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf3
 
 // CHECK-LABEL: test_krnl_asinh_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @asinhf([[SCALAR_IN]]) : (f32) -> f32
@@ -141,9 +131,7 @@ func.func @test_krnl_atan_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf32
 
 // CHECK-LABEL: test_krnl_atan_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @atanf([[SCALAR_IN]]) : (f32) -> f32
@@ -165,9 +153,7 @@ func.func @test_krnl_atanh_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf3
 
 // CHECK-LABEL: test_krnl_atanh_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @atanhf([[SCALAR_IN]]) : (f32) -> f32
@@ -189,12 +175,9 @@ func.func @test_krnl_tan_lowering(%arg0: memref<10x10xf32>) -> memref<10x10xf32>
 
 // CHECK-LABEL: test_krnl_tan_lowering
 // CHECK: [[MEMREF_IN:%.+]] = llvm.insertvalue %arg6, {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[BUILTIN_CAST_0:%.+]] = builtin.unrealized_conversion_cast [[MEMREF_IN]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<10x10xf32>
-// CHECK: [[BUILTIN_CAST_1:%.+]] = builtin.unrealized_conversion_cast [[BUILTIN_CAST_0]] : memref<10x10xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: [[DATA:%.+]] = llvm.extractvalue [[BUILTIN_CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: [[DATA:%.+]] = llvm.extractvalue [[MEMREF_IN]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: [[DATA_IN:%.+]] = llvm.getelementptr [[DATA]]{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: [[SCALAR_IN:%.+]] = llvm.load [[DATA_IN]] : !llvm.ptr
 // CHECK: [[ACOS_RES:%.+]] = llvm.call @tanf([[SCALAR_IN]]) : (f32) -> f32
 // CHECK: [[DATA_OUT:%.+]] = llvm.getelementptr {{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
 // CHECK: llvm.store [[ACOS_RES]], [[DATA_OUT]] : f32, !llvm.ptr
-