From e38a4fb430f6379c4b6cd90df6865ddd3c918705 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Sat, 7 Dec 2024 12:09:28 +0000 Subject: [PATCH 1/2] [mlir][nfc] Update vectorize-tensor-extract.mlir (3/N) Tests in "vectorize-tensor-extract.mlir" are inconsistent and would benefit from refactoring to: * Clearly categorize tests into "contiguous load," "gather load," and "scalar load + broadcast" cases, reflecting the structure of tensor.extract vectorization. * Unify variable naming (both MLIR and FileCheck). * Ensure all tests exercise unmasked vectorization (masked vectorization is covered in "vectorize-tensor-extract-masked.mlir"). * Improve and standardize formatting. These changes will make it easier to identify the test cases being exercised and simplify future maintenance or refactoring. This is patch 3/N in the series. Below is a summary of the changes in this patch. ---------------------------------------------------------------------- Summary of patch 3/N ---------------------------------------------------------------------- * Cluster all tests for "scalar load + broadcast" together * Unify MLIR and FileCheck variable names, e.g. `%input`, `%output` -> `%src`, `%init`. Note, I haven't changed test function names to make it easier to track changes (this PR is mostly about moving code). I will send a separate PR to rename the tests. 
---------------------------------------------------------------------- **DEPENDS ON** * https://github.com/llvm/llvm-project/pull/118977 * https://github.com/llvm/llvm-project/pull/119079 * https://github.com/llvm/llvm-project/pull/118977 Please only review the top commit --- .../Linalg/vectorize-tensor-extract.mlir | 269 +++++++++--------- 1 file changed, 137 insertions(+), 132 deletions(-) diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir index 25435cf51a615..0ee3898af8baa 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir @@ -31,83 +31,6 @@ func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi // ----- -#map = affine_map<() -> ()> -func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = [] - } outs(%init : tensor) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor - - return %res : tensor -} - -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor - -// ----- - -#map = affine_map<(n) -> (n)> -func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel"] - } outs(%init : tensor<1xf32>) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor<1xf32> - - return %res : tensor<1xf32> -} -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// 
CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> - -// ----- - -#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %c0 = arith.constant 1 : index - %c1 = arith.constant 2 : index - - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%init : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> - linalg.yield %1 : f32 - } -> tensor<1x1x3xf32> - - return %res : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( -// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, -// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @vectorize_nd_tensor_extract_transfer_read_basic( %arg0: tensor<3x3x3xf32>, @@ -144,37 +67,6 @@ func.func 
@vectorize_nd_tensor_extract_transfer_read_basic( // CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[IDX3]]], %[[CST]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> -// Same as example above, but reading into a column tensor. - -// TODO: Currently this fails to vectorise when the indices are non-constant. - -func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( - %input: tensor<3x3x3xf32>, - %output: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { - - %c0 = arith.constant 0 : index - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%output : tensor<3x1x1xf32>) { - ^bb0(%out: f32): - %5 = tensor.extract %input[%c0, %c0, %c0] : tensor<3x3x3xf32> - linalg.yield %5 : f32 - } -> tensor<3x1x1xf32> - - return %res : tensor<3x1x1xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( -// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>, -// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>) -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector -// CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> -// CHECK: return %[[RES]] : tensor<3x1x1xf32> - // ----- func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> { @@ -620,26 +512,6 @@ 
func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: // ----- -#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_0d_tensor_extract(%arg0: tensor, %arg2: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %2 = linalg.generic { - indexing_maps = [#map1], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %7 = tensor.extract %arg0[] : tensor - linalg.yield %7 : f32 - } -> tensor<1x1x3xf32> - return %2 : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( -// CHECK-SAME: %[[ARG_0:.*]]: tensor -// CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor -// CHECK: vector.broadcast %[[EXTRACT]] : vector to vector<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)> func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1: tensor<1x1x3xf32>, %arg2: index) -> tensor<1x1x3xf32> { @@ -674,17 +546,118 @@ func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1 // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[T3]]], %[[MASK]], %[[PASSTHRU]] // CHECK: vector.transfer_write %[[GATHER]] +//===----------------------------------------------------------------------===// +// Scalar load + broadcast +//===----------------------------------------------------------------------===// + // ----- -func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %c0 = arith.constant 1 : index + %c1 = arith.constant 2 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : 
tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[C2:.*]] = arith.constant 2 : index +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> + +// ----- + +#map = affine_map<() -> ()> +func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = [] + } outs(%init : tensor) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor + + return %res : tensor +} + +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor + +// ----- + +#map = affine_map<(n) -> (n)> +func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel"] + } outs(%init : tensor<1xf32>) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 
+ } -> tensor<1xf32> + + return %res : tensor<1xf32> +} +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> + +// ----- + +#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_0d_tensor_extract(%src: tensor, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %res = linalg.generic { + indexing_maps = [#map1], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( +// CHECK-SAME: %[[SRC:.*]]: tensor +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %{{.+}} : tensor +// CHECK: vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> + +// ----- + +func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %src = arith.constant dense<[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> %res = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], - iterator_types = ["parallel", "parallel", "parallel"]} - outs(%init : tensor<1x1x4xi32>) { + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x4xi32>) { ^bb0(%out: i32): %idx = linalg.index 0 : index @@ -695,7 +668,7 @@ func.func 
@vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor return %res : tensor<1x1x4xi32> } -// CHECK-LABEL: func.func @vectorize_scalar_read_with_broadcast_from_column_tensor( +// CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { // CHECK: %[[PAD:.*]] = arith.constant 0 : i32 // CHECK: %[[C0:.*]] = arith.constant 0 : index @@ -705,3 +678,35 @@ func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[IDX_ELT]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x4xi32> // CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x4xi32>, tensor<1x1x4xi32> + +// ----- + +// TODO: Currently this fails to vectorise when the indices are non-constant. 
+ +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( + %src: tensor<3x3x3xf32>, + %init: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { + + %c0 = arith.constant 0 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<3x1x1xf32>) { + ^bb0(%out: f32): + %1 = tensor.extract %src[%c0, %c0, %c0] : tensor<3x3x3xf32> + linalg.yield %1 : f32 + } -> tensor<3x1x1xf32> + + return %res : tensor<3x1x1xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<3x1x1xf32>) +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> From 9369a5d2d5407f6dfc84b500f97413e67942accb Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 10 Dec 2024 18:25:28 +0000 Subject: [PATCH 2/2] fixup! 
[mlir][nfc] Update vectorize-tensor-extract.mlir (3/N) Add DAG to FileCHeck prefixes --- .../Linalg/vectorize-tensor-extract.mlir | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir index 0ee3898af8baa..cfa5b5d569ca9 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir @@ -572,10 +572,10 @@ func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, % // CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( // CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[C1:.*]] = arith.constant 1 : index -// CHECK: %[[C2:.*]] = arith.constant 2 : index -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> @@ -621,8 +621,8 @@ func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32 // CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( // CHECK-SAME: %[[SRC:.*]]: tensor, // CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: 
%[[PAD:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> // CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> @@ -670,10 +670,10 @@ func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32 // CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> -// CHECK: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> +// CHECK-DAG: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> // CHECK: %[[IDX_ELT:.*]] = vector.extract %[[IDX_VEC]][0] : index from vector<1xindex> // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[IDX_ELT]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x4xi32>