Skip to content

Commit acefc23

Browse files
authored
[Codegen] Test Cleanup 5/8: LLVMCPU tests (#22748)
Result of a scan over all Codegen tests to clean up common issues. A summary of the results, plus a preamble approximating the issues to look for, can be found here: https://gist.github.com/qedawkins/40f9e604fd83745bf1ac20fd63a7a61f
1 parent 2e40437 commit acefc23

22 files changed

+106
-182
lines changed

compiler/src/iree/compiler/Codegen/LLVMCPU/test/2d_scalable_to_1d_scalable.mlir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,12 @@ func.func @scalable_2d_matmul_and_generic(%arg0: tensor<32400x32xf32>, %arg1: te
9191

9292
#lowering_config_parallel_only = #iree_cpu.lowering_config<distribution = [0, 0], vector_common_parallel = [[4], [4]]>
9393

94-
// CHECK: #[[GENERIC_CONFIG:.*]] = #iree_cpu.lowering_config<distribution = [0, 0], vector_common_parallel = [4, [4]]>
95-
///
96-
// CHECK: func.func @should_not_crash
94+
// CHECK: #[[$GENERIC_CONFIG:.*]] = #iree_cpu.lowering_config<distribution = [0, 0], vector_common_parallel = [4, [4]]>
95+
//
96+
// CHECK-LABEL: func.func @should_not_crash
9797
// CHECK: scf.for
9898
// CHECK: linalg.generic
99-
// CHECK-SAME: lowering_config = #[[GENERIC_CONFIG]]
99+
// CHECK-SAME: lowering_config = #[[$GENERIC_CONFIG]]
100100
func.func @should_not_crash(%a: tensor<?x?xf32>, %b: tensor<?xf32>, %c: tensor<?x?xf32>) -> tensor<?x?xf32> {
101101
%0 = linalg.generic {
102102
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],

compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ func.func @dot_384x512x128_dispatch_0() {
5656
}
5757
return
5858
}
59-
// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 64)>
6059
// CHECK: func.func @dot_384x512x128_dispatch_0() {
61-
// CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
6260
// CHECK-DAG: %[[CST_VECTOR:.+]] = arith.constant dense<0.000000e+00> : vector<16x16xf32>
6361
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
6462
// CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index
@@ -69,25 +67,24 @@ func.func @dot_384x512x128_dispatch_0() {
6967
// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
7068
// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<384x512xf32>>
7169
// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<512x128xf32>>
72-
// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<384x128xf32>>
7370
// CHECK: %[[DST_TILE_INIT:.+]] = tensor.empty()
74-
// CHECK: scf.for %[[I_IDX:.+]] = {{.*}} to %[[C384]] step %{{[0-9]*}} {
71+
// CHECK: scf.for {{.+}} = {{.*}} to %[[C384]] step %{{[0-9]*}} {
7572
// CHECK: %[[LHS_TILE:.+]] = iree_tensor_ext.dispatch.tensor.load %[[LHS]], {{.*}} -> tensor<64x512xf32>
76-
// CHECK: scf.for %[[J_IDX:.+]] = {{.*}} to %[[C128]] step %{{[0-9]*}} {
73+
// CHECK: scf.for {{.+}} = {{.*}} to %[[C128]] step %{{[0-9]*}} {
7774
// CHECK: %[[RHS_TILE:.+]] = iree_tensor_ext.dispatch.tensor.load %[[RHS]], {{.*}} -> tensor<512x64xf32>
7875
// CHECK: {{.*}} = scf.for %[[L1_I:.+]] = %[[C0]] to %[[C64]] step %[[C16]]
7976
// CHECK-SAME: iter_args(%[[ITER0:.+]] = %[[DST_TILE_INIT]]) -> (tensor<64x64xf32>)
8077
// CHECK: {{.*}} = scf.for %[[L1_J:.+]] = %[[C0]] to %[[C64]] step %[[C16]]
8178
// CHECK-SAME: iter_args(%[[ITER1:.+]] = %[[ITER0]]) -> (tensor<64x64xf32>)
82-
// CHECK: %[[MATMUL_RES:.+]] = scf.for %[[L1_K:.+]] = %[[C0]] to %[[C512]] step %[[C32]]
83-
// CHECK-SAME: iter_args(%[[ITER2:.+]] = %[[CST_VECTOR]]) -> (vector<16x16xf32>)
79+
// CHECK: %[[MATMUL_RES:.+]] = scf.for {{.+}} = %[[C0]] to %[[C512]] step %[[C32]]
80+
// CHECK-SAME: iter_args({{.+}} = %[[CST_VECTOR]]) -> (vector<16x16xf32>)
8481
// CHECK-DAG: {{.*}} = tensor.extract %[[LHS_TILE]]
85-
// CHECK-DAD: {{.*}} = vector.transfer_read %[[RHS_TILE]]
82+
// CHECK-DAG: {{.*}} = vector.transfer_read %[[RHS_TILE]]
8683
// CHECK-COUNT-32: vector.fma
8784
// CHECK: scf.yield %{{.*}} : vector<16x16xf32>
8885
// CHECK: %[[EXP:.+]] = math.exp %[[MATMUL_RES]] : vector<16x16xf32>
89-
// CHECK: %[[RES:.+]] = vector.transfer_write %[[EXP]], %[[ITER1]][%[[L1_I]], %[[L1_J]]] {{.*}} : vector<16x16xf32>, tensor<64x64xf32>
90-
// CHECK: scf.yield %[[RES]]
86+
// CHECK: {{.+}} = vector.transfer_write %[[EXP]], %[[ITER1]][%[[L1_I]], %[[L1_J]]] {{.*}} : vector<16x16xf32>, tensor<64x64xf32>
87+
// CHECK: scf.yield {{.+}}
9188

9289
// -----
9390

compiler/src/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ func.func @dynamic_allocas(%arg0: index) {
99
// -----
1010

1111
// expected-error @+1 {{exceeded stack allocation limit of 32768 bytes for function. Got 65536 bytes}}
12-
func.func @static_big_allocas(%arg0: index) {
12+
func.func @static_big_allocas() {
1313
%0 = memref.alloca() : memref<16384xi32>
1414
return
1515
}
@@ -36,7 +36,7 @@ func.func @mix_static_and_dynamic_allocas(%arg0: index) {
3636

3737
// -----
3838

39-
func.func @non_entry_bb_allocas(%arg0: index) {
39+
func.func @non_entry_bb_allocas() {
4040
cf.br ^bb1
4141
^bb1() :
4242
// expected-error @+1 {{all stack allocations need to be hoisted to the entry block of the function}}

compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ builtin.module {
88
}
99
// CHECK: llvm.func @extern_public()
1010
// CHECK: llvm.func @entry_point(
11-
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef},
12-
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef},
13-
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef}) -> i32
11+
// CHECK-SAME: %{{[A-Za-z0-9_]+}}: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef},
12+
// CHECK-SAME: %{{[A-Za-z0-9_]+}}: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef},
13+
// CHECK-SAME: %{{[A-Za-z0-9_]+}}: !llvm.ptr {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef}) -> i32
1414
// CHECK: llvm.return %{{.+}} : i32
1515

1616
// -----
@@ -39,7 +39,7 @@ module {
3939
// CHECK-DAG: %[[PROCESSOR_INFO:.+]] = llvm.load %arg2
4040
// CHECK: %[[PROCESSOR_ID:.+]] = llvm.extractvalue %[[PROCESSOR_INFO]][4]
4141
// CHECK: %[[VAL:.+]] = llvm.call @default_cconv_with_extra_fields
42-
// CHECK-SAME: (%[[ALLOCA]], %[[Ci32]], %[[Cf64]], %[[DATA]], %[[PROCESSOR_ID]])
42+
// CHECK-SAME: (%[[ALLOCA]], %[[Ci32]], %[[Cf64]], %[[DATA]], %[[PROCESSOR_ID]])
4343

4444
// -----
4545

@@ -51,9 +51,6 @@ func.func @interleave_and_bitcast_lowering() {
5151
%cst = arith.constant dense<4> : vector<4x2xi8>
5252
%cst_0 = arith.constant dense<0> : vector<4x4xi4>
5353
%c0 = arith.constant 0 : index
54-
%c1 = arith.constant 1 : index
55-
%c2 = arith.constant 2 : index
56-
%c3 = arith.constant 3 : index
5754
%c4096 = arith.constant 4096 : index
5855
%c8192 = arith.constant 8192 : index
5956
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c4096) flags(ReadOnly) : memref<128xi8, strided<[1], offset: ?>>

compiler/src/iree/compiler/Codegen/LLVMCPU/test/expand_f16_op_to_f32.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ func.func @maximumf(%arg0: tensor<4xf16>, %arg1: tensor<4xf16>, %arg2: tensor<4x
2020
// CHECK: %[[LHS:.*]] = arith.extf %{{.+}} : f16 to f32
2121
// CHECK: %[[RHS:.*]] = arith.extf %{{.+}} : f16 to f32
2222
// CHECK: %[[MAX:.*]] = arith.maximumf %[[LHS]], %[[RHS]] : f32
23-
// CHECK: %[[TRUNC:.*]] = arith.truncf %[[MAX]] : f32 to f16
24-
// CHECK: linalg.yield %[[TRUNC:.*]] : f16
25-
// CHECK: return %[[GEN:.*]] : tensor<4xf16>
23+
// CHECK: %[[TRUNC:.+]] = arith.truncf %[[MAX]] : f32 to f16
24+
// CHECK: linalg.yield %[[TRUNC]] : f16
25+
// CHECK: return %[[GEN]] : tensor<4xf16>

compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#hal.pipeline.binding<storage_buffer>
66
]>
77

8-
// CHECK-LABEL: llvm.func @binding_ptrs(
8+
// CHECK-LABEL: llvm.func @binding_ptrs
99
func.func @binding_ptrs() {
1010
// CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2
1111
// CHECK-DAG: %[[C5:.+]] = llvm.mlir.constant(5
@@ -43,7 +43,7 @@ llvm.func @sink(%arg0: f32) {
4343
#hal.pipeline.binding<storage_buffer>
4444
]>
4545

46-
// CHECK-LABEL: llvm.func @binding_ptrs_dynamic(
46+
// CHECK-LABEL: llvm.func @binding_ptrs_dynamic
4747
func.func @binding_ptrs_dynamic() {
4848
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 :
4949
// CHECK-DAG: %[[C8:.+]] = llvm.mlir.constant(8 :
@@ -68,7 +68,7 @@ func.func @binding_ptrs_dynamic() {
6868
// CHECK: %[[DIM2_PTR:.+]] = llvm.getelementptr %[[CONSTANT_BASEPTR0]][3]
6969
// CHECK: %[[DIM2:.+]] = llvm.load %[[DIM2_PTR]]
7070
// CHECK: %[[DIM2_ZEXT:.+]] = llvm.zext %[[DIM2]]
71-
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1): index
71+
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
7272
%dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
7373
%dim2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
7474

@@ -109,7 +109,7 @@ llvm.func @sink(%arg0: f32) {
109109
#hal.pipeline.binding<storage_buffer>
110110
]>
111111

112-
// CHECK-LABEL: llvm.func @binding_ptrs_sub_byte_dynamic(
112+
// CHECK-LABEL: llvm.func @binding_ptrs_sub_byte_dynamic
113113
func.func @binding_ptrs_sub_byte_dynamic() {
114114
// CHECK-DAG: %[[C8:.+]] = llvm.mlir.constant(8 :
115115
// CHECK-DAG: %[[C4:.+]] = llvm.mlir.constant(4 :
@@ -119,7 +119,7 @@ func.func @binding_ptrs_sub_byte_dynamic() {
119119
// CHECK: %[[OFFSET:.+]] = llvm.load %[[CONSTANT_BASEPTR]]
120120
// CHECK: %[[OFFSET_ZEXT:.+]] = llvm.zext %[[OFFSET]]
121121
%offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
122-
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1): index
122+
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
123123

124124
// CHECK: %[[STATE3:.+]] = llvm.load %arg1
125125
// CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10]

compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --verify-diagnostics --split-input-file %s
22

3-
#config = #iree_cpu.lowering_config<distribution = [64, 64], vector_common_parallel = [8, 32, 16], vector_reduction = [0, 0, 16], vector_inner_parallel = [0, 0, 0]>
3+
#config = #iree_cpu.lowering_config<distribution = [64, 64], vector_common_parallel = [8, 32, 16], vector_reduction = [0, 0, 16]>
44
#translation = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert>
55
func.func @illegal_parallel_tile_sizes_config(%0: memref<4x8xf32>, %1: memref<8x16xf32>, %2: memref<4x16xf32>) attributes {
66
translation_info = #translation
@@ -12,7 +12,7 @@ func.func @illegal_parallel_tile_sizes_config(%0: memref<4x8xf32>, %1: memref<8x
1212

1313
// -----
1414

15-
#config = #iree_cpu.lowering_config<distribution = [64, 64], vector_common_parallel = [8, 0, 0], vector_reduction = [0, 16, 16], vector_inner_parallel = [0, 0, 0]>
15+
#config = #iree_cpu.lowering_config<distribution = [64, 64], vector_common_parallel = [8, 0, 0], vector_reduction = [0, 16, 16]>
1616
#translation = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert>
1717
func.func @illegal_reduction_tile_sizes_config(%0: memref<4x8xf32>, %1: memref<8x16xf32>, %2: memref<4x16xf32>) attributes {
1818
translation_info = #translation
@@ -24,7 +24,7 @@ func.func @illegal_reduction_tile_sizes_config(%0: memref<4x8xf32>, %1: memref<8
2424

2525
// -----
2626

27-
#config = #iree_cpu.lowering_config<distribution = {sizes = [4, 8], interchange = [1]}, vector_common_parallel = [8, 8, 0], vector_reduction = [0, 0, 8], vector_inner_parallel = [0, 0, 0]>
27+
#config = #iree_cpu.lowering_config<distribution = {sizes = [4, 8], interchange = [1]}, vector_common_parallel = [8, 8, 0], vector_reduction = [0, 0, 8]>
2828
#translation = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert>
2929
func.func @illegal_interchange(%0: memref<4x8xf32>, %1: memref<8x16xf32>, %2: memref<4x16xf32>) attributes {
3030
translation_info = #translation

compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ func.func @peel_static_matmul(%arg0: tensor<128x49xf32>, %arg1: tensor<49x512xf3
6666
// CHECK: linalg.matmul {{.*}} outs(%[[T0]] : tensor<8x32xf32>) -> tensor<8x32xf32>
6767
// CHECK: scf.for
6868
// CHECK: linalg.fill {{.*}} -> tensor<8x?xf32>
69-
// CHECK: %[[T1:.+]] = scf.for
69+
// CHECK: scf.for
7070
// CHECK: linalg.matmul {{.*}} tensor<8x?xf32>
7171
// CHECK: scf.for
7272
// CHECK: linalg.fill {{.*}} -> tensor<?x?xf32>
73-
// CHECK: %[[T2:.+]] = scf.for
73+
// CHECK: scf.for
7474
// CHECK: linalg.matmul {{.*}} tensor<?x?xf32>
7575

7676
// -----

compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,27 @@
11
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s
22

3-
#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
3+
#pipeline_layout = #hal.pipeline.layout<bindings = [
44
#hal.pipeline.binding<storage_buffer>,
55
#hal.pipeline.binding<storage_buffer>,
66
#hal.pipeline.binding<storage_buffer>
77
]>
88
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
99
func.func @pad_conv_2d_nchw_fchw_1x320x64x64x320x3x3() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
1010
%cst = arith.constant 0.000000e+00 : f32
11-
%c1 = arith.constant 1 : index
1211
%c0 = arith.constant 0 : index
13-
%c5243520 = arith.constant 5243520 : index
14-
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
15-
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
16-
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : i32
17-
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i32
18-
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : i32
19-
%5 = arith.index_castui %0 {stream.alignment = 128 : index, stream.values = [10486400 : index, 15729280 : index]} : i32 to index
20-
%6 = arith.index_castui %1 {stream.alignment = 256 : index, stream.values = [1273222400 : index, 1280618240 : index]} : i32 to index
21-
%7 = arith.index_castui %2 {stream.alignment = 256 : index, stream.values = [10507520 : index, 21488640 : index]} : i32 to index
22-
%8 = arith.index_castui %3 {stream.alignment = 256 : index, stream.values = [10508800 : index, 21489920 : index]} : i32 to index
23-
%9 = arith.index_castui %4 {stream.alignment = 128 : index, stream.values = [10486400 : index, 10487680 : index]} : i32 to index
24-
%10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5243520) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320x64x64xf32>>
25-
%11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<320x320x3x3xf32>>
26-
%12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>>
27-
%13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>>
28-
%14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>>
29-
%15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
30-
%16 = iree_tensor_ext.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320x64x64xf32>> -> tensor<1x320x64x64xf32>
31-
%17 = iree_tensor_ext.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<320x320x3x3xf32>> -> tensor<320x320x3x3xf32>
32-
%18 = iree_tensor_ext.dispatch.tensor.load %12, offsets = [0, 0], sizes = [1, 320], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>> -> tensor<1x320xf32>
33-
%19 = iree_tensor_ext.dispatch.tensor.load %13, offsets = [0, 0], sizes = [1, 320], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>> -> tensor<1x320xf32>
34-
%20 = iree_tensor_ext.dispatch.tensor.load %14, offsets = [0, 0], sizes = [1, 320], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320xf32>> -> tensor<1x320xf32>
35-
%21 = tensor.empty() : tensor<1x320x64x64xf32>
36-
%22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<1x320x64x64xf32>) -> tensor<1x320x64x64xf32>
37-
%padded = tensor.pad %16 low[0, 0, 1, 1] high[0, 0, 1, 1] {
12+
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320x64x64xf32>>
13+
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<320x320x3x3xf32>>
14+
%2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
15+
%3 = iree_tensor_ext.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x320x64x64xf32>> -> tensor<1x320x64x64xf32>
16+
%4 = iree_tensor_ext.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<320x320x3x3xf32>> -> tensor<320x320x3x3xf32>
17+
%5 = tensor.empty() : tensor<1x320x64x64xf32>
18+
%6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1x320x64x64xf32>) -> tensor<1x320x64x64xf32>
19+
%padded = tensor.pad %3 low[0, 0, 1, 1] high[0, 0, 1, 1] {
3820
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index):
3921
tensor.yield %cst : f32
4022
} : tensor<1x320x64x64xf32> to tensor<1x320x66x66xf32>
41-
%23 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded, %17 : tensor<1x320x66x66xf32>, tensor<320x320x3x3xf32>) outs(%22 : tensor<1x320x64x64xf32>) -> tensor<1x320x64x64xf32>
42-
iree_tensor_ext.dispatch.tensor.store %23, %15, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : tensor<1x320x64x64xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
23+
%7 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded, %4 : tensor<1x320x66x66xf32>, tensor<320x320x3x3xf32>) outs(%6 : tensor<1x320x64x64xf32>) -> tensor<1x320x64x64xf32>
24+
iree_tensor_ext.dispatch.tensor.store %7, %2, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : tensor<1x320x64x64xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
4325
return
4426
}
4527

compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ func.func @pad_only_dispatch() attributes {hal.executable.target = #executable_t
5555
#map1 = affine_map<(d0, d1, d2, d3) -> (d3)>
5656
func.func @pad_with_producer_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
5757
%c0 = arith.constant 0 : index
58-
%cst = arith.constant 1.001000e-05 : f32
5958
%cst_0 = arith.constant 0.000000e+00 : f32
6059
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x56x56x256xf32>>
6160
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x1x256x128xf32>>

0 commit comments

Comments
 (0)