// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-tile-root-and-fuse-producer-consumer{tiling-level=0}), cse)" --split-input-file %s | FileCheck %s
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-tile-root-and-fuse-producer-consumer{tiling-level=0 tile-using-forall=true}), cse)" --split-input-file %s | FileCheck %s --check-prefix=CHECK-FORALL
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-tile-root-and-fuse-producer-consumer{tiling-level=2 only-fuse-producer-input-operands=true}), cse)" --split-input-file %s | FileCheck %s --check-prefix=CHECK-REDUCTION

#config = #iree_codegen.lowering_config<tile_sizes = [[1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [1, 0, 0, 16, 16, 0], [0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0]]>
@@ -24,21 +23,12 @@ func.func @mmt4d_bias_relu(%arg0: tensor<?x?x16x1xf32>, %arg1: tensor<?x?x16x1xf
  return %4 : tensor<?x?x16x16xf32>
}
// CHECK-LABEL: func.func @mmt4d_bias_relu(
-// CHECK:         scf.for
+// CHECK:         scf.forall
// CHECK:            linalg.fill
// CHECK-NEXT:       %[[MMT4D:.+]] = linalg.mmt4d
// CHECK:            %[[ELEM:.+]] = linalg.generic
-// CHECK:           %[[RES0:.+]] = tensor.insert_slice %[[MMT4D]]
-// CHECK:           %[[RES1:.+]] = tensor.insert_slice %[[ELEM]]
-// CHECK:           scf.yield %[[RES0]], %[[RES1]]
-
-// CHECK-FORALL-LABEL: func.func @mmt4d_bias_relu(
-// CHECK-FORALL:       scf.forall
-// CHECK-FORALL:         linalg.fill
-// CHECK-FORALL-NEXT:    %[[MMT4D:.+]] = linalg.mmt4d
-// CHECK-FORALL:         %[[ELEM:.+]] = linalg.generic
-// CHECK-FORALL:         scf.forall.in_parallel
-// CHECK-FORALL:           tensor.parallel_insert_slice %[[ELEM]]
+// CHECK:            scf.forall.in_parallel
+// CHECK:              tensor.parallel_insert_slice %[[ELEM]]
// -----

@@ -72,26 +62,15 @@ func.func @quantized_matmul(%arg0: tensor<2x4x128x16x1xi8>, %arg1: tensor<2x4x16
  %unpack = linalg.unpack %6 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 16] into %7 : tensor<2x4x688x16x16xf32> -> tensor<2x11008x64xf32>
  return %unpack : tensor<2x11008x64xf32>
}
-// CHECK:      func.func @quantized_matmul(
-// CHECK:        scf.for
-// CHECK:          linalg.generic
-// CHECK:          linalg.generic
-// CHECK:          linalg.fill
-// CHECK:          %[[MMT4D:.+]] = linalg.batch_mmt4d
-// CHECK:          %[[UNPACK:.+]] = linalg.unpack
-// CHECK:          %[[RES0:.+]] = tensor.insert_slice %[[MMT4D]]
-// CHECK:          %[[RES1:.+]] = tensor.insert_slice %[[UNPACK]]
-// CHECK:          scf.yield %[[RES0]], %[[RES1]]
-
-// CHECK-FORALL-LABEL: func.func @quantized_matmul(
-// CHECK-FORALL:       scf.forall
-// CHECK-FORALL:         linalg.generic
-// CHECK-FORALL:         linalg.generic
-// CHECK-FORALL:         linalg.fill
-// CHECK-FORALL:         %[[MMT4D:.+]] = linalg.batch_mmt4d
-// CHECK-FORALL:         %[[UNPACK:.+]] = linalg.unpack
-// CHECK-FORALL:         scf.forall.in_parallel
-// CHECK-FORALL:           tensor.parallel_insert_slice %[[UNPACK]]
+// CHECK-LABEL: func.func @quantized_matmul(
+// CHECK:         scf.forall
+// CHECK:           linalg.generic
+// CHECK:           linalg.generic
+// CHECK:           linalg.fill
+// CHECK:           %[[MMT4D:.+]] = linalg.batch_mmt4d
+// CHECK:           %[[UNPACK:.+]] = linalg.unpack
+// CHECK:           scf.forall.in_parallel
+// CHECK:             tensor.parallel_insert_slice %[[UNPACK]]

// -----
