55module {
66 func.func @inferred_add_tensor (%3: tensor <64 x256 xf32 >, %4: tensor <64 x256 xf32 >, %5: tensor <64 x256 xf32 >) -> tensor <64 x256 xf32 >
77 attributes {
8- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 , {} >
8+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 >
99 } {
1010 %6 = linalg.generic {
1111 indexing_maps = [#map , #map , #map ],
1212 iterator_types = [" parallel" , " parallel" ]
13- } ins (%3 , %4 : tensor <64 x256 xf32 >, tensor <64 x256 xf32 >) outs (%5 : tensor <64 x256 xf32 >) attrs = {lowering_config = #config } {
13+ } ins (%3 , %4 : tensor <64 x256 xf32 >, tensor <64 x256 xf32 >) outs (%5 : tensor <64 x256 xf32 >) attrs = {lowering_config = #config } {
1414 ^bb0 (%in: f32 , %in_0: f32 , %out: f32 ):
1515 %7 = arith.addf %in , %in_0 : f32
1616 linalg.yield %7 : f32
@@ -32,12 +32,12 @@ module {
3232module {
3333 func.func @inferred_dynamic (%3: tensor <?x?xf32 >, %4: tensor <?x?xf32 >, %5: tensor <?x?xf32 >) -> tensor <?x?xf32 >
3434 attributes {
35- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 , {} >
35+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 >
3636 } {
3737 %6 = linalg.generic {
3838 indexing_maps = [#map , #map , #map ],
3939 iterator_types = [" parallel" , " parallel" ]
40- } ins (%3 , %4 : tensor <?x?xf32 >, tensor <?x?xf32 >) outs (%5 : tensor <?x?xf32 >) attrs = {lowering_config = #config } {
40+ } ins (%3 , %4 : tensor <?x?xf32 >, tensor <?x?xf32 >) outs (%5 : tensor <?x?xf32 >) attrs = {lowering_config = #config } {
4141 ^bb0 (%in: f32 , %in_0: f32 , %out: f32 ):
4242 %7 = arith.addf %in , %in_0 : f32
4343 linalg.yield %7 : f32
@@ -62,12 +62,12 @@ module {
6262module {
6363 func.func @inferred_small_inner_dim (%3: tensor <8 x2 xf32 >, %4: tensor <8 x2 xf32 >, %5: tensor <8 x2 xf32 >) -> tensor <8 x2 xf32 >
6464 attributes {
65- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 , {} >
65+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 >
6666 } {
6767 %6 = linalg.generic {
6868 indexing_maps = [#map , #map , #map ],
6969 iterator_types = [" parallel" , " parallel" ]
70- } ins (%3 , %4 : tensor <8 x2 xf32 >, tensor <8 x2 xf32 >) outs (%5 : tensor <8 x2 xf32 >) attrs = {lowering_config = #config } {
70+ } ins (%3 , %4 : tensor <8 x2 xf32 >, tensor <8 x2 xf32 >) outs (%5 : tensor <8 x2 xf32 >) attrs = {lowering_config = #config } {
7171 ^bb0 (%in: f32 , %in_0: f32 , %out: f32 ):
7272 %7 = arith.addf %in , %in_0 : f32
7373 linalg.yield %7 : f32
@@ -84,11 +84,10 @@ module {
8484
8585// -----
8686
87- #map = affine_map <(d0 , d1 ) -> (d0 , d1 )>
8887module {
8988 func.func @inferred_small_inner_dim_fill_vector_sizes (%0: tensor <4 x16 x8 x4 x16 x2 x4 xf16 >, %1: tensor <4 x16 x8 x4 x16 x2 x4 xf16 >) -> tensor <4 x16 x8 x4 x16 x2 x4 xf16 >
9089 attributes {
91- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [256 , 1 , 1 ] subgroup_size = 64 , {} >
90+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [256 , 1 , 1 ] subgroup_size = 64 >
9291 } {
9392 %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config }
9493 ins (%0 : tensor <4 x16 x8 x4 x16 x2 x4 xf16 >)
@@ -105,12 +104,11 @@ module {
105104
106105// -----
107106
108- #map = affine_map <(d0 , d1 ) -> (d0 , d1 )>
109107module {
110108 func.func @inferred_small_inner_dim_dont_fill_non_contiguous (
111109 %0: tensor <4 x16 x4 x4 xf16 >, %1: tensor <4 x16 x4 x4 xf16 >) -> tensor <4 x16 x4 x4 xf16 >
112110 attributes {
113- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 , {} >
111+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 >
114112 } {
115113 %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config }
116114 ins (%0 : tensor <4 x16 x4 x4 xf16 >)
@@ -127,11 +125,10 @@ module {
127125
128126// -----
129127
130- #map = affine_map <(d0 , d1 ) -> (d0 , d1 )>
131128module {
132129 func.func @inferred_unaligned (%0: tensor <70 xf16 >, %1: tensor <70 xf16 >) -> tensor <70 xf16 >
133130 attributes {
134- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 , {} >
131+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 >
135132 } {
136133 %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config }
137134 ins (%0 : tensor <70 xf16 >)
@@ -148,11 +145,10 @@ module {
148145
149146// -----
150147
151- #map = affine_map <(d0 , d1 ) -> (d0 , d1 )>
152148module {
153149 func.func @inferred_smaller_load (%0: tensor <128 xf16 >, %1: tensor <128 xf16 >) -> tensor <128 xf16 >
154150 attributes {
155- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 , {} >
151+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 >
156152 } {
157153 %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config }
158154 ins (%0 : tensor <128 xf16 >)
@@ -173,7 +169,7 @@ module {
173169module {
174170 func.func @inferred_im2col (%2: tensor <2 x34 x34 x128 xf16 >, %3: tensor <2 x128 x8 xf16 >) -> tensor <2 x128 x8 xf16 >
175171 attributes {
176- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 , {} >
172+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 , 1 ] subgroup_size = 64 >
177173 } {
178174 %4 = iree_linalg_ext.im2col {lowering_config = #config }
179175 strides = [1 , 1 ] dilations = [1 , 1 ] kernel_size = [3 , 3 ]
@@ -198,7 +194,7 @@ module {
198194module {
199195 func.func @inferred_im2col_batch_last (%2: tensor <16 x26 x18 x32 xbf16 >, %3: tensor <32 x1 x1 x32 xbf16 >) -> tensor <32 x1 x1 x32 xbf16 >
200196 attributes {
201- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [256 , 1 , 1 ] subgroup_size = 64 , {} >
197+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [256 , 1 , 1 ] subgroup_size = 64 >
202198 } {
203199 %4 = iree_linalg_ext.im2col {lowering_config = #config }
204200 strides = [1 , 1 ] dilations = [1 , 1 ] kernel_size = [24 , 16 ]
@@ -220,31 +216,29 @@ module {
220216// -----
221217
222218#config = #iree_gpu.derived_thread_config
223- func.func @scatter (%arg0: tensor <3 x32 x16 xf32 >, %arg1: tensor <3 x1 xi32 >) -> tensor <3 x32 x16 xf32 >
219+ func.func @scatter (%arg0: tensor <3 x32 x16 xf32 >, %arg1: tensor <3 x1 xi32 >, %arg2: tensor < 3 x 32 x 16 x f32 > ) -> tensor <3 x32 x16 xf32 >
224220 attributes {
225- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 , {} >
221+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [64 , 1 , 1 ] subgroup_size = 64 >
226222 } {
227- %cst = arith.constant 0.000000e+00 : f32
228- %0 = tensor.empty () : tensor <3 x32 x16 xf32 >
229223 %1 = iree_linalg_ext.scatter {lowering_config = #config } dimension_map = [0 ] unique_indices (true )
230- ins (%arg0 , %arg1 : tensor <3 x32 x16 xf32 >, tensor <3 x1 xi32 >) outs (%0 : tensor <3 x32 x16 xf32 >) {
231- ^bb0 (%arg2 : f32 , %arg3 : f32 ):
232- iree_linalg_ext.yield %arg2 : f32
224+ ins (%arg0 , %arg1 : tensor <3 x32 x16 xf32 >, tensor <3 x1 xi32 >) outs (%arg2 : tensor <3 x32 x16 xf32 >) {
225+ ^bb0 (%in : f32 , %out : f32 ):
226+ iree_linalg_ext.yield %in : f32
233227 } -> tensor <3 x32 x16 xf32 >
234228 return %1 : tensor <3 x32 x16 xf32 >
235229}
236230
237231// CHECK-LABEL: func.func @scatter
238232// CHECK: scf.forall ({{.*}}) = (0, 0, 0) to (3, 32, 16) step (1, 1, 4)
239- // CHECK: linalg_ext .scatter
233+ // CHECK: iree_linalg_ext .scatter
240234// CHECK: scf.forall.in_parallel
241235
242236// -----
243237
244238#config = #iree_gpu.derived_thread_config
245239func.func @map_scatter (%arg0: tensor <2 x32 xf32 >, %arg1: tensor <64 x256 xf32 >) -> tensor <64 x256 xf32 >
246240 attributes {
247- translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 ] subgroup_size = 64 , {} >
241+ translation_info = #iree_codegen.translation_info <pipeline = LLVMGPUTileAndFuse workgroup_size = [16 , 32 ] subgroup_size = 64 >
248242 } {
249243 %true = arith.constant true
250244 %1 = iree_linalg_ext.map_scatter {lowering_config = #config } %arg0 into %arg1 {
0 commit comments