@@ -453,3 +453,111 @@ util.func public @conv_1d_nhc_chf(%arg0: tensor<1x3x2xf32>, %arg1: tensor<2x2x2x
 // CHECK-SAME: input_k_perm = [1, 0]
 // CHECK-SAME: ins({{.*}} : tensor<1x3x2xf32>)
 // CHECK-SAME: outs({{.*}} : tensor<1x2x4xf32>) -> tensor<1x2x4xf32>
+
+// -----
+
+util.func public @conv_2d_nhwgc_gfhwc(%arg0: tensor<2x10x10x7x4xf32>, %arg1: tensor<7x16x3x3x4xf32>, %arg2: tensor<2x8x8x7x16xf32>) -> tensor<2x8x8x7x16xf32> {
+  %0 = linalg.conv_2d_nhwgc_gfhwc
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%arg0, %arg1 : tensor<2x10x10x7x4xf32>, tensor<7x16x3x3x4xf32>)
+    outs(%arg2 : tensor<2x8x8x7x16xf32>) -> tensor<2x8x8x7x16xf32>
+  util.return %0 : tensor<2x8x8x7x16xf32>
+}
+// n h w g f c
+// CHECK-DAG: #[[LHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d3, d1, d2, d5)>
+// CHECK-DAG: #[[RHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)>
+// CHECK-DAG: #[[OUT_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4)>
+// CHECK: util.func public @conv_2d_nhwgc_gfhwc(
+// CHECK-SAME: %[[IMG:.+]]: [[IMG_T:tensor<2x10x10x7x4xf32>]]
+// CHECK-SAME: %[[FIL:.+]]: [[FIL_T:tensor<7x16x3x3x4xf32>]]
+// CHECK-SAME: %[[OUT:.+]]: [[OUT_T:tensor<2x8x8x7x16xf32>]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : [[LHS_T:tensor<2x7x8x8x36xf32>]]
+// CHECK: %[[IM2COL:.+]] = iree_linalg_ext.im2col
+// CHECK-SAME: strides = [1, 1] dilations = [1, 1] kernel_size = [3, 3]
+// CHECK-SAME: m_offset = [0, 0] * [8, 1] k_offset = [0] * [1]
+// CHECK-SAME: batch_pos = [0, 3] m_pos = [1, 2] k_pos = [4]
+// CHECK-SAME: input_k_perm = [0, 1, 2]
+// CHECK-SAME: ins(%[[IMG]] : [[IMG_T]])
+// CHECK-SAME: outs(%[[EMPTY]] : [[LHS_T]])
+// CHECK-DAG: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[FIL]] {{\[}}[0], [1], [2, 3, 4]] : [[FIL_T]] into [[RHS_T:tensor<7x16x36xf32>]]
+// CHECK: %[[MATMUL:.+]] = linalg.generic
+// CHECK-SAME: indexing_maps = [#[[LHS_MAP]], #[[RHS_MAP]], #[[OUT_MAP]]]
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction"]
+// CHECK-SAME: ins(%[[IM2COL]], %[[COLLAPSED]] : [[LHS_T]], [[RHS_T]])
+// CHECK-SAME: outs(%[[OUT]] : [[OUT_T]]) {
+// CHECK: }
+// CHECK: util.return %[[MATMUL]]
+
+// -----
+
+util.func public @conv_2d_ngchw_fgchw(%arg0: tensor<2x7x4x10x10xf32>, %arg1: tensor<16x7x4x3x3xf32>, %arg2: tensor<2x7x16x8x8xf32>) -> tensor<2x7x16x8x8xf32> {
+  %0 = linalg.conv_2d_ngchw_fgchw
+    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+    ins(%arg0, %arg1 : tensor<2x7x4x10x10xf32>, tensor<16x7x4x3x3xf32>)
+    outs(%arg2 : tensor<2x7x16x8x8xf32>) -> tensor<2x7x16x8x8xf32>
+  util.return %0 : tensor<2x7x16x8x8xf32>
+}
+// n g f h w c
+// CHECK-DAG: #[[LHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5)>
+// CHECK-DAG: #[[RHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)>
+// CHECK-DAG: #[[OUT_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4)>
+// CHECK: util.func public @conv_2d_ngchw_fgchw(
+// CHECK-SAME: %[[IMG:.+]]: [[IMG_T:tensor<2x7x4x10x10xf32>]]
+// CHECK-SAME: %[[FIL:.+]]: [[FIL_T:tensor<16x7x4x3x3xf32>]]
+// CHECK-SAME: %[[OUT:.+]]: [[OUT_T:tensor<2x7x16x8x8xf32>]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : [[RHS_T:tensor<2x7x8x8x36xf32>]]
+// CHECK: %[[IM2COL:.+]] = iree_linalg_ext.im2col
+// CHECK-SAME: strides = [1, 1] dilations = [1, 1] kernel_size = [3, 3]
+// CHECK-SAME: m_offset = [0, 0] * [8, 1] k_offset = [0] * [1]
+// CHECK-SAME: batch_pos = [0, 1] m_pos = [3, 4] k_pos = [2]
+// CHECK-SAME: input_k_perm = [0, 1, 2]
+// CHECK-SAME: ins(%[[IMG]] : [[IMG_T]])
+// CHECK-SAME: outs(%[[EMPTY]] : [[RHS_T]])
+// CHECK-DAG: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[FIL]] {{\[}}[0], [1], [2, 3, 4]] : [[FIL_T]] into [[LHS_T:tensor<16x7x36xf32>]]
+// CHECK: %[[MATMUL:.+]] = linalg.generic
+// CHECK-SAME: indexing_maps = [#[[LHS_MAP]], #[[RHS_MAP]], #[[OUT_MAP]]]
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction"]
+// CHECK-SAME: ins(%[[COLLAPSED]], %[[IM2COL]] : [[LHS_T]], [[RHS_T]])
+// CHECK-SAME: outs(%[[OUT]] : [[OUT_T]]) {
+// CHECK: }
+// CHECK: util.return %[[MATMUL]]
+
+// -----
+// n g f h w c kh kw
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d2, d1, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d0, d2, d3, d4)>
+// Output has 'n' and 'g' dimensions transposed.
+util.func public @conv_2d_ngchw_fgchw_gnfhw(%arg0: tensor<2x7x4x10x10xf32>, %arg1: tensor<16x7x4x3x3xf32>, %arg2: tensor<7x2x16x8x8xf32>) -> tensor<7x2x16x8x8xf32> {
+  %0 = linalg.generic {
+      indexing_maps = [#map, #map1, #map2],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]
+    } ins(%arg0, %arg1 : tensor<2x7x4x10x10xf32>, tensor<16x7x4x3x3xf32>) outs(%arg2 : tensor<7x2x16x8x8xf32>) {
+  ^bb0(%in: f32, %in_0: f32, %out: f32):
+    %1 = arith.mulf %in, %in_0 : f32
+    %2 = arith.addf %out, %1 : f32
+    linalg.yield %2 : f32
+  } -> tensor<7x2x16x8x8xf32>
+  util.return %0 : tensor<7x2x16x8x8xf32>
+}
+// g n f h w c
+// CHECK-DAG: #[[LHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5)>
+// CHECK-DAG: #[[RHS_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d0, d3, d4, d5)>
+// CHECK-DAG: #[[OUT_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4)>
+// CHECK: util.func public @conv_2d_ngchw_fgchw_gnfhw(
+// CHECK-SAME: %[[IMG:.+]]: [[IMG_T:tensor<2x7x4x10x10xf32>]]
+// CHECK-SAME: %[[FIL:.+]]: [[FIL_T:tensor<16x7x4x3x3xf32>]]
+// CHECK-SAME: %[[OUT:.+]]: [[OUT_T:tensor<7x2x16x8x8xf32>]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : [[RHS_T:tensor<2x7x8x8x36xf32>]]
+// CHECK: %[[IM2COL:.+]] = iree_linalg_ext.im2col
+// CHECK-SAME: batch_pos = [0, 1] m_pos = [3, 4] k_pos = [2]
+// CHECK-SAME: ins(%[[IMG]] : [[IMG_T]])
+// CHECK-SAME: outs(%[[EMPTY]] : [[RHS_T]])
+// CHECK-DAG: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[FIL]] {{\[}}[0], [1], [2, 3, 4]] : [[FIL_T]] into [[LHS_T:tensor<16x7x36xf32>]]
+// CHECK: %[[MATMUL:.+]] = linalg.generic
+// CHECK-SAME: indexing_maps = [#[[LHS_MAP]], #[[RHS_MAP]], #[[OUT_MAP]]]
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction"]
+// CHECK-SAME: ins(%[[COLLAPSED]], %[[IM2COL]] : [[LHS_T]], [[RHS_T]])
+// CHECK-SAME: outs(%[[OUT]] : [[OUT_T]]) {
+// CHECK: }
+// CHECK: util.return %[[MATMUL]]