// CHECK-NOT: linalg.generic
// CHECK: tensor.expand_shape
// CHECK: linalg.generic {{.*}}, iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction"]}
// CHECK-SAME: ins(%[[ARG0]], %[[FUSED]]#1 : tensor<1x1x2x1xf32>, tensor<4x1x1x1xf32>)
+
1084
+ // -----
1085
+
1086
+ // CHECK-LABEL: @drop_unused_results
1087
+ // CHECK-SAME: [[ARG0:%[a-zA-Z0-9]+]]: tensor<64xf32>, [[ARG1:%[a-zA-Z0-9]+]]: tensor<1x56x56x64xf32>
1088
+ func.func @drop_unused_results (%arg0: tensor <64 xf32 >, %arg1: tensor <1 x56 x56 x64 xf32 >) -> tensor <1 x56 x56 x64 xf32 > {
1089
+ %cst = arith.constant 3.40282347E+38 : f32
1090
+ %cst_0 = arith.constant 0.000000e+00 : f32
1091
+ // CHECK: [[OUT:%[a-zA-Z0-9]+]] = tensor.empty() : tensor<1x56x56x64xf32>
1092
+ %0 = tensor.empty () : tensor <1 x56 x56 x64 xf32 >
1093
+ // CHECK: [[RES:%[0-9]+]] = linalg.generic {{.*}} ins([[ARG0]], [[ARG1]] : tensor<64xf32>, tensor<1x56x56x64xf32>) outs([[OUT]] : tensor<1x56x56x64xf32>)
1094
+ %1:2 = linalg.generic {index ing_maps = [affine_map <(d0 , d1 , d2 , d3 ) -> (d3 )>, affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>, affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>], iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]} ins (%arg0 : tensor <64 xf32 >) outs (%arg1 , %0 : tensor <1 x56 x56 x64 xf32 >, tensor <1 x56 x56 x64 xf32 >) {
1095
+ ^bb0 (%in: f32 , %out: f32 , %out_1: f32 ):
1096
+ %2 = arith.addf %in , %out : f32
1097
+ %3 = arith.minimumf %2 , %cst : f32
1098
+ %4 = arith.maximumf %3 , %cst_0 : f32
1099
+ linalg.yield %2 , %4 : f32 , f32
1100
+ } -> (tensor <1 x56 x56 x64 xf32 >, tensor <1 x56 x56 x64 xf32 >)
1101
+ // CHECK: -> tensor<1x56x56x64xf32>
1102
+ // CHECK: return [[RES]] : tensor<1x56x56x64xf32>
1103
+ return %1#1 : tensor <1 x56 x56 x64 xf32 >
1104
+ }
1105
+
1106
+ // -----
1107
+
1108
+ // CHECK-LABEL: @swap_drop_unused_results
1109
+ // CHECK-SAME: [[ARG0:%[a-zA-Z0-9]+]]: tensor<64xf32>, [[ARG1:%[a-zA-Z0-9]+]]: tensor<1x56x56x64xf32>
1110
+ func.func @swap_drop_unused_results (%arg0: tensor <64 xf32 >, %arg1: tensor <1 x56 x56 x64 xf32 >) -> tensor <1 x56 x56 x64 xf32 > {
1111
+ %cst = arith.constant 3.40282347E+38 : f32
1112
+ %cst_0 = arith.constant 0.000000e+00 : f32
1113
+ // CHECK: [[OUT:%[a-zA-Z0-9]+]] = tensor.empty() : tensor<1x56x56x64xf32>
1114
+ %0 = tensor.empty () : tensor <1 x56 x56 x64 xf32 >
1115
+ // CHECK: [[RES:%[0-9]+]] = linalg.generic {{.*}} ins([[ARG0]] : tensor<64xf32>) outs([[OUT]] : tensor<1x56x56x64xf32>)
1116
+ %1:2 = linalg.generic {index ing_maps = [affine_map <(d0 , d1 , d2 , d3 ) -> (d3 )>, affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>, affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>], iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]} ins (%arg0 : tensor <64 xf32 >) outs (%arg1 , %0 : tensor <1 x56 x56 x64 xf32 >, tensor <1 x56 x56 x64 xf32 >) {
1117
+ ^bb0 (%in: f32 , %out_1: f32 , %out: f32 ):
1118
+ %2 = arith.addf %in , %out : f32
1119
+ %3 = arith.minimumf %2 , %cst : f32
1120
+ %4 = arith.maximumf %3 , %cst_0 : f32
1121
+ linalg.yield %2 , %4 : f32 , f32
1122
+ } -> (tensor <1 x56 x56 x64 xf32 >, tensor <1 x56 x56 x64 xf32 >)
1123
+ // CHECK: -> tensor<1x56x56x64xf32>
1124
+ // CHECK: return [[RES]] : tensor<1x56x56x64xf32>
1125
+ return %1#0 : tensor <1 x56 x56 x64 xf32 >
1126
+ }