@@ -38,14 +38,12 @@ define <16 x i32> @mul_i32(<16 x i8> %a, <16 x i8> %b) {
3838;
3939; CHECK-GI-LABEL: mul_i32:
4040; CHECK-GI: // %bb.0: // %entry
41- ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
42- ; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
43- ; CHECK-GI-NEXT: ushll2 v4.8h, v0.16b, #0
44- ; CHECK-GI-NEXT: ushll2 v5.8h, v1.16b, #0
45- ; CHECK-GI-NEXT: umull v0.4s, v2.4h, v3.4h
46- ; CHECK-GI-NEXT: umull2 v1.4s, v2.8h, v3.8h
47- ; CHECK-GI-NEXT: umull v2.4s, v4.4h, v5.4h
48- ; CHECK-GI-NEXT: umull2 v3.4s, v4.8h, v5.8h
41+ ; CHECK-GI-NEXT: umull v2.8h, v0.8b, v1.8b
42+ ; CHECK-GI-NEXT: umull2 v3.8h, v0.16b, v1.16b
43+ ; CHECK-GI-NEXT: ushll v0.4s, v2.4h, #0
44+ ; CHECK-GI-NEXT: ushll2 v1.4s, v2.8h, #0
45+ ; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
46+ ; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
4947; CHECK-GI-NEXT: ret
5048entry:
5149 %ea = zext <16 x i8 > %a to <16 x i32 >
@@ -75,26 +73,20 @@ define <16 x i64> @mul_i64(<16 x i8> %a, <16 x i8> %b) {
7573;
7674; CHECK-GI-LABEL: mul_i64:
7775; CHECK-GI: // %bb.0: // %entry
78- ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
79- ; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
80- ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
81- ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
82- ; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
83- ; CHECK-GI-NEXT: ushll2 v5.4s, v2.8h, #0
84- ; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
85- ; CHECK-GI-NEXT: ushll v6.4s, v0.4h, #0
86- ; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
87- ; CHECK-GI-NEXT: ushll v7.4s, v1.4h, #0
88- ; CHECK-GI-NEXT: ushll2 v16.4s, v0.8h, #0
89- ; CHECK-GI-NEXT: ushll2 v17.4s, v1.8h, #0
90- ; CHECK-GI-NEXT: umull v0.2d, v4.2s, v2.2s
91- ; CHECK-GI-NEXT: umull2 v1.2d, v4.4s, v2.4s
92- ; CHECK-GI-NEXT: umull v2.2d, v5.2s, v3.2s
93- ; CHECK-GI-NEXT: umull2 v3.2d, v5.4s, v3.4s
94- ; CHECK-GI-NEXT: umull v4.2d, v6.2s, v7.2s
95- ; CHECK-GI-NEXT: umull2 v5.2d, v6.4s, v7.4s
96- ; CHECK-GI-NEXT: umull v6.2d, v16.2s, v17.2s
97- ; CHECK-GI-NEXT: umull2 v7.2d, v16.4s, v17.4s
76+ ; CHECK-GI-NEXT: umull v2.8h, v0.8b, v1.8b
77+ ; CHECK-GI-NEXT: umull2 v0.8h, v0.16b, v1.16b
78+ ; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
79+ ; CHECK-GI-NEXT: ushll2 v3.4s, v2.8h, #0
80+ ; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
81+ ; CHECK-GI-NEXT: ushll2 v7.4s, v0.8h, #0
82+ ; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
83+ ; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
84+ ; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
85+ ; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
86+ ; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
87+ ; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0
88+ ; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0
89+ ; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0
9890; CHECK-GI-NEXT: ret
9991entry:
10092 %ea = zext <16 x i8 > %a to <16 x i64 >
@@ -142,18 +134,12 @@ define <16 x i32> @mla_i32(<16 x i8> %a, <16 x i8> %b, <16 x i32> %c) {
142134;
143135; CHECK-GI-LABEL: mla_i32:
144136; CHECK-GI: // %bb.0: // %entry
145- ; CHECK-GI-NEXT: ushll v6.8h, v0.8b, #0
146- ; CHECK-GI-NEXT: ushll v7.8h, v1.8b, #0
147- ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
148- ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
149- ; CHECK-GI-NEXT: umlal v2.4s, v6.4h, v7.4h
150- ; CHECK-GI-NEXT: umlal2 v3.4s, v6.8h, v7.8h
151- ; CHECK-GI-NEXT: umlal v4.4s, v0.4h, v1.4h
152- ; CHECK-GI-NEXT: umlal2 v5.4s, v0.8h, v1.8h
153- ; CHECK-GI-NEXT: mov v0.16b, v2.16b
154- ; CHECK-GI-NEXT: mov v1.16b, v3.16b
155- ; CHECK-GI-NEXT: mov v2.16b, v4.16b
156- ; CHECK-GI-NEXT: mov v3.16b, v5.16b
137+ ; CHECK-GI-NEXT: umull v6.8h, v0.8b, v1.8b
138+ ; CHECK-GI-NEXT: umull2 v7.8h, v0.16b, v1.16b
139+ ; CHECK-GI-NEXT: uaddw v0.4s, v2.4s, v6.4h
140+ ; CHECK-GI-NEXT: uaddw2 v1.4s, v3.4s, v6.8h
141+ ; CHECK-GI-NEXT: uaddw v2.4s, v4.4s, v7.4h
142+ ; CHECK-GI-NEXT: uaddw2 v3.4s, v5.4s, v7.8h
157143; CHECK-GI-NEXT: ret
158144entry:
159145 %ea = zext <16 x i8 > %a to <16 x i32 >
@@ -186,35 +172,21 @@ define <16 x i64> @mla_i64(<16 x i8> %a, <16 x i8> %b, <16 x i64> %c) {
186172;
187173; CHECK-GI-LABEL: mla_i64:
188174; CHECK-GI: // %bb.0: // %entry
189- ; CHECK-GI-NEXT: mov v16.16b, v2.16b
190- ; CHECK-GI-NEXT: mov v17.16b, v3.16b
191- ; CHECK-GI-NEXT: mov v2.16b, v4.16b
192- ; CHECK-GI-NEXT: mov v3.16b, v5.16b
193- ; CHECK-GI-NEXT: mov v4.16b, v6.16b
194- ; CHECK-GI-NEXT: mov v5.16b, v7.16b
195- ; CHECK-GI-NEXT: ushll v6.8h, v0.8b, #0
196- ; CHECK-GI-NEXT: ushll v7.8h, v1.8b, #0
197- ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
198- ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
199- ; CHECK-GI-NEXT: ushll v18.4s, v6.4h, #0
200- ; CHECK-GI-NEXT: ushll v20.4s, v7.4h, #0
201- ; CHECK-GI-NEXT: ushll2 v19.4s, v6.8h, #0
202- ; CHECK-GI-NEXT: ushll v21.4s, v0.4h, #0
203- ; CHECK-GI-NEXT: ushll2 v22.4s, v7.8h, #0
204- ; CHECK-GI-NEXT: ushll v23.4s, v1.4h, #0
205- ; CHECK-GI-NEXT: ldp q6, q7, [sp]
206- ; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
207- ; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
208- ; CHECK-GI-NEXT: umlal v16.2d, v18.2s, v20.2s
209- ; CHECK-GI-NEXT: umlal2 v17.2d, v18.4s, v20.4s
210- ; CHECK-GI-NEXT: umlal v2.2d, v19.2s, v22.2s
211- ; CHECK-GI-NEXT: umlal2 v3.2d, v19.4s, v22.4s
212- ; CHECK-GI-NEXT: umlal v4.2d, v21.2s, v23.2s
213- ; CHECK-GI-NEXT: umlal2 v5.2d, v21.4s, v23.4s
214- ; CHECK-GI-NEXT: umlal v6.2d, v0.2s, v1.2s
215- ; CHECK-GI-NEXT: umlal2 v7.2d, v0.4s, v1.4s
216- ; CHECK-GI-NEXT: mov v0.16b, v16.16b
217- ; CHECK-GI-NEXT: mov v1.16b, v17.16b
175+ ; CHECK-GI-NEXT: umull v16.8h, v0.8b, v1.8b
176+ ; CHECK-GI-NEXT: umull2 v0.8h, v0.16b, v1.16b
177+ ; CHECK-GI-NEXT: ldp q19, q20, [sp]
178+ ; CHECK-GI-NEXT: ushll v1.4s, v16.4h, #0
179+ ; CHECK-GI-NEXT: ushll2 v16.4s, v16.8h, #0
180+ ; CHECK-GI-NEXT: ushll v17.4s, v0.4h, #0
181+ ; CHECK-GI-NEXT: ushll2 v18.4s, v0.8h, #0
182+ ; CHECK-GI-NEXT: uaddw v0.2d, v2.2d, v1.2s
183+ ; CHECK-GI-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
184+ ; CHECK-GI-NEXT: uaddw v2.2d, v4.2d, v16.2s
185+ ; CHECK-GI-NEXT: uaddw2 v3.2d, v5.2d, v16.4s
186+ ; CHECK-GI-NEXT: uaddw v4.2d, v6.2d, v17.2s
187+ ; CHECK-GI-NEXT: uaddw2 v5.2d, v7.2d, v17.4s
188+ ; CHECK-GI-NEXT: uaddw v6.2d, v19.2d, v18.2s
189+ ; CHECK-GI-NEXT: uaddw2 v7.2d, v20.2d, v18.4s
218190; CHECK-GI-NEXT: ret
219191entry:
220192 %ea = zext <16 x i8 > %a to <16 x i64 >
0 commit comments