@@ -14,19 +14,20 @@ target triple = "aarch64"
1414define %"class.std::complex" @complex_mul_v2f64 (ptr %a , ptr %b ) {
1515; CHECK-LABEL: complex_mul_v2f64:
1616; CHECK: // %bb.0: // %entry
17- ; CHECK-NEXT: movi v0.2d, #0000000000000000
1817; CHECK-NEXT: movi v1.2d, #0000000000000000
1918; CHECK-NEXT: mov w8, #100 // =0x64
20- ; CHECK-NEXT: whilelo p1.d, xzr, x8
2119; CHECK-NEXT: cntd x9
20+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
2221; CHECK-NEXT: rdvl x10, #2
23- ; CHECK-NEXT: ptrue p0.d
2422; CHECK-NEXT: mov x11, x9
23+ ; CHECK-NEXT: ptrue p0.d
24+ ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
25+ ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2526; CHECK-NEXT: .LBB0_1: // %vector.body
2627; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2728; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
28- ; CHECK-NEXT: mov z6.d, z0 .d
29- ; CHECK-NEXT: mov z7.d, z1 .d
29+ ; CHECK-NEXT: mov z6.d, z1 .d
30+ ; CHECK-NEXT: mov z7.d, z0 .d
3031; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
3132; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
3233; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
@@ -38,14 +39,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
3839; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
3940; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
4041; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
41- ; CHECK-NEXT: mov z1 .d, p2/m, z7.d
42- ; CHECK-NEXT: mov z0 .d, p1/m, z6.d
42+ ; CHECK-NEXT: mov z0 .d, p2/m, z7.d
43+ ; CHECK-NEXT: mov z1 .d, p1/m, z6.d
4344; CHECK-NEXT: whilelo p1.d, x11, x8
4445; CHECK-NEXT: add x11, x11, x9
4546; CHECK-NEXT: b.mi .LBB0_1
4647; CHECK-NEXT: // %bb.2: // %exit.block
47- ; CHECK-NEXT: uzp1 z2.d, z0 .d, z1 .d
48- ; CHECK-NEXT: uzp2 z1.d, z0 .d, z1 .d
48+ ; CHECK-NEXT: uzp1 z2.d, z1 .d, z0 .d
49+ ; CHECK-NEXT: uzp2 z1.d, z1 .d, z0 .d
4950; CHECK-NEXT: faddv d0, p0, z2.d
5051; CHECK-NEXT: faddv d1, p0, z1.d
5152; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -110,20 +111,21 @@ exit.block: ; preds = %vector.body
110111define %"class.std::complex" @complex_mul_predicated_v2f64 (ptr %a , ptr %b , ptr %cond ) {
111112; CHECK-LABEL: complex_mul_predicated_v2f64:
112113; CHECK: // %bb.0: // %entry
113- ; CHECK-NEXT: movi v0.2d, #0000000000000000
114114; CHECK-NEXT: movi v1.2d, #0000000000000000
115115; CHECK-NEXT: cntd x9
116- ; CHECK-NEXT: neg x10, x9
117116; CHECK-NEXT: mov w11, #100 // =0x64
117+ ; CHECK-NEXT: neg x10, x9
118118; CHECK-NEXT: ptrue p0.d
119119; CHECK-NEXT: mov x8, xzr
120120; CHECK-NEXT: and x10, x10, x11
121121; CHECK-NEXT: rdvl x11, #2
122+ ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
123+ ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
122124; CHECK-NEXT: .LBB1_1: // %vector.body
123125; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
124126; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
125- ; CHECK-NEXT: mov z6.d, z0 .d
126- ; CHECK-NEXT: mov z7.d, z1 .d
127+ ; CHECK-NEXT: mov z6.d, z1 .d
128+ ; CHECK-NEXT: mov z7.d, z0 .d
127129; CHECK-NEXT: add x8, x8, x9
128130; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
129131; CHECK-NEXT: cmp x10, x8
@@ -139,12 +141,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
139141; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
140142; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
141143; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
142- ; CHECK-NEXT: mov z1 .d, p2/m, z7.d
143- ; CHECK-NEXT: mov z0 .d, p1/m, z6.d
144+ ; CHECK-NEXT: mov z0 .d, p2/m, z7.d
145+ ; CHECK-NEXT: mov z1 .d, p1/m, z6.d
144146; CHECK-NEXT: b.ne .LBB1_1
145147; CHECK-NEXT: // %bb.2: // %exit.block
146- ; CHECK-NEXT: uzp1 z2.d, z0 .d, z1 .d
147- ; CHECK-NEXT: uzp2 z1.d, z0 .d, z1 .d
148+ ; CHECK-NEXT: uzp1 z2.d, z1 .d, z0 .d
149+ ; CHECK-NEXT: uzp2 z1.d, z1 .d, z0 .d
148150; CHECK-NEXT: faddv d0, p0, z2.d
149151; CHECK-NEXT: faddv d1, p0, z1.d
150152; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -211,20 +213,21 @@ exit.block: ; preds = %vector.body
211213define %"class.std::complex" @complex_mul_predicated_x2_v2f64 (ptr %a , ptr %b , ptr %cond ) {
212214; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
213215; CHECK: // %bb.0: // %entry
214- ; CHECK-NEXT: movi v0.2d, #0000000000000000
215216; CHECK-NEXT: movi v1.2d, #0000000000000000
216217; CHECK-NEXT: mov w8, #100 // =0x64
217- ; CHECK-NEXT: whilelo p1.d, xzr, x8
218218; CHECK-NEXT: cntd x9
219+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
219220; CHECK-NEXT: rdvl x10, #2
220- ; CHECK-NEXT: ptrue p0.d
221221; CHECK-NEXT: cnth x11
222+ ; CHECK-NEXT: ptrue p0.d
222223; CHECK-NEXT: mov x12, x9
224+ ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
225+ ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
223226; CHECK-NEXT: .LBB2_1: // %vector.body
224227; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
225228; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
226- ; CHECK-NEXT: mov z6.d, z0 .d
227- ; CHECK-NEXT: mov z7.d, z1 .d
229+ ; CHECK-NEXT: mov z6.d, z1 .d
230+ ; CHECK-NEXT: mov z7.d, z0 .d
228231; CHECK-NEXT: add x2, x2, x11
229232; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
230233; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
@@ -240,14 +243,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
240243; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
241244; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
242245; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
243- ; CHECK-NEXT: mov z1 .d, p2/m, z7.d
244- ; CHECK-NEXT: mov z0 .d, p1/m, z6.d
246+ ; CHECK-NEXT: mov z0 .d, p2/m, z7.d
247+ ; CHECK-NEXT: mov z1 .d, p1/m, z6.d
245248; CHECK-NEXT: whilelo p1.d, x12, x8
246249; CHECK-NEXT: add x12, x12, x9
247250; CHECK-NEXT: b.mi .LBB2_1
248251; CHECK-NEXT: // %bb.2: // %exit.block
249- ; CHECK-NEXT: uzp1 z2.d, z0 .d, z1 .d
250- ; CHECK-NEXT: uzp2 z1.d, z0 .d, z1 .d
252+ ; CHECK-NEXT: uzp1 z2.d, z1 .d, z0 .d
253+ ; CHECK-NEXT: uzp2 z1.d, z1 .d, z0 .d
251254; CHECK-NEXT: faddv d0, p0, z2.d
252255; CHECK-NEXT: faddv d1, p0, z1.d
253256; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
0 commit comments