@@ -15,36 +15,34 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
1515; CHECK-LABEL: complex_mul_v2f64:
1616; CHECK: // %bb.0: // %entry
1717; CHECK-NEXT: mov z1.d, #0 // =0x0
18- ; CHECK-NEXT: mov w9, #100 // =0x64
19- ; CHECK-NEXT: cntd x10
20- ; CHECK-NEXT: whilelo p1.d, xzr, x9
21- ; CHECK-NEXT: mov x8, xzr
22- ; CHECK-NEXT: rdvl x11, #2
18+ ; CHECK-NEXT: mov w8, #100 // =0x64
19+ ; CHECK-NEXT: cntd x9
20+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
21+ ; CHECK-NEXT: rdvl x10, #2
22+ ; CHECK-NEXT: mov x11, x9
2323; CHECK-NEXT: ptrue p0.d
24- ; CHECK-NEXT: mov x12, x10
2524; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
2625; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2726; CHECK-NEXT: .LBB0_1: // %vector.body
2827; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2928; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
30- ; CHECK-NEXT: add x13, x0, x8
31- ; CHECK-NEXT: add x14, x1, x8
32- ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
3329; CHECK-NEXT: mov z6.d, z1.d
3430; CHECK-NEXT: mov z7.d, z0.d
35- ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
36- ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
37- ; CHECK-NEXT: add x8, x8, x11
38- ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
39- ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
31+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
32+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
33+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
34+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
35+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
36+ ; CHECK-NEXT: add x1, x1, x10
37+ ; CHECK-NEXT: add x0, x0, x10
4038; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
4139; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
4240; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
4341; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
4442; CHECK-NEXT: mov z0.d, p2/m, z7.d
4543; CHECK-NEXT: mov z1.d, p1/m, z6.d
46- ; CHECK-NEXT: whilelo p1.d, x12, x9
47- ; CHECK-NEXT: add x12, x12, x10
44+ ; CHECK-NEXT: whilelo p1.d, x11, x8
45+ ; CHECK-NEXT: add x11, x11, x9
4846; CHECK-NEXT: b.mi .LBB0_1
4947; CHECK-NEXT: // %bb.2: // %exit.block
5048; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -114,39 +112,37 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
114112; CHECK-LABEL: complex_mul_predicated_v2f64:
115113; CHECK: // %bb.0: // %entry
116114; CHECK-NEXT: mov z1.d, #0 // =0x0
117- ; CHECK-NEXT: cntd x10
118- ; CHECK-NEXT: mov w12, #100 // =0x64
119- ; CHECK-NEXT: neg x11, x10
115+ ; CHECK-NEXT: cntd x9
116+ ; CHECK-NEXT: mov w11, #100 // =0x64
117+ ; CHECK-NEXT: neg x10, x9
120118; CHECK-NEXT: ptrue p0.d
121119; CHECK-NEXT: mov x8, xzr
122- ; CHECK-NEXT: mov x9, xzr
123- ; CHECK-NEXT: and x11, x11, x12
124- ; CHECK-NEXT: rdvl x12, #2
120+ ; CHECK-NEXT: and x10, x10, x11
121+ ; CHECK-NEXT: rdvl x11, #2
125122; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
126123; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
127124; CHECK-NEXT: .LBB1_1: // %vector.body
128125; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
129- ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
130- ; CHECK-NEXT: add x13, x0, x8
131- ; CHECK-NEXT: add x14, x1, x8
126+ ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
132127; CHECK-NEXT: mov z6.d, z1.d
133128; CHECK-NEXT: mov z7.d, z0.d
134- ; CHECK-NEXT: add x9, x9, x10
135- ; CHECK-NEXT: add x8, x8, x12
136- ; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
137- ; CHECK-NEXT: cmp x11, x9
138- ; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
139- ; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
140- ; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
141- ; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
142- ; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
143- ; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
129+ ; CHECK-NEXT: add x8, x8, x9
130+ ; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
131+ ; CHECK-NEXT: cmp x10, x8
132+ ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
133+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
134+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
135+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
136+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
137+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
138+ ; CHECK-NEXT: add x1, x1, x11
139+ ; CHECK-NEXT: add x0, x0, x11
144140; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
145141; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
146142; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
147143; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
148- ; CHECK-NEXT: mov z0.d, p1/m, z7.d
149- ; CHECK-NEXT: mov z1.d, p2/m, z6.d
144+ ; CHECK-NEXT: mov z0.d, p2/m, z7.d
145+ ; CHECK-NEXT: mov z1.d, p1/m, z6.d
150146; CHECK-NEXT: b.ne .LBB1_1
151147; CHECK-NEXT: // %bb.2: // %exit.block
152148; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -218,38 +214,38 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
218214; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
219215; CHECK: // %bb.0: // %entry
220216; CHECK-NEXT: mov z1.d, #0 // =0x0
221- ; CHECK-NEXT: mov w10, #100 // =0x64
217+ ; CHECK-NEXT: mov w8, #100 // =0x64
218+ ; CHECK-NEXT: cntd x9
219+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
220+ ; CHECK-NEXT: rdvl x10, #2
221+ ; CHECK-NEXT: cnth x11
222222; CHECK-NEXT: ptrue p0.d
223- ; CHECK-NEXT: whilelo p1.d, xzr, x10
224- ; CHECK-NEXT: mov x8, xzr
225- ; CHECK-NEXT: mov x9, xzr
226- ; CHECK-NEXT: cntd x11
227- ; CHECK-NEXT: rdvl x12, #2
223+ ; CHECK-NEXT: mov x12, x9
228224; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
229225; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
230226; CHECK-NEXT: .LBB2_1: // %vector.body
231227; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
232- ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
233- ; CHECK-NEXT: add x13, x0, x8
234- ; CHECK-NEXT: add x14, x1, x8
228+ ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
235229; CHECK-NEXT: mov z6.d, z1.d
236230; CHECK-NEXT: mov z7.d, z0.d
237- ; CHECK-NEXT: add x9, x9, x11
238- ; CHECK-NEXT: add x8, x8, x12
239- ; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
240- ; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
241- ; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
242- ; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
243- ; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
244- ; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
245- ; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
231+ ; CHECK-NEXT: add x2, x2, x11
232+ ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
233+ ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
234+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
235+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
236+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
237+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
238+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
239+ ; CHECK-NEXT: add x1, x1, x10
240+ ; CHECK-NEXT: add x0, x0, x10
246241; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
247242; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
248243; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
249244; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
250- ; CHECK-NEXT: mov z0.d, p1/m, z7.d
251- ; CHECK-NEXT: whilelo p1.d, x9, x10
252- ; CHECK-NEXT: mov z1.d, p2/m, z6.d
245+ ; CHECK-NEXT: mov z0.d, p2/m, z7.d
246+ ; CHECK-NEXT: mov z1.d, p1/m, z6.d
247+ ; CHECK-NEXT: whilelo p1.d, x12, x8
248+ ; CHECK-NEXT: add x12, x12, x9
253249; CHECK-NEXT: b.mi .LBB2_1
254250; CHECK-NEXT: // %bb.2: // %exit.block
255251; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
0 commit comments