@@ -17,69 +17,58 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
1717; CHECK-NEXT: mov.w r5, #0
1818; CHECK-NEXT: csel r7, r6, r5, hs
1919; CHECK-NEXT: add.w lr, r7, #1
20- ; CHECK-NEXT: mov r4 , r5
21- ; CHECK-NEXT: vldrh.u16 q0 , [r0], #32
20+ ; CHECK-NEXT: mov r6 , r5
21+ ; CHECK-NEXT: vldrh.u16 q1 , [r0], #32
2222; CHECK-NEXT: movs r7, #0
2323; CHECK-NEXT: mov r8, r5
24- ; CHECK-NEXT: vldrh.u16 q1, [r1], #32
25- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
26- ; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
27- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
28- ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
29- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
3024; CHECK-NEXT: vldrh.u16 q0, [r1], #32
31- ; CHECK-NEXT: sub.w lr, lr, #1
32- ; CHECK-NEXT: cmp.w lr, #0
33- ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
34- ; CHECK-NEXT: beq .LBB0_3
3525; CHECK-NEXT: .p2align 2
3626; CHECK-NEXT: .LBB0_2: @ %while.body
3727; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
38- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
39- ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
40- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
28+ ; CHECK-NEXT: vmlsldava.s16 r8, r7, q1, q0
4129; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
42- ; CHECK-NEXT: vmlaldavax.s16 r8 , r5, q1, q0
43- ; CHECK-NEXT: vldrh.u16 q1, [r0] , #32
44- ; CHECK-NEXT: vmlsldava.s16 r4 , r7, q2, q3
30+ ; CHECK-NEXT: vmlaldavax.s16 r6 , r5, q1, q0
31+ ; CHECK-NEXT: vldrh.u16 q1, [r1 , #-16]
32+ ; CHECK-NEXT: vmlsldava.s16 r8 , r7, q2, q1
4533; CHECK-NEXT: vldrh.u16 q0, [r1], #32
34+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q1
35+ ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
4636; CHECK-NEXT: le lr, .LBB0_2
47- ; CHECK-NEXT: .LBB0_3:
48- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
49- ; CHECK-NEXT: movs r6, #14
50- ; CHECK-NEXT: and.w r2, r6, r2, lsl #1
51- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
37+ ; CHECK-NEXT: @ %bb.3: @ %do.body
38+ ; CHECK-NEXT: movs r4, #14
39+ ; CHECK-NEXT: and.w r2, r4, r2, lsl #1
40+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
5241; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
53- ; CHECK-NEXT: vmlsldava.s16 r4 , r7, q1, q0
42+ ; CHECK-NEXT: vmlsldava.s16 r8 , r7, q1, q0
5443; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
55- ; CHECK-NEXT: vmlaldavax.s16 r8 , r5, q2, q0
44+ ; CHECK-NEXT: vmlaldavax.s16 r6 , r5, q2, q0
5645; CHECK-NEXT: vctp.16 r2
57- ; CHECK-NEXT: vmlsldava.s16 r4 , r7, q2, q0
46+ ; CHECK-NEXT: vmlsldava.s16 r8 , r7, q2, q0
5847; CHECK-NEXT: vpst
5948; CHECK-NEXT: vldrht.u16 q1, [r0]
6049; CHECK-NEXT: cmp r2, #9
6150; CHECK-NEXT: vpsttt
6251; CHECK-NEXT: vldrht.u16 q0, [r1]
63- ; CHECK-NEXT: vmlsldavat.s16 r4 , r7, q1, q0
64- ; CHECK-NEXT: vmlaldavaxt.s16 r8 , r5, q1, q0
52+ ; CHECK-NEXT: vmlsldavat.s16 r8 , r7, q1, q0
53+ ; CHECK-NEXT: vmlaldavaxt.s16 r6 , r5, q1, q0
6554; CHECK-NEXT: blo .LBB0_10
6655; CHECK-NEXT: @ %bb.4: @ %do.body.1
6756; CHECK-NEXT: subs r2, #8
6857; CHECK-NEXT: vctp.16 r2
6958; CHECK-NEXT: vpstttt
7059; CHECK-NEXT: vldrht.u16 q0, [r0, #16]
7160; CHECK-NEXT: vldrht.u16 q1, [r1, #16]
72- ; CHECK-NEXT: vmlsldavat.s16 r4 , r7, q0, q1
73- ; CHECK-NEXT: vmlaldavaxt.s16 r8 , r5, q0, q1
61+ ; CHECK-NEXT: vmlsldavat.s16 r8 , r7, q0, q1
62+ ; CHECK-NEXT: vmlaldavaxt.s16 r6 , r5, q0, q1
7463; CHECK-NEXT: b .LBB0_10
7564; CHECK-NEXT: .p2align 2
7665; CHECK-NEXT: .LBB0_5: @ %if.else
77- ; CHECK-NEXT: mov.w r4 , #0
66+ ; CHECK-NEXT: mov.w r8 , #0
7867; CHECK-NEXT: cbz r2, .LBB0_9
7968; CHECK-NEXT: @ %bb.6: @ %while.body14.preheader
8069; CHECK-NEXT: lsls r6, r2, #1
81- ; CHECK-NEXT: mov r5, r4
82- ; CHECK-NEXT: mov r7, r4
70+ ; CHECK-NEXT: mov r5, r8
71+ ; CHECK-NEXT: mov r7, r8
8372; CHECK-NEXT: movs r2, #0
8473; CHECK-NEXT: dlstp.16 lr, r6
8574; CHECK-NEXT: .p2align 2
@@ -88,22 +77,22 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
8877; CHECK-NEXT: vldrh.u16 q0, [r0], #16
8978; CHECK-NEXT: vldrh.u16 q1, [r1], #16
9079; CHECK-NEXT: vmlsldava.s16 r2, r7, q0, q1
91- ; CHECK-NEXT: vmlaldavax.s16 r4 , r5, q0, q1
80+ ; CHECK-NEXT: vmlaldavax.s16 r8 , r5, q0, q1
9281; CHECK-NEXT: letp lr, .LBB0_7
9382; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177
94- ; CHECK-NEXT: mov r8, r4
95- ; CHECK-NEXT: mov r4 , r2
83+ ; CHECK-NEXT: mov r6, r8
84+ ; CHECK-NEXT: mov r8 , r2
9685; CHECK-NEXT: b .LBB0_10
9786; CHECK-NEXT: .p2align 2
9887; CHECK-NEXT: .LBB0_9:
99- ; CHECK-NEXT: mov r7, r4
100- ; CHECK-NEXT: mov.w r8 , #0
101- ; CHECK-NEXT: mov r5, r4
88+ ; CHECK-NEXT: mov r7, r8
89+ ; CHECK-NEXT: movs r6 , #0
90+ ; CHECK-NEXT: mov r5, r8
10291; CHECK-NEXT: .LBB0_10: @ %if.end
103- ; CHECK-NEXT: asrl r4 , r7, #6
104- ; CHECK-NEXT: asrl r8 , r5, #6
105- ; CHECK-NEXT: str r4 , [r3]
106- ; CHECK-NEXT: str.w r8 , [r12]
92+ ; CHECK-NEXT: asrl r8 , r7, #6
93+ ; CHECK-NEXT: asrl r6 , r5, #6
94+ ; CHECK-NEXT: str.w r8 , [r3]
95+ ; CHECK-NEXT: str.w r6 , [r12]
10796; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
10897entry:
10998 %cmp = icmp ugt i32 %numSamples , 15
0 commit comments