@@ -17,58 +17,69 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
1717; CHECK-NEXT: mov.w r5, #0
1818; CHECK-NEXT: csel r7, r6, r5, hs
1919; CHECK-NEXT: add.w lr, r7, #1
20- ; CHECK-NEXT: mov r6 , r5
21- ; CHECK-NEXT: vldrh.u16 q1 , [r0], #32
20+ ; CHECK-NEXT: mov r4 , r5
21+ ; CHECK-NEXT: vldrh.u16 q0 , [r0], #32
2222; CHECK-NEXT: movs r7, #0
2323; CHECK-NEXT: mov r8, r5
24+ ; CHECK-NEXT: vldrh.u16 q1, [r1], #32
25+ ; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
26+ ; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
27+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
28+ ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
29+ ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
2430; CHECK-NEXT: vldrh.u16 q0, [r1], #32
31+ ; CHECK-NEXT: sub.w lr, lr, #1
32+ ; CHECK-NEXT: cmp.w lr, #0
33+ ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
34+ ; CHECK-NEXT: beq .LBB0_3
2535; CHECK-NEXT: .p2align 2
2636; CHECK-NEXT: .LBB0_2: @ %while.body
2737; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
28- ; CHECK-NEXT: vmlsldava.s16 r8, r7, q1, q0
38+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
39+ ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
40+ ; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
2941; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
30- ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
31- ; CHECK-NEXT: vldrh.u16 q1, [r1, #-16]
32- ; CHECK-NEXT: vmlsldava.s16 r8, r7, q2, q1
33- ; CHECK-NEXT: vldrh.u16 q0, [r1], #32
34- ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q1
42+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
3543; CHECK-NEXT: vldrh.u16 q1, [r0], #32
44+ ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
45+ ; CHECK-NEXT: vldrh.u16 q0, [r1], #32
3646; CHECK-NEXT: le lr, .LBB0_2
37- ; CHECK-NEXT: @ %bb.3: @ %do.body
38- ; CHECK-NEXT: movs r4, #14
39- ; CHECK-NEXT: and.w r2, r4, r2, lsl #1
40- ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
47+ ; CHECK-NEXT: .LBB0_3:
48+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
49+ ; CHECK-NEXT: movs r6, #14
50+ ; CHECK-NEXT: and.w r2, r6, r2, lsl #1
51+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
4152; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
42- ; CHECK-NEXT: vmlsldava.s16 r8 , r7, q1, q0
53+ ; CHECK-NEXT: vmlsldava.s16 r4 , r7, q1, q0
4354; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
44- ; CHECK-NEXT: vmlaldavax.s16 r6 , r5, q2, q0
55+ ; CHECK-NEXT: vmlaldavax.s16 r8 , r5, q2, q0
4556; CHECK-NEXT: vctp.16 r2
46- ; CHECK-NEXT: vmlsldava.s16 r8 , r7, q2, q0
57+ ; CHECK-NEXT: vmlsldava.s16 r4 , r7, q2, q0
4758; CHECK-NEXT: vpst
4859; CHECK-NEXT: vldrht.u16 q1, [r0]
4960; CHECK-NEXT: cmp r2, #9
5061; CHECK-NEXT: vpsttt
5162; CHECK-NEXT: vldrht.u16 q0, [r1]
52- ; CHECK-NEXT: vmlsldavat.s16 r8 , r7, q1, q0
53- ; CHECK-NEXT: vmlaldavaxt.s16 r6 , r5, q1, q0
63+ ; CHECK-NEXT: vmlsldavat.s16 r4 , r7, q1, q0
64+ ; CHECK-NEXT: vmlaldavaxt.s16 r8 , r5, q1, q0
5465; CHECK-NEXT: blo .LBB0_10
5566; CHECK-NEXT: @ %bb.4: @ %do.body.1
5667; CHECK-NEXT: subs r2, #8
5768; CHECK-NEXT: vctp.16 r2
5869; CHECK-NEXT: vpstttt
5970; CHECK-NEXT: vldrht.u16 q0, [r0, #16]
6071; CHECK-NEXT: vldrht.u16 q1, [r1, #16]
61- ; CHECK-NEXT: vmlsldavat.s16 r8 , r7, q0, q1
62- ; CHECK-NEXT: vmlaldavaxt.s16 r6 , r5, q0, q1
72+ ; CHECK-NEXT: vmlsldavat.s16 r4 , r7, q0, q1
73+ ; CHECK-NEXT: vmlaldavaxt.s16 r8 , r5, q0, q1
6374; CHECK-NEXT: b .LBB0_10
6475; CHECK-NEXT: .p2align 2
6576; CHECK-NEXT: .LBB0_5: @ %if.else
66- ; CHECK-NEXT: mov.w r8 , #0
77+ ; CHECK-NEXT: mov.w r4 , #0
6778; CHECK-NEXT: cbz r2, .LBB0_9
6879; CHECK-NEXT: @ %bb.6: @ %while.body14.preheader
6980; CHECK-NEXT: lsls r6, r2, #1
70- ; CHECK-NEXT: mov r5, r8
71- ; CHECK-NEXT: mov r7, r8
81+ ; CHECK-NEXT: mov r5, r4
82+ ; CHECK-NEXT: mov r7, r4
7283; CHECK-NEXT: movs r2, #0
7384; CHECK-NEXT: dlstp.16 lr, r6
7485; CHECK-NEXT: .p2align 2
@@ -77,22 +88,22 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
7788; CHECK-NEXT: vldrh.u16 q0, [r0], #16
7889; CHECK-NEXT: vldrh.u16 q1, [r1], #16
7990; CHECK-NEXT: vmlsldava.s16 r2, r7, q0, q1
80- ; CHECK-NEXT: vmlaldavax.s16 r8 , r5, q0, q1
91+ ; CHECK-NEXT: vmlaldavax.s16 r4 , r5, q0, q1
8192; CHECK-NEXT: letp lr, .LBB0_7
8293; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177
83- ; CHECK-NEXT: mov r6, r8
84- ; CHECK-NEXT: mov r8 , r2
94+ ; CHECK-NEXT: mov r8, r4
95+ ; CHECK-NEXT: mov r4 , r2
8596; CHECK-NEXT: b .LBB0_10
8697; CHECK-NEXT: .p2align 2
8798; CHECK-NEXT: .LBB0_9:
88- ; CHECK-NEXT: mov r7, r8
89- ; CHECK-NEXT: movs r6 , #0
90- ; CHECK-NEXT: mov r5, r8
99+ ; CHECK-NEXT: mov r7, r4
100+ ; CHECK-NEXT: mov.w r8 , #0
101+ ; CHECK-NEXT: mov r5, r4
91102; CHECK-NEXT: .LBB0_10: @ %if.end
92- ; CHECK-NEXT: asrl r8 , r7, #6
93- ; CHECK-NEXT: asrl r6 , r5, #6
94- ; CHECK-NEXT: str.w r8 , [r3]
95- ; CHECK-NEXT: str.w r6 , [r12]
103+ ; CHECK-NEXT: asrl r4 , r7, #6
104+ ; CHECK-NEXT: asrl r8 , r5, #6
105+ ; CHECK-NEXT: str r4 , [r3]
106+ ; CHECK-NEXT: str.w r8 , [r12]
96107; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
97108entry:
98109 %cmp = icmp ugt i32 %numSamples , 15
0 commit comments