@@ -34,48 +34,39 @@ declare { ptr, i20, i20 } @llvm.aie2.add.3d(ptr, i20, i20, i20, i20, i20, i20, i
3434define void @add2d (ptr noalias %params , ptr noalias %ifm1_data , ptr noalias %ifm2_data , ptr noalias %ofm_data , ptr %.out , ptr %conv.i.i.i.out , ptr %idx.ext9.out , ptr %.out1 , ptr %.out2 , ptr %.out3 , ptr %.out4 , ptr %.out5 , ptr %conv.i.i.i.i.i.out , ptr %.out6 , ptr %conv.i.i.i46.out , ptr %xtraiter.out , ptr %in_ptr1.051.unr.ce.out , ptr %in_ptr2.0.in50.unr.ce.out , ptr %out_ptr.049.unr.ce.out , ptr %itr_left_cnt0.048.unr.ce.out , ptr %itr_left_cnt1.047.unr.ce.out ) #3 {
3535; ASM-LABEL: add2d:
3636; ASM: // %bb.0: // %newFuncRoot
37- ; ASM-NEXT: lda r2, [p0, #64]; paddb [p0], #40; nopm
37+ ; ASM-NEXT: lda r2, [p0, #64]; paddb [p0], #40; nopxm ; nops
3838; ASM-NEXT: lda m2, [p0], #-4
3939; ASM-NEXT: lda m5, [p0], #8
4040; ASM-NEXT: lda m4, [p0], #8
41- ; ASM-NEXT: lda m3, [p0], #-24; paddb [sp], #32
42- ; ASM-NEXT: lda r0, [p0], #4; st p6, [sp, #-28] // 4-byte Folded Spill
43- ; ASM-NEXT: lda r1, [p0], #-12; mov p6, sp
44- ; ASM-NEXT: lda r3, [p0], #40; paddb [p6], #-36
45- ; ASM-NEXT: lda p7, [p6, #0]; mov p6, sp
46- ; ASM-NEXT: paddb [p6], #-40
47- ; ASM-NEXT: lda r5, [p6, #0]; mov p6, sp
48- ; ASM-NEXT: lda m1, [p0], #36; paddb [p6], #-44
49- ; ASM-NEXT: lda p6, [p6, #0]
50- ; ASM-NEXT: lda m0, [p0], #-8; st p7, [sp, #-32] // 4-byte Folded Spill
51- ; ASM-NEXT: lda dn0, [p0], #-8; st r3, [p4, #0]
52- ; ASM-NEXT: lda dj0, [p0], #12; nez r4, r1; mov p4, sp
53- ; ASM-NEXT: lda dn4, [p0], #-8; paddb [p4], #-48; st r4, [p5, #0]
54- ; ASM-NEXT: lda p4, [p4, #0]; mov p5, sp
55- ; ASM-NEXT: lda dj4, [p0], #-36; st m1, [p7, #0]
56- ; ASM-NEXT: lda r1, [p0, #0]; mov p7, r5
57- ; ASM-NEXT: lda r5, [p0, #-36]; paddb [p5], #-52; mov p0, sp
58- ; ASM-NEXT: lda p5, [p5, #0]; st m0, [p7, #0]
59- ; ASM-NEXT: paddb [p0], #-72; mov p7, sp
60- ; ASM-NEXT: lda p0, [p0, #0]; paddb [p7], #-56; st dj0, [p6, #0]
61- ; ASM-NEXT: lda r6, [p7, #0]; mov p6, sp
62- ; ASM-NEXT: paddb [p6], #-60; mov p7, sp
63- ; ASM-NEXT: lda r7, [p6, #0]; paddb [p7], #-64; mov p6, sp
64- ; ASM-NEXT: lda p7, [p7, #0]; st dj4, [p4, #0]
65- ; ASM-NEXT: mov p4, sp
66- ; ASM-NEXT: paddb [p4], #-76; st dn0, [p5, #0]
67- ; ASM-NEXT: lda r11, [p4, #0]; paddb [p6], #-68; mov p4, sp
68- ; ASM-NEXT: lda r8, [p6, #0]; paddb [p4], #-80; mov p5, r6
69- ; ASM-NEXT: lda p6, [p4, #0]; mov p4, sp
70- ; ASM-NEXT: mova r6, #1; paddb [p4], #-84; nez r1, r1; st dn4, [p5, #0]
71- ; ASM-NEXT: lda r9, [p4, #0]; ne r6, r0, r6; mov p4, sp
72- ; ASM-NEXT: mova r0, #3; paddb [p4], #-88; add r7, r2, #-1; mov p5, r7
73- ; ASM-NEXT: lda r10, [p4, #0]; ltu r7, r7, r0; mov p4, sp
74- ; ASM-NEXT: jz r7, #.LBB0_2
75- ; ASM-NEXT: paddb [p4], #-92; st r1, [p5, #0] // Delay Slot 5
76- ; ASM-NEXT: lda p4, [p4, #0]; st r5, [p7, #0] // Delay Slot 4
77- ; ASM-NEXT: paddb [p2], m5; mov p7, r8 // Delay Slot 3
78- ; ASM-NEXT: st r6, [p7, #0]; paddb [p2], m4; and r8, r2, r0 // Delay Slot 2
41+ ; ASM-NEXT: lda m3, [p0], #-24
42+ ; ASM-NEXT: lda r0, [p0], #4
43+ ; ASM-NEXT: lda r1, [p0], #-12
44+ ; ASM-NEXT: lda r3, [p0], #40
45+ ; ASM-NEXT: lda m1, [p0], #36
46+ ; ASM-NEXT: lda m0, [p0], #-8
47+ ; ASM-NEXT: lda dn0, [p0], #-8
48+ ; ASM-NEXT: lda dj0, [p0], #12
49+ ; ASM-NEXT: lda dn4, [p0], #-8; paddb [sp], #32
50+ ; ASM-NEXT: lda dj4, [p0], #-36; st p7, [sp, #-32] // 4-byte Folded Spill
51+ ; ASM-NEXT: lda r1, [p0, #0]; mov p7, sp
52+ ; ASM-NEXT: lda r5, [p0, #-36]; paddb [p7], #-36
53+ ; ASM-NEXT: lda p0, [p7], #-4
54+ ; ASM-NEXT: lda p0, [p7], #-4
55+ ; ASM-NEXT: lda p0, [p7], #-4
56+ ; ASM-NEXT: lda p0, [p7], #-4
57+ ; ASM-NEXT: lda p0, [p7], #-4; st p6, [sp, #-28]; nez r4, r1 // 4-byte Folded Spill
58+ ; ASM-NEXT: lda p5, [p7], #-4; st r3, [p4, #0]
59+ ; ASM-NEXT: lda p6, [p7], #-4; st r4, [p5, #0]
60+ ; ASM-NEXT: lda p0, [p7], #-4; st m1, [p0, #0]; add r7, r2, #-1; mov r6, #1
61+ ; ASM-NEXT: lda p4, [p7], #-4; st m0, [p0, #0]; ne r6, r0, r6
62+ ; ASM-NEXT: lda r13, [p7], #-4; st dj0, [p0, #0]; movx r0, #3
63+ ; ASM-NEXT: st dj4, [p0, #0]; ltu r7, r7, r0
64+ ; ASM-NEXT: lda r9, [p7], #-4; st dn0, [p0, #0]; nez r1, r1
65+ ; ASM-NEXT: lda r10, [p7], #-4; st dn4, [p5, #0]; jz r7, #.LBB0_2
66+ ; ASM-NEXT: lda r11, [p7], #-4; st r1, [p6, #0] // Delay Slot 5
67+ ; ASM-NEXT: lda p7, [p7, #-4]; st r5, [p0, #0] // Delay Slot 4
68+ ; ASM-NEXT: paddb [p2], m5; st r6, [p4, #0] // Delay Slot 3
69+ ; ASM-NEXT: lda r12, [p7, #0]; paddb [p2], m4; and r8, r2, r0; mov p0, r13 // Delay Slot 2
7970; ASM-NEXT: padda [p1], m2; paddb [p2], m3; movx r0, #0; st r8, [p0, #0] // Delay Slot 1
8071; ASM-NEXT: // %bb.1:
8172; ASM-NEXT: j #.LBB0_5
@@ -123,18 +114,20 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
123114; ASM-NEXT: nop
124115; ASM-NEXT: nop
125116; ASM-NEXT: vst.srs.d8.s32 cm0, s0, [p3], #32
126- ; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32; mov crUPSSign, #0
127- ; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov r0, dc0
128- ; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32; mov r1, dc4
117+ ; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32
118+ ; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov crUPSSign, #0
119+ ; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32; mov r0, dc0
120+ ; ASM-NEXT: mov r1, dc4
129121; ASM-NEXT: mov crSRSSign, #0
130122; ASM-NEXT: .LBB0_5: // %for.cond.cleanup.unr-lcssa.split
131- ; ASM-NEXT: nopx ; mov p0, r10
132- ; ASM-NEXT: lda p7, [sp, #-32]; st r1, [p4, #0] // 4-byte Folded Reload
133- ; ASM-NEXT: lda p6, [sp, #-28]; st r0, [p0, #0] // 4-byte Folded Reload
134- ; ASM-NEXT: ret lr ; mov p0, r9
135- ; ASM-NEXT: st p3, [p0, #0] // Delay Slot 5
136- ; ASM-NEXT: mov p0, r11 // Delay Slot 4
137- ; ASM-NEXT: st p2, [p6, #0] // Delay Slot 3
123+ ; ASM-NEXT: st r1, [p7, #0]; nopx
124+ ; ASM-NEXT: mov p0, r12
125+ ; ASM-NEXT: lda p7, [sp, #-32]; st r0, [p0, #0] // 4-byte Folded Reload
126+ ; ASM-NEXT: lda p6, [sp, #-28]; mov p0, r11 // 4-byte Folded Reload
127+ ; ASM-NEXT: st p3, [p0, #0]; ret lr
128+ ; ASM-NEXT: mov p0, r10 // Delay Slot 5
129+ ; ASM-NEXT: st p2, [p0, #0] // Delay Slot 4
130+ ; ASM-NEXT: mov p0, r9 // Delay Slot 3
138131; ASM-NEXT: st p1, [p0, #0] // Delay Slot 2
139132; ASM-NEXT: paddb [sp], #-32 // Delay Slot 1
140133newFuncRoot:
0 commit comments