Skip to content

Commit 1fa9a28

Browse files
[AIE2P] Add base tests exposing cycles in copy bundles
Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent b2937da commit 1fa9a28

File tree

2 files changed

+441
-0
lines changed

2 files changed

+441
-0
lines changed
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
;
4+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
5+
; See https://llvm.org/LICENSE.txt for license information.
6+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
;
8+
; (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
9+
; RUN: not llc -mtriple aie2p -o %t.s %s 2>&1 | FileCheck %s --check-prefix=BUNDLE-ERROR
10+
; RUN: llc -mtriple=aie2p --aie-staged-ra-fine-grained-alloc=false %s -o - | FileCheck %s --check-prefix=COARSE-GRAINED
11+
12+
; Function Attrs: nounwind readnone
13+
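; The first RUN line (no --aie-staged-ra-fine-grained-alloc=false) runs llc
; under `not`, i.e. it is expected to fail; it checks for this diagnostic.
; BUNDLE-ERROR: error: register rewriting failed: cycle in copy bundle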
14+
; Test input: an outer loop (for.body.i) wrapping an inner self-loop
; (for.body125.i). Results of the add.3d / fifo.ld.pop.576.3d intrinsics are
; carried around the outer loop through the phis in for.body.i.
; Pointer operands are null and several operands are constants throughout
; (looks like a reduced reproducer -- TODO confirm).
define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0.copyload.i, i32 %dimsAI.sroa.9.0.copyload.i, i32 %dimsAO.sroa.7.0.copyload.i, i32 %dimsAO.sroa.4.0.copyload.i, i32 %dimsAO.sroa.6.0.copyload.i, i32 %dimsAO.sroa.0.0.copyload.i, i32 %dimsAO.sroa.5.0.copyload.i, i32 %dimsW.sroa.4.0.copyload.i, i32 %dimsW.sroa.6.0.copyload.i, i20 %0, i1 %1, i32 %dimsAI.sroa.11.0.copyload.i) {
15+
; COARSE-GRAINED-LABEL: heavy_3d_user:
16+
; COARSE-GRAINED: // %bb.0: // %entry
17+
; COARSE-GRAINED-NEXT: nopa ; nopb ; paddxm [sp], #384; nops
18+
; COARSE-GRAINED-NEXT: mova m0, #-388; st r9, [sp, #-356]; mov p1, sp // 4-byte Folded Spill
19+
; COARSE-GRAINED-NEXT: mova m0, #-392; paddb [p1], m0; st r10, [sp, #-360] // 4-byte Folded Spill
20+
; COARSE-GRAINED-NEXT: lda dj0, [p1, #0]; st r11, [sp, #-364]; mov p1, sp // 4-byte Folded Spill
21+
; COARSE-GRAINED-NEXT: mova m0, #-400; paddb [p1], m0; st r12, [sp, #-368] // 4-byte Folded Spill
22+
; COARSE-GRAINED-NEXT: lda dj4, [p1, #0]; st r13, [sp, #-372]; mov p1, sp // 4-byte Folded Spill
23+
; COARSE-GRAINED-NEXT: padda [p1], m0; st r14, [sp, #-376] // 4-byte Folded Spill
24+
; COARSE-GRAINED-NEXT: lda m0, [p1, #0]; st r15, [sp, #-380] // 4-byte Folded Spill
25+
; COARSE-GRAINED-NEXT: st p6, [sp, #-384] // 4-byte Folded Spill
26+
; COARSE-GRAINED-NEXT: mova r16, #0; st lr, [sp, #-348] // 4-byte Folded Spill
27+
; COARSE-GRAINED-NEXT: st r8, [sp, #-352]; vbcst.32 x0, r16 // 4-byte Folded Spill
28+
; COARSE-GRAINED-NEXT: st r0, [sp, #-248]; mov p6, p0 // 4-byte Folded Spill
29+
; COARSE-GRAINED-NEXT: vst x0, [sp, #-128]; mov p1, sp // 64-byte Folded Spill
30+
; COARSE-GRAINED-NEXT: st dj0, [sp, #-304] // 4-byte Folded Spill
31+
; COARSE-GRAINED-NEXT: mova m0, #-396; st m0, [sp, #-280] // 4-byte Folded Spill
32+
; COARSE-GRAINED-NEXT: padda [p1], m0; st dj0, [sp, #-272]; vmov x1, x0 // 4-byte Folded Spill
33+
; COARSE-GRAINED-NEXT: lda r8, [p1, #0]; st dj0, [sp, #-336]; mov p3, #0 // 4-byte Folded Spill
34+
; COARSE-GRAINED-NEXT: vst x1, [sp, #-64]; jl p3 // 64-byte Folded Spill
35+
; COARSE-GRAINED-NEXT: mova p2, #0; st dj4, [sp, #-288] // 4-byte Folded Spill Delay Slot 5
36+
; COARSE-GRAINED-NEXT: mova dj4, #1; st dj4, [sp, #-256]; mov r9, r1 // 4-byte Folded Spill Delay Slot 4
37+
; COARSE-GRAINED-NEXT: mova m0, #0; st dj4, [sp, #-320]; or r10, r2, r2; mov r11, r3 // 4-byte Folded Spill Delay Slot 3
38+
; COARSE-GRAINED-NEXT: mova p0, #0; st m0, [sp, #-344]; or r12, r4, r4; mov r13, r5 // 4-byte Folded Spill Delay Slot 2
39+
; COARSE-GRAINED-NEXT: mova p1, #0; or r14, r6, r6; mov r15, r7 // Delay Slot 1
40+
; COARSE-GRAINED-NEXT: lda m1, [sp, #-344]; nopb ; nopxm // 4-byte Folded Reload
41+
; COARSE-GRAINED-NEXT: lda dj5, [sp, #-320] // 4-byte Folded Reload
42+
; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; mov dn4, r15 // 4-byte Folded Reload
43+
; COARSE-GRAINED-NEXT: st dn4, [sp, #-260]; mov dj0, r12 // 4-byte Folded Spill
44+
; COARSE-GRAINED-NEXT: st dj0, [sp, #-272]; mov dn0, r14 // 4-byte Folded Spill
45+
; COARSE-GRAINED-NEXT: mova dc3, #0; st dn0, [sp, #-276]; mov m0, r11 // 4-byte Folded Spill
46+
; COARSE-GRAINED-NEXT: lda m3, [sp, #-280]; movs dj4, r13; mov dc7, dc3 // 4-byte Folded Reload
47+
; COARSE-GRAINED-NEXT: lda m0, [sp, #-312]; st m0, [sp, #-280] // 4-byte Folded Reload4-byte Folded Spill
48+
; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; st dj4, [sp, #-256] // 4-byte Folded Reload4-byte Folded Spill
49+
; COARSE-GRAINED-NEXT: lda m5, [sp, #-328]; movs dj6, dj5; mov m2, m1 // 4-byte Folded Reload
50+
; COARSE-GRAINED-NEXT: lda dn0, [sp, #-308]; movs dn3, m1; mov m1, dj5 // 4-byte Folded Reload
51+
; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; st m4, [sp, #-296] // 4-byte Folded Reload4-byte Folded Spill
52+
; COARSE-GRAINED-NEXT: lda dn4, [sp, #-292]; st m4, [sp, #-328] // 4-byte Folded Reload4-byte Folded Spill
53+
; COARSE-GRAINED-NEXT: movs dc0, m2; mov dc6, m2
54+
; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill
55+
; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill
56+
; COARSE-GRAINED-NEXT: movs m0, m2; mov dc4, m2
57+
; COARSE-GRAINED-NEXT: st dn0, [sp, #-308] // 4-byte Folded Spill
58+
; COARSE-GRAINED-NEXT: st dj0, [sp, #-304] // 4-byte Folded Spill
59+
; COARSE-GRAINED-NEXT: lda dj3, [sp, #-248]; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill
60+
; COARSE-GRAINED-NEXT: st m2, [sp, #-248] // 4-byte Folded Spill
61+
; COARSE-GRAINED-NEXT: st dj6, [sp, #-224] // 4-byte Folded Spill
62+
; COARSE-GRAINED-NEXT: st dn0, [sp, #-340] // 4-byte Folded Spill
63+
; COARSE-GRAINED-NEXT: st dj0, [sp, #-336] // 4-byte Folded Spill
64+
; COARSE-GRAINED-NEXT: st dn4, [sp, #-324] // 4-byte Folded Spill
65+
; COARSE-GRAINED-NEXT: st dc4, [sp, #-252] // 4-byte Folded Spill
66+
; COARSE-GRAINED-NEXT: vlda x2, [sp, #-128]; movs dj4, dj5; mov dc4, dj5 // 64-byte Folded Reload
67+
; COARSE-GRAINED-NEXT: vlda x3, [sp, #-64]; st dc0, [sp, #-268] // 64-byte Folded Reload4-byte Folded Spill
68+
; COARSE-GRAINED-NEXT: st dc0, [sp, #-300] // 4-byte Folded Spill
69+
; COARSE-GRAINED-NEXT: st dc6, [sp, #-220] // 4-byte Folded Spill
70+
; COARSE-GRAINED-NEXT: st m0, [sp, #-344] // 4-byte Folded Spill
71+
; COARSE-GRAINED-NEXT: st dc0, [sp, #-332]; mov dn7, r9 // 4-byte Folded Spill
72+
; COARSE-GRAINED-NEXT: st dj4, [sp, #-320]; mov dj7, r10 // 4-byte Folded Spill
73+
; COARSE-GRAINED-NEXT: st dc4, [sp, #-284]; vmov lfl0, x2 // 4-byte Folded Spill
74+
; COARSE-GRAINED-NEXT: lda m7, [sp, #-264]; st dc4, [sp, #-316]; movx r0, #1; vmov lfh0, x3 // 4-byte Folded Reload4-byte Folded Spill
75+
; COARSE-GRAINED-NEXT: mova r3, #0; movs dc5, m2; and r1, r8, r0; mov dc1, m2
76+
; COARSE-GRAINED-NEXT: .LBB0_1: // %for.body.i
77+
; COARSE-GRAINED-NEXT: // =>This Loop Header: Depth=1
78+
; COARSE-GRAINED-NEXT: // Child Loop BB0_2 Depth 2
79+
; COARSE-GRAINED-NEXT: lda m0, [sp, #-344]; nopb ; nopx // 4-byte Folded Reload
80+
; COARSE-GRAINED-NEXT: lda dc0, [sp, #-332] // 4-byte Folded Reload
81+
; COARSE-GRAINED-NEXT: lda dj4, [sp, #-320] // 4-byte Folded Reload
82+
; COARSE-GRAINED-NEXT: nop
83+
; COARSE-GRAINED-NEXT: lda dn1, [sp, #-244]; movs dj1, p6; mov dn1, dn3 // 4-byte Folded Reload
84+
; COARSE-GRAINED-NEXT: movs dn5, dn3; mov m2, m1
85+
; COARSE-GRAINED-NEXT: lda dn5, [sp, #-228]; movs dj5, p6; mov dc6, dc5 // 4-byte Folded Reload
86+
; COARSE-GRAINED-NEXT: mova p1, #0; st m2, [sp, #-216]; mov r25, r3 // 4-byte Folded Spill
87+
; COARSE-GRAINED-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; st dc6, [sp, #-188] // 4-byte Folded Spill
88+
; COARSE-GRAINED-NEXT: movs dc1, dc0; mov dj1, m0
89+
; COARSE-GRAINED-NEXT: movs m1, m0; mov dj5, dj4
90+
; COARSE-GRAINED-NEXT: st dn1, [sp, #-340]; vmov lfl1, lfl0 // 4-byte Folded Spill
91+
; COARSE-GRAINED-NEXT: lda m5, [sp, #-232]; st dc1, [sp, #-332]; vmov lfh1, lfh0 // 4-byte Folded Reload4-byte Folded Spill
92+
; COARSE-GRAINED-NEXT: lda dc5, [sp, #-220]; movs dn1, dn3; mov dc1, dc3 // 4-byte Folded Reload
93+
; COARSE-GRAINED-NEXT: st dn5, [sp, #-324] // 4-byte Folded Spill
94+
; COARSE-GRAINED-NEXT: st dj5, [sp, #-320] // 4-byte Folded Spill
95+
; COARSE-GRAINED-NEXT: movs dn5, dn3; mov dj5, m0
96+
; COARSE-GRAINED-NEXT: st m1, [sp, #-344] // 4-byte Folded Spill
97+
; COARSE-GRAINED-NEXT: st dj1, [sp, #-336] // 4-byte Folded Spill
98+
; COARSE-GRAINED-NEXT: st m5, [sp, #-328] // 4-byte Folded Spill
99+
; COARSE-GRAINED-NEXT: st dc5, [sp, #-316] // 4-byte Folded Spill
100+
; COARSE-GRAINED-NEXT: st m1, [sp, #-248] // 4-byte Folded Spill
101+
; COARSE-GRAINED-NEXT: st dj1, [sp, #-240] // 4-byte Folded Spill
102+
; COARSE-GRAINED-NEXT: st m5, [sp, #-232] // 4-byte Folded Spill
103+
; COARSE-GRAINED-NEXT: st dn1, [sp, #-244] // 4-byte Folded Spill
104+
; COARSE-GRAINED-NEXT: mova p0, #0; st dn5, [sp, #-228] // 4-byte Folded Spill
105+
; COARSE-GRAINED-NEXT: paddb.3d [p0], d1; st dj5, [sp, #-224] // 4-byte Folded Spill
106+
; COARSE-GRAINED-NEXT: st dc1, [sp, #-236] // 4-byte Folded Spill
107+
; COARSE-GRAINED-NEXT: mova p0, #0; st dc5, [sp, #-220] // 4-byte Folded Spill
108+
; COARSE-GRAINED-NEXT: .LBB0_2: // %for.body125.i
109+
; COARSE-GRAINED-NEXT: // Parent Loop BB0_1 Depth=1
110+
; COARSE-GRAINED-NEXT: // => This Inner Loop Header: Depth=2
111+
; COARSE-GRAINED-NEXT: nops ; mov dn1, dn3
112+
; COARSE-GRAINED-NEXT: movs m1, m3; mov dj1, dj3
113+
; COARSE-GRAINED-NEXT: movs dc1, dc3; mov dn5, dn7
114+
; COARSE-GRAINED-NEXT: movs m5, m7; mov dc5, dc7
115+
; COARSE-GRAINED-NEXT: movs dj5, dj7; mov r25, r3
116+
; COARSE-GRAINED-NEXT: movs p1, p0; vmov lfl1, x2
117+
; COARSE-GRAINED-NEXT: .L_LEnd0:
118+
; COARSE-GRAINED-NEXT: nopa ; vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; nops ; nopx ; vmov lfh1, x3; nopv
119+
; COARSE-GRAINED-NEXT: // %bb.3: // %for.cond.cleanup124.i
120+
; COARSE-GRAINED-NEXT: // in Loop: Header=BB0_1 Depth=1
121+
; COARSE-GRAINED-NEXT: lda m2, [sp, #-344]; nopb ; nopx // 4-byte Folded Reload
122+
; COARSE-GRAINED-NEXT: lda dn2, [sp, #-276] // 4-byte Folded Reload
123+
; COARSE-GRAINED-NEXT: nop
124+
; COARSE-GRAINED-NEXT: nop
125+
; COARSE-GRAINED-NEXT: lda dj2, [sp, #-272] // 4-byte Folded Reload
126+
; COARSE-GRAINED-NEXT: lda m6, [sp, #-264] // 4-byte Folded Reload
127+
; COARSE-GRAINED-NEXT: lda dn6, [sp, #-260] // 4-byte Folded Reload
128+
; COARSE-GRAINED-NEXT: lda dj6, [sp, #-256] // 4-byte Folded Reload
129+
; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; mov dn0, m2 // 4-byte Folded Reload
130+
; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; movs m0, m2; mov dn4, m2 // 4-byte Folded Reload
131+
; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; st dn2, [sp, #-276] // 4-byte Folded Reload4-byte Folded Spill
132+
; COARSE-GRAINED-NEXT: st dj2, [sp, #-272] // 4-byte Folded Spill
133+
; COARSE-GRAINED-NEXT: lda dc0, [sp, #-300]; st m6, [sp, #-264] // 4-byte Folded Reload4-byte Folded Spill
134+
; COARSE-GRAINED-NEXT: lda dc4, [sp, #-284]; st dn6, [sp, #-260] // 4-byte Folded Reload4-byte Folded Spill
135+
; COARSE-GRAINED-NEXT: lda dc2, [sp, #-268]; st dj6, [sp, #-256] // 4-byte Folded Reload4-byte Folded Spill
136+
; COARSE-GRAINED-NEXT: lda dc6, [sp, #-252]; st dj0, [sp, #-304] // 4-byte Folded Reload4-byte Folded Spill
137+
; COARSE-GRAINED-NEXT: lda m2, [sp, #-280]; st m4, [sp, #-296] // 4-byte Folded Reload4-byte Folded Spill
138+
; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill
139+
; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill
140+
; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; st dn0, [sp, #-308]; mov p1, #0 // 4-byte Folded Reload4-byte Folded Spill
141+
; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; paddb.3d [p1], d0; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill
142+
; COARSE-GRAINED-NEXT: lda dn0, [sp, #-308]; st dc0, [sp, #-300] // 4-byte Folded Reload4-byte Folded Spill
143+
; COARSE-GRAINED-NEXT: lda dn4, [sp, #-292]; st dc4, [sp, #-284]; mov p0, #0 // 4-byte Folded Reload4-byte Folded Spill
144+
; COARSE-GRAINED-NEXT: lda m2, [sp, #-344]; paddb.3d [p0], d2; st m2, [sp, #-280] // 4-byte Folded Reload4-byte Folded Spill
145+
; COARSE-GRAINED-NEXT: st dc2, [sp, #-268] // 4-byte Folded Spill
146+
; COARSE-GRAINED-NEXT: st dc6, [sp, #-252] // 4-byte Folded Spill
147+
; COARSE-GRAINED-NEXT: lda dj6, [sp, #-320]; st dj0, [sp, #-304] // 4-byte Folded Reload4-byte Folded Spill
148+
; COARSE-GRAINED-NEXT: st m4, [sp, #-296] // 4-byte Folded Spill
149+
; COARSE-GRAINED-NEXT: lda m6, [sp, #-328]; st dn0, [sp, #-308] // 4-byte Folded Reload4-byte Folded Spill
150+
; COARSE-GRAINED-NEXT: lda dc2, [sp, #-332]; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill
151+
; COARSE-GRAINED-NEXT: mov dn2, m2
152+
; COARSE-GRAINED-NEXT: lda m2, [sp, #-216]; movs dj2, m2; mov dn6, m2 // 4-byte Folded Reload
153+
; COARSE-GRAINED-NEXT: lda m0, [sp, #-312]; movs dc6, m2; mov m0, m2 // 4-byte Folded Reload
154+
; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; movs dj4, dj6; mov dc4, m2 // 4-byte Folded Reload
155+
; COARSE-GRAINED-NEXT: lda dc0, [sp, #-300]; st m0, [sp, #-344] // 4-byte Folded Reload4-byte Folded Spill
156+
; COARSE-GRAINED-NEXT: lda dc6, [sp, #-188]; st dj4, [sp, #-320]; xor r2, r8, r0; mov p0, #0 // 4-byte Folded Reload4-byte Folded Spill
157+
; COARSE-GRAINED-NEXT: st dc4, [sp, #-284]; paddb.3d [p0], d2; and r2, r2, r0 // 4-byte Folded Spill
158+
; COARSE-GRAINED-NEXT: movs dc0, dc2; jnz r2, #.LBB0_1
159+
; COARSE-GRAINED-NEXT: st dc0, [sp, #-332] // 4-byte Folded Spill Delay Slot 5
160+
; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill Delay Slot 4
161+
; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill Delay Slot 3
162+
; COARSE-GRAINED-NEXT: st dc0, [sp, #-300] // 4-byte Folded Spill Delay Slot 2
163+
; COARSE-GRAINED-NEXT: movs m1, m2; mov dc5, dc6 // Delay Slot 1
164+
; COARSE-GRAINED-NEXT: // %bb.4: // %ret.exit
165+
; COARSE-GRAINED-NEXT: lda p6, [sp, #-384] // 4-byte Folded Reload
166+
; COARSE-GRAINED-NEXT: lda r15, [sp, #-380] // 4-byte Folded Reload
167+
; COARSE-GRAINED-NEXT: lda r14, [sp, #-376] // 4-byte Folded Reload
168+
; COARSE-GRAINED-NEXT: lda lr, [sp, #-348] // 4-byte Folded Reload
169+
; COARSE-GRAINED-NEXT: lda r13, [sp, #-372] // 4-byte Folded Reload
170+
; COARSE-GRAINED-NEXT: lda r12, [sp, #-368] // 4-byte Folded Reload
171+
; COARSE-GRAINED-NEXT: lda r11, [sp, #-364] // 4-byte Folded Reload
172+
; COARSE-GRAINED-NEXT: lda r10, [sp, #-360] // 4-byte Folded Reload
173+
; COARSE-GRAINED-NEXT: lda r9, [sp, #-356] // 4-byte Folded Reload
174+
; COARSE-GRAINED-NEXT: lda r8, [sp, #-352] // 4-byte Folded Reload
175+
; COARSE-GRAINED-NEXT: ret lr
176+
; COARSE-GRAINED-NEXT: nop // Delay Slot 5
177+
; COARSE-GRAINED-NEXT: nop // Delay Slot 4
178+
; COARSE-GRAINED-NEXT: nop // Delay Slot 3
179+
; COARSE-GRAINED-NEXT: paddxm [sp], #-384 // Delay Slot 2
180+
; COARSE-GRAINED-NEXT: nop // Delay Slot 1
181+
; Entry: calls through a null callee, then truncates the i32 arguments to the
; i20 values (%2..%12) used as intrinsic operands in the loops below.
entry:
182+
tail call void null(ptr null, ptr null, ptr null)
183+
%2 = trunc i32 %dimsAI.sroa.11.0.copyload.i to i20
184+
%3 = trunc i32 %dimsAI.sroa.5.0.copyload.i to i20
185+
%4 = trunc i32 %dimsAI.sroa.7.0.copyload.i to i20
186+
%5 = trunc i32 %dimsAI.sroa.9.0.copyload.i to i20
187+
%6 = trunc i32 %dimsAO.sroa.7.0.copyload.i to i20
188+
%7 = trunc i32 %dimsAO.sroa.4.0.copyload.i to i20
189+
%8 = trunc i32 %dimsAO.sroa.6.0.copyload.i to i20
190+
%9 = trunc i32 %dimsAO.sroa.0.0.copyload.i to i20
191+
%10 = trunc i32 %dimsAO.sroa.5.0.copyload.i to i20
192+
%11 = trunc i32 %dimsW.sroa.4.0.copyload.i to i20
193+
%12 = trunc i32 %dimsW.sroa.6.0.copyload.i to i20
194+
br label %for.body.i
195+
196+
; Outer loop header. The phis take their back-edge values from the inner loop
; (%40), from for.cond.cleanup124.i (%22, %27, %29, %34) and from
; if.end239.i (%45).
for.body.i: ; preds = %if.end239.i, %entry
197+
%dimsAI.sroa.13.0458.i = phi i32 [ 0, %entry ], [ %40, %if.end239.i ]
198+
%dimsAO.sroa.10.0457.i = phi i32 [ 0, %entry ], [ %29, %if.end239.i ]
199+
%dimsAO.sroa.8.0456.i = phi i32 [ 0, %entry ], [ %27, %if.end239.i ]
200+
%dimsW.sroa.10.0455.i = phi i32 [ 1, %entry ], [ 0, %if.end239.i ]
201+
%dimsW.sroa.8.0454.i = phi i32 [ 0, %entry ], [ %34, %if.end239.i ]
202+
%iterator_psum_cnt1.0452.i = phi i32 [ 0, %entry ], [ %22, %if.end239.i ]
203+
%iterator_pout_cnt0.0451.i = phi i32 [ 0, %entry ], [ %45, %if.end239.i ]
204+
%13 = trunc i32 0 to i20
205+
%14 = trunc i32 %iterator_psum_cnt1.0452.i to i20
206+
%15 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 %14)
207+
%16 = extractvalue { ptr, i20, i20 } %15, 2
208+
%17 = trunc i32 %dimsAI.sroa.13.0458.i to i20
209+
%18 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 1, i20 0, i20 %17, i20 %0, i20 0, i20 0, i20 %0)
210+
%19 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %18, 5
211+
%20 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %18, 6
212+
br label %for.body125.i
213+
214+
; Inner-loop exit: two more add.3d calls whose field-1/field-2 results are
; zero-extended and fed back to the outer-loop phis.
for.cond.cleanup124.i: ; preds = %for.body125.i
215+
%21 = extractvalue { ptr, i20, i20 } %15, 1
216+
%22 = zext i20 %16 to i32
217+
%23 = trunc i32 %dimsAO.sroa.8.0456.i to i20
218+
%24 = trunc i32 %dimsAO.sroa.10.0457.i to i20
219+
%25 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 %6, i20 %7, i20 %8, i20 %9, i20 %23, i20 %10, i20 %24)
220+
%26 = extractvalue { ptr, i20, i20 } %25, 1
221+
%27 = zext i20 %26 to i32
222+
%28 = extractvalue { ptr, i20, i20 } %25, 2
223+
%29 = zext i20 %28 to i32
224+
%30 = trunc i32 %dimsW.sroa.8.0454.i to i20
225+
%31 = trunc i32 %dimsW.sroa.10.0455.i to i20
226+
%32 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 %11, i20 %12, i20 0, i20 %30, i20 0, i20 %31)
227+
%33 = extractvalue { ptr, i20, i20 } %32, 1
228+
%34 = zext i20 %33 to i32
229+
%35 = extractvalue { ptr, i20, i20 } %32, 2
230+
br i1 %1, label %if.else.i14, label %if.end239.i
231+
232+
; Inner loop: a single block that branches back to itself while
; llvm.loop.decrement returns true.
for.body125.i: ; preds = %for.body125.i, %for.body.i
233+
%36 = trunc i32 0 to i20
234+
%37 = trunc i32 0 to i20
235+
%38 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 %2, i20 0, i20 %36, i20 %3, i20 %4, i20 %37, i20 %5)
236+
%39 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %38, 3
237+
%40 = zext i20 %39 to i32
238+
%41 = call i1 @llvm.loop.decrement.i32(i32 0)
239+
br i1 %41, label %for.body125.i, label %for.cond.cleanup124.i
240+
241+
if.else.i14: ; preds = %for.cond.cleanup124.i
242+
%add.ptr.i327.i = getelementptr i8, ptr null, i20 0
243+
br label %if.end239.i
244+
245+
; Outer-loop latch: exits to ret.exit when %1 is true, otherwise branches back
; to for.body.i.
if.end239.i: ; preds = %if.else.i14, %for.cond.cleanup124.i
246+
%42 = trunc i32 %iterator_pout_cnt0.0451.i to i20
247+
%43 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 1, i20 0, i20 %42, i20 0, i20 0)
248+
%44 = extractvalue { ptr, i20, i20 } %43, 1
249+
%45 = zext i20 %44 to i32
250+
%46 = extractvalue { ptr, i20, i20 } %43, 2
251+
br i1 %1, label %ret.exit, label %for.body.i
252+
253+
ret.exit: ; preds = %if.end239.i
254+
ret void
255+
}
256+
257+
; Function Attrs: nounwind memory(none)
258+
; Returns a { ptr, i20, i20 } aggregate. In @heavy_3d_user, fields 1 and 2 of
; the result are extracted and (via zext/phi/trunc) passed back in as the 6th
; and 8th arguments of the same call on the next outer-loop iteration.
declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0
259+
260+
; Function Attrs: nounwind memory(argmem: read)
261+
; Returns a 7-field aggregate; @heavy_3d_user reads fields 5 and 6 of the
; outer-loop call and field 3 (an i20) of the inner-loop call.
declare { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5), <32 x i32>, i32, i20, i20, i20, i20, i20, i20, i20) #1
262+
263+
; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
264+
; Its i1 result is the back-edge condition of the inner loop (for.body125.i)
; in @heavy_3d_user.
declare i1 @llvm.loop.decrement.i32(i32) #2
265+
266+
; uselistorder directives
267+
; Fix the use-list order of the two AIE2P intrinsics: four uses of add.3d and
; two uses of fifo.ld.pop.576.3d, matching the call counts in @heavy_3d_user.
uselistorder ptr @llvm.aie2p.add.3d, { 3, 2, 1, 0 }
268+
uselistorder ptr @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5, { 1, 0 }
269+
270+
; Attribute groups referenced by the intrinsic declarations:
; #0 -> add.3d, #1 -> fifo.ld.pop.576.3d, #2 -> loop.decrement.
attributes #0 = { nounwind memory(none) }
271+
attributes #1 = { nounwind memory(argmem: read) }
272+
attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn }

0 commit comments

Comments
 (0)