|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | + |
| 3 | +; |
| 4 | +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. |
| 5 | +; See https://llvm.org/LICENSE.txt for license information. |
| 6 | +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | +; |
| 8 | +; (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates |
| 9 | +; RUN: not llc -mtriple aie2p -o %t.s %s 2>&1 | FileCheck %s --check-prefix=BUNDLE-ERROR |
| 10 | +; RUN: llc -mtriple=aie2p --aie-staged-ra-fine-grained-alloc=false %s -o - | FileCheck %s --check-prefix=COARSE-GRAINED |
| 11 | + |
| 12 | +; Without extra flags (first RUN line), llc is expected to fail with this diagnostic: |
| 13 | +; BUNDLE-ERROR: error: register rewriting failed: cycle in copy bundle |
; Test function shared by both RUN lines at the top of the file. Its IR is a
; two-level loop nest (%for.body.i outer, %for.body125.i inner) that calls
; @llvm.aie2p.add.3d and @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5 and feeds
; several of their i20 results back into the outer-loop phis.
; The first RUN line runs llc under 'not' and only checks the diagnostic with the
; BUNDLE-ERROR prefix; the second RUN line passes
; --aie-staged-ra-fine-grained-alloc=false and matches the COARSE-GRAINED lines
; below. Those lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
; NOTE(review): the null pointers and constant operands suggest this is a reduced
; reproducer -- confirm before simplifying the IR any further.
| 14 | +define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0.copyload.i, i32 %dimsAI.sroa.9.0.copyload.i, i32 %dimsAO.sroa.7.0.copyload.i, i32 %dimsAO.sroa.4.0.copyload.i, i32 %dimsAO.sroa.6.0.copyload.i, i32 %dimsAO.sroa.0.0.copyload.i, i32 %dimsAO.sroa.5.0.copyload.i, i32 %dimsW.sroa.4.0.copyload.i, i32 %dimsW.sroa.6.0.copyload.i, i20 %0, i1 %1, i32 %dimsAI.sroa.11.0.copyload.i) { |
| 15 | +; COARSE-GRAINED-LABEL: heavy_3d_user: |
| 16 | +; COARSE-GRAINED: // %bb.0: // %entry |
| 17 | +; COARSE-GRAINED-NEXT: nopa ; nopb ; paddxm [sp], #384; nops |
| 18 | +; COARSE-GRAINED-NEXT: mova m0, #-388; st r9, [sp, #-356]; mov p1, sp // 4-byte Folded Spill |
| 19 | +; COARSE-GRAINED-NEXT: mova m0, #-392; paddb [p1], m0; st r10, [sp, #-360] // 4-byte Folded Spill |
| 20 | +; COARSE-GRAINED-NEXT: lda dj0, [p1, #0]; st r11, [sp, #-364]; mov p1, sp // 4-byte Folded Spill |
| 21 | +; COARSE-GRAINED-NEXT: mova m0, #-400; paddb [p1], m0; st r12, [sp, #-368] // 4-byte Folded Spill |
| 22 | +; COARSE-GRAINED-NEXT: lda dj4, [p1, #0]; st r13, [sp, #-372]; mov p1, sp // 4-byte Folded Spill |
| 23 | +; COARSE-GRAINED-NEXT: padda [p1], m0; st r14, [sp, #-376] // 4-byte Folded Spill |
| 24 | +; COARSE-GRAINED-NEXT: lda m0, [p1, #0]; st r15, [sp, #-380] // 4-byte Folded Spill |
| 25 | +; COARSE-GRAINED-NEXT: st p6, [sp, #-384] // 4-byte Folded Spill |
| 26 | +; COARSE-GRAINED-NEXT: mova r16, #0; st lr, [sp, #-348] // 4-byte Folded Spill |
| 27 | +; COARSE-GRAINED-NEXT: st r8, [sp, #-352]; vbcst.32 x0, r16 // 4-byte Folded Spill |
| 28 | +; COARSE-GRAINED-NEXT: st r0, [sp, #-248]; mov p6, p0 // 4-byte Folded Spill |
| 29 | +; COARSE-GRAINED-NEXT: vst x0, [sp, #-128]; mov p1, sp // 64-byte Folded Spill |
| 30 | +; COARSE-GRAINED-NEXT: st dj0, [sp, #-304] // 4-byte Folded Spill |
| 31 | +; COARSE-GRAINED-NEXT: mova m0, #-396; st m0, [sp, #-280] // 4-byte Folded Spill |
| 32 | +; COARSE-GRAINED-NEXT: padda [p1], m0; st dj0, [sp, #-272]; vmov x1, x0 // 4-byte Folded Spill |
| 33 | +; COARSE-GRAINED-NEXT: lda r8, [p1, #0]; st dj0, [sp, #-336]; mov p3, #0 // 4-byte Folded Spill |
| 34 | +; COARSE-GRAINED-NEXT: vst x1, [sp, #-64]; jl p3 // 64-byte Folded Spill |
| 35 | +; COARSE-GRAINED-NEXT: mova p2, #0; st dj4, [sp, #-288] // 4-byte Folded Spill Delay Slot 5 |
| 36 | +; COARSE-GRAINED-NEXT: mova dj4, #1; st dj4, [sp, #-256]; mov r9, r1 // 4-byte Folded Spill Delay Slot 4 |
| 37 | +; COARSE-GRAINED-NEXT: mova m0, #0; st dj4, [sp, #-320]; or r10, r2, r2; mov r11, r3 // 4-byte Folded Spill Delay Slot 3 |
| 38 | +; COARSE-GRAINED-NEXT: mova p0, #0; st m0, [sp, #-344]; or r12, r4, r4; mov r13, r5 // 4-byte Folded Spill Delay Slot 2 |
| 39 | +; COARSE-GRAINED-NEXT: mova p1, #0; or r14, r6, r6; mov r15, r7 // Delay Slot 1 |
| 40 | +; COARSE-GRAINED-NEXT: lda m1, [sp, #-344]; nopb ; nopxm // 4-byte Folded Reload |
| 41 | +; COARSE-GRAINED-NEXT: lda dj5, [sp, #-320] // 4-byte Folded Reload |
| 42 | +; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; mov dn4, r15 // 4-byte Folded Reload |
| 43 | +; COARSE-GRAINED-NEXT: st dn4, [sp, #-260]; mov dj0, r12 // 4-byte Folded Spill |
| 44 | +; COARSE-GRAINED-NEXT: st dj0, [sp, #-272]; mov dn0, r14 // 4-byte Folded Spill |
| 45 | +; COARSE-GRAINED-NEXT: mova dc3, #0; st dn0, [sp, #-276]; mov m0, r11 // 4-byte Folded Spill |
| 46 | +; COARSE-GRAINED-NEXT: lda m3, [sp, #-280]; movs dj4, r13; mov dc7, dc3 // 4-byte Folded Reload |
| 47 | +; COARSE-GRAINED-NEXT: lda m0, [sp, #-312]; st m0, [sp, #-280] // 4-byte Folded Reload4-byte Folded Spill |
| 48 | +; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; st dj4, [sp, #-256] // 4-byte Folded Reload4-byte Folded Spill |
| 49 | +; COARSE-GRAINED-NEXT: lda m5, [sp, #-328]; movs dj6, dj5; mov m2, m1 // 4-byte Folded Reload |
| 50 | +; COARSE-GRAINED-NEXT: lda dn0, [sp, #-308]; movs dn3, m1; mov m1, dj5 // 4-byte Folded Reload |
| 51 | +; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; st m4, [sp, #-296] // 4-byte Folded Reload4-byte Folded Spill |
| 52 | +; COARSE-GRAINED-NEXT: lda dn4, [sp, #-292]; st m4, [sp, #-328] // 4-byte Folded Reload4-byte Folded Spill |
| 53 | +; COARSE-GRAINED-NEXT: movs dc0, m2; mov dc6, m2 |
| 54 | +; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill |
| 55 | +; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill |
| 56 | +; COARSE-GRAINED-NEXT: movs m0, m2; mov dc4, m2 |
| 57 | +; COARSE-GRAINED-NEXT: st dn0, [sp, #-308] // 4-byte Folded Spill |
| 58 | +; COARSE-GRAINED-NEXT: st dj0, [sp, #-304] // 4-byte Folded Spill |
| 59 | +; COARSE-GRAINED-NEXT: lda dj3, [sp, #-248]; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill |
| 60 | +; COARSE-GRAINED-NEXT: st m2, [sp, #-248] // 4-byte Folded Spill |
| 61 | +; COARSE-GRAINED-NEXT: st dj6, [sp, #-224] // 4-byte Folded Spill |
| 62 | +; COARSE-GRAINED-NEXT: st dn0, [sp, #-340] // 4-byte Folded Spill |
| 63 | +; COARSE-GRAINED-NEXT: st dj0, [sp, #-336] // 4-byte Folded Spill |
| 64 | +; COARSE-GRAINED-NEXT: st dn4, [sp, #-324] // 4-byte Folded Spill |
| 65 | +; COARSE-GRAINED-NEXT: st dc4, [sp, #-252] // 4-byte Folded Spill |
| 66 | +; COARSE-GRAINED-NEXT: vlda x2, [sp, #-128]; movs dj4, dj5; mov dc4, dj5 // 64-byte Folded Reload |
| 67 | +; COARSE-GRAINED-NEXT: vlda x3, [sp, #-64]; st dc0, [sp, #-268] // 64-byte Folded Reload4-byte Folded Spill |
| 68 | +; COARSE-GRAINED-NEXT: st dc0, [sp, #-300] // 4-byte Folded Spill |
| 69 | +; COARSE-GRAINED-NEXT: st dc6, [sp, #-220] // 4-byte Folded Spill |
| 70 | +; COARSE-GRAINED-NEXT: st m0, [sp, #-344] // 4-byte Folded Spill |
| 71 | +; COARSE-GRAINED-NEXT: st dc0, [sp, #-332]; mov dn7, r9 // 4-byte Folded Spill |
| 72 | +; COARSE-GRAINED-NEXT: st dj4, [sp, #-320]; mov dj7, r10 // 4-byte Folded Spill |
| 73 | +; COARSE-GRAINED-NEXT: st dc4, [sp, #-284]; vmov lfl0, x2 // 4-byte Folded Spill |
| 74 | +; COARSE-GRAINED-NEXT: lda m7, [sp, #-264]; st dc4, [sp, #-316]; movx r0, #1; vmov lfh0, x3 // 4-byte Folded Reload4-byte Folded Spill |
| 75 | +; COARSE-GRAINED-NEXT: mova r3, #0; movs dc5, m2; and r1, r8, r0; mov dc1, m2 |
| 76 | +; COARSE-GRAINED-NEXT: .LBB0_1: // %for.body.i |
| 77 | +; COARSE-GRAINED-NEXT: // =>This Loop Header: Depth=1 |
| 78 | +; COARSE-GRAINED-NEXT: // Child Loop BB0_2 Depth 2 |
| 79 | +; COARSE-GRAINED-NEXT: lda m0, [sp, #-344]; nopb ; nopx // 4-byte Folded Reload |
| 80 | +; COARSE-GRAINED-NEXT: lda dc0, [sp, #-332] // 4-byte Folded Reload |
| 81 | +; COARSE-GRAINED-NEXT: lda dj4, [sp, #-320] // 4-byte Folded Reload |
| 82 | +; COARSE-GRAINED-NEXT: nop |
| 83 | +; COARSE-GRAINED-NEXT: lda dn1, [sp, #-244]; movs dj1, p6; mov dn1, dn3 // 4-byte Folded Reload |
| 84 | +; COARSE-GRAINED-NEXT: movs dn5, dn3; mov m2, m1 |
| 85 | +; COARSE-GRAINED-NEXT: lda dn5, [sp, #-228]; movs dj5, p6; mov dc6, dc5 // 4-byte Folded Reload |
| 86 | +; COARSE-GRAINED-NEXT: mova p1, #0; st m2, [sp, #-216]; mov r25, r3 // 4-byte Folded Spill |
| 87 | +; COARSE-GRAINED-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; st dc6, [sp, #-188] // 4-byte Folded Spill |
| 88 | +; COARSE-GRAINED-NEXT: movs dc1, dc0; mov dj1, m0 |
| 89 | +; COARSE-GRAINED-NEXT: movs m1, m0; mov dj5, dj4 |
| 90 | +; COARSE-GRAINED-NEXT: st dn1, [sp, #-340]; vmov lfl1, lfl0 // 4-byte Folded Spill |
| 91 | +; COARSE-GRAINED-NEXT: lda m5, [sp, #-232]; st dc1, [sp, #-332]; vmov lfh1, lfh0 // 4-byte Folded Reload4-byte Folded Spill |
| 92 | +; COARSE-GRAINED-NEXT: lda dc5, [sp, #-220]; movs dn1, dn3; mov dc1, dc3 // 4-byte Folded Reload |
| 93 | +; COARSE-GRAINED-NEXT: st dn5, [sp, #-324] // 4-byte Folded Spill |
| 94 | +; COARSE-GRAINED-NEXT: st dj5, [sp, #-320] // 4-byte Folded Spill |
| 95 | +; COARSE-GRAINED-NEXT: movs dn5, dn3; mov dj5, m0 |
| 96 | +; COARSE-GRAINED-NEXT: st m1, [sp, #-344] // 4-byte Folded Spill |
| 97 | +; COARSE-GRAINED-NEXT: st dj1, [sp, #-336] // 4-byte Folded Spill |
| 98 | +; COARSE-GRAINED-NEXT: st m5, [sp, #-328] // 4-byte Folded Spill |
| 99 | +; COARSE-GRAINED-NEXT: st dc5, [sp, #-316] // 4-byte Folded Spill |
| 100 | +; COARSE-GRAINED-NEXT: st m1, [sp, #-248] // 4-byte Folded Spill |
| 101 | +; COARSE-GRAINED-NEXT: st dj1, [sp, #-240] // 4-byte Folded Spill |
| 102 | +; COARSE-GRAINED-NEXT: st m5, [sp, #-232] // 4-byte Folded Spill |
| 103 | +; COARSE-GRAINED-NEXT: st dn1, [sp, #-244] // 4-byte Folded Spill |
| 104 | +; COARSE-GRAINED-NEXT: mova p0, #0; st dn5, [sp, #-228] // 4-byte Folded Spill |
| 105 | +; COARSE-GRAINED-NEXT: paddb.3d [p0], d1; st dj5, [sp, #-224] // 4-byte Folded Spill |
| 106 | +; COARSE-GRAINED-NEXT: st dc1, [sp, #-236] // 4-byte Folded Spill |
| 107 | +; COARSE-GRAINED-NEXT: mova p0, #0; st dc5, [sp, #-220] // 4-byte Folded Spill |
| 108 | +; COARSE-GRAINED-NEXT: .LBB0_2: // %for.body125.i |
| 109 | +; COARSE-GRAINED-NEXT: // Parent Loop BB0_1 Depth=1 |
| 110 | +; COARSE-GRAINED-NEXT: // => This Inner Loop Header: Depth=2 |
| 111 | +; COARSE-GRAINED-NEXT: nops ; mov dn1, dn3 |
| 112 | +; COARSE-GRAINED-NEXT: movs m1, m3; mov dj1, dj3 |
| 113 | +; COARSE-GRAINED-NEXT: movs dc1, dc3; mov dn5, dn7 |
| 114 | +; COARSE-GRAINED-NEXT: movs m5, m7; mov dc5, dc7 |
| 115 | +; COARSE-GRAINED-NEXT: movs dj5, dj7; mov r25, r3 |
| 116 | +; COARSE-GRAINED-NEXT: movs p1, p0; vmov lfl1, x2 |
| 117 | +; COARSE-GRAINED-NEXT: .L_LEnd0: |
| 118 | +; COARSE-GRAINED-NEXT: nopa ; vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; nops ; nopx ; vmov lfh1, x3; nopv |
| 119 | +; COARSE-GRAINED-NEXT: // %bb.3: // %for.cond.cleanup124.i |
| 120 | +; COARSE-GRAINED-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| 121 | +; COARSE-GRAINED-NEXT: lda m2, [sp, #-344]; nopb ; nopx // 4-byte Folded Reload |
| 122 | +; COARSE-GRAINED-NEXT: lda dn2, [sp, #-276] // 4-byte Folded Reload |
| 123 | +; COARSE-GRAINED-NEXT: nop |
| 124 | +; COARSE-GRAINED-NEXT: nop |
| 125 | +; COARSE-GRAINED-NEXT: lda dj2, [sp, #-272] // 4-byte Folded Reload |
| 126 | +; COARSE-GRAINED-NEXT: lda m6, [sp, #-264] // 4-byte Folded Reload |
| 127 | +; COARSE-GRAINED-NEXT: lda dn6, [sp, #-260] // 4-byte Folded Reload |
| 128 | +; COARSE-GRAINED-NEXT: lda dj6, [sp, #-256] // 4-byte Folded Reload |
| 129 | +; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; mov dn0, m2 // 4-byte Folded Reload |
| 130 | +; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; movs m0, m2; mov dn4, m2 // 4-byte Folded Reload |
| 131 | +; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; st dn2, [sp, #-276] // 4-byte Folded Reload4-byte Folded Spill |
| 132 | +; COARSE-GRAINED-NEXT: st dj2, [sp, #-272] // 4-byte Folded Spill |
| 133 | +; COARSE-GRAINED-NEXT: lda dc0, [sp, #-300]; st m6, [sp, #-264] // 4-byte Folded Reload4-byte Folded Spill |
| 134 | +; COARSE-GRAINED-NEXT: lda dc4, [sp, #-284]; st dn6, [sp, #-260] // 4-byte Folded Reload4-byte Folded Spill |
| 135 | +; COARSE-GRAINED-NEXT: lda dc2, [sp, #-268]; st dj6, [sp, #-256] // 4-byte Folded Reload4-byte Folded Spill |
| 136 | +; COARSE-GRAINED-NEXT: lda dc6, [sp, #-252]; st dj0, [sp, #-304] // 4-byte Folded Reload4-byte Folded Spill |
| 137 | +; COARSE-GRAINED-NEXT: lda m2, [sp, #-280]; st m4, [sp, #-296] // 4-byte Folded Reload4-byte Folded Spill |
| 138 | +; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill |
| 139 | +; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill |
| 140 | +; COARSE-GRAINED-NEXT: lda dj0, [sp, #-304]; st dn0, [sp, #-308]; mov p1, #0 // 4-byte Folded Reload4-byte Folded Spill |
| 141 | +; COARSE-GRAINED-NEXT: lda m4, [sp, #-296]; paddb.3d [p1], d0; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill |
| 142 | +; COARSE-GRAINED-NEXT: lda dn0, [sp, #-308]; st dc0, [sp, #-300] // 4-byte Folded Reload4-byte Folded Spill |
| 143 | +; COARSE-GRAINED-NEXT: lda dn4, [sp, #-292]; st dc4, [sp, #-284]; mov p0, #0 // 4-byte Folded Reload4-byte Folded Spill |
| 144 | +; COARSE-GRAINED-NEXT: lda m2, [sp, #-344]; paddb.3d [p0], d2; st m2, [sp, #-280] // 4-byte Folded Reload4-byte Folded Spill |
| 145 | +; COARSE-GRAINED-NEXT: st dc2, [sp, #-268] // 4-byte Folded Spill |
| 146 | +; COARSE-GRAINED-NEXT: st dc6, [sp, #-252] // 4-byte Folded Spill |
| 147 | +; COARSE-GRAINED-NEXT: lda dj6, [sp, #-320]; st dj0, [sp, #-304] // 4-byte Folded Reload4-byte Folded Spill |
| 148 | +; COARSE-GRAINED-NEXT: st m4, [sp, #-296] // 4-byte Folded Spill |
| 149 | +; COARSE-GRAINED-NEXT: lda m6, [sp, #-328]; st dn0, [sp, #-308] // 4-byte Folded Reload4-byte Folded Spill |
| 150 | +; COARSE-GRAINED-NEXT: lda dc2, [sp, #-332]; st dn4, [sp, #-292] // 4-byte Folded Reload4-byte Folded Spill |
| 151 | +; COARSE-GRAINED-NEXT: mov dn2, m2 |
| 152 | +; COARSE-GRAINED-NEXT: lda m2, [sp, #-216]; movs dj2, m2; mov dn6, m2 // 4-byte Folded Reload |
| 153 | +; COARSE-GRAINED-NEXT: lda m0, [sp, #-312]; movs dc6, m2; mov m0, m2 // 4-byte Folded Reload |
| 154 | +; COARSE-GRAINED-NEXT: lda dj4, [sp, #-288]; movs dj4, dj6; mov dc4, m2 // 4-byte Folded Reload |
| 155 | +; COARSE-GRAINED-NEXT: lda dc0, [sp, #-300]; st m0, [sp, #-344] // 4-byte Folded Reload4-byte Folded Spill |
| 156 | +; COARSE-GRAINED-NEXT: lda dc6, [sp, #-188]; st dj4, [sp, #-320]; xor r2, r8, r0; mov p0, #0 // 4-byte Folded Reload4-byte Folded Spill |
| 157 | +; COARSE-GRAINED-NEXT: st dc4, [sp, #-284]; paddb.3d [p0], d2; and r2, r2, r0 // 4-byte Folded Spill |
| 158 | +; COARSE-GRAINED-NEXT: movs dc0, dc2; jnz r2, #.LBB0_1 |
| 159 | +; COARSE-GRAINED-NEXT: st dc0, [sp, #-332] // 4-byte Folded Spill Delay Slot 5 |
| 160 | +; COARSE-GRAINED-NEXT: st m0, [sp, #-312] // 4-byte Folded Spill Delay Slot 4 |
| 161 | +; COARSE-GRAINED-NEXT: st dj4, [sp, #-288] // 4-byte Folded Spill Delay Slot 3 |
| 162 | +; COARSE-GRAINED-NEXT: st dc0, [sp, #-300] // 4-byte Folded Spill Delay Slot 2 |
| 163 | +; COARSE-GRAINED-NEXT: movs m1, m2; mov dc5, dc6 // Delay Slot 1 |
| 164 | +; COARSE-GRAINED-NEXT: // %bb.4: // %ret.exit |
| 165 | +; COARSE-GRAINED-NEXT: lda p6, [sp, #-384] // 4-byte Folded Reload |
| 166 | +; COARSE-GRAINED-NEXT: lda r15, [sp, #-380] // 4-byte Folded Reload |
| 167 | +; COARSE-GRAINED-NEXT: lda r14, [sp, #-376] // 4-byte Folded Reload |
| 168 | +; COARSE-GRAINED-NEXT: lda lr, [sp, #-348] // 4-byte Folded Reload |
| 169 | +; COARSE-GRAINED-NEXT: lda r13, [sp, #-372] // 4-byte Folded Reload |
| 170 | +; COARSE-GRAINED-NEXT: lda r12, [sp, #-368] // 4-byte Folded Reload |
| 171 | +; COARSE-GRAINED-NEXT: lda r11, [sp, #-364] // 4-byte Folded Reload |
| 172 | +; COARSE-GRAINED-NEXT: lda r10, [sp, #-360] // 4-byte Folded Reload |
| 173 | +; COARSE-GRAINED-NEXT: lda r9, [sp, #-356] // 4-byte Folded Reload |
| 174 | +; COARSE-GRAINED-NEXT: lda r8, [sp, #-352] // 4-byte Folded Reload |
| 175 | +; COARSE-GRAINED-NEXT: ret lr |
| 176 | +; COARSE-GRAINED-NEXT: nop // Delay Slot 5 |
| 177 | +; COARSE-GRAINED-NEXT: nop // Delay Slot 4 |
| 178 | +; COARSE-GRAINED-NEXT: nop // Delay Slot 3 |
| 179 | +; COARSE-GRAINED-NEXT: paddxm [sp], #-384 // Delay Slot 2 |
| 180 | +; COARSE-GRAINED-NEXT: nop // Delay Slot 1 |
; entry: one indirect call whose callee is the null pointer, then eleven i32
; arguments are truncated to i20 (%2-%12). %2-%5 feed the inner-loop fifo pop,
; %6-%10 and %11-%12 feed the two add.3d calls in %for.cond.cleanup124.i.
| 181 | +entry: |
| 182 | + tail call void null(ptr null, ptr null, ptr null) |
| 183 | + %2 = trunc i32 %dimsAI.sroa.11.0.copyload.i to i20 |
| 184 | + %3 = trunc i32 %dimsAI.sroa.5.0.copyload.i to i20 |
| 185 | + %4 = trunc i32 %dimsAI.sroa.7.0.copyload.i to i20 |
| 186 | + %5 = trunc i32 %dimsAI.sroa.9.0.copyload.i to i20 |
| 187 | + %6 = trunc i32 %dimsAO.sroa.7.0.copyload.i to i20 |
| 188 | + %7 = trunc i32 %dimsAO.sroa.4.0.copyload.i to i20 |
| 189 | + %8 = trunc i32 %dimsAO.sroa.6.0.copyload.i to i20 |
| 190 | + %9 = trunc i32 %dimsAO.sroa.0.0.copyload.i to i20 |
| 191 | + %10 = trunc i32 %dimsAO.sroa.5.0.copyload.i to i20 |
| 192 | + %11 = trunc i32 %dimsW.sroa.4.0.copyload.i to i20 |
| 193 | + %12 = trunc i32 %dimsW.sroa.6.0.copyload.i to i20 |
| 194 | + br label %for.body.i |
| 195 | + |
; Outer loop header. Seven phis carry state around the back edge from
; %if.end239.i; each starts at a constant on entry (0, or 1 for
; %dimsW.sroa.10.0455.i, which is then always 0 on later iterations).
; %19 and %20 are extracted from the fifo pop result but have no user in this
; function.
| 196 | +for.body.i: ; preds = %if.end239.i, %entry |
| 197 | + %dimsAI.sroa.13.0458.i = phi i32 [ 0, %entry ], [ %40, %if.end239.i ] |
| 198 | + %dimsAO.sroa.10.0457.i = phi i32 [ 0, %entry ], [ %29, %if.end239.i ] |
| 199 | + %dimsAO.sroa.8.0456.i = phi i32 [ 0, %entry ], [ %27, %if.end239.i ] |
| 200 | + %dimsW.sroa.10.0455.i = phi i32 [ 1, %entry ], [ 0, %if.end239.i ] |
| 201 | + %dimsW.sroa.8.0454.i = phi i32 [ 0, %entry ], [ %34, %if.end239.i ] |
| 202 | + %iterator_psum_cnt1.0452.i = phi i32 [ 0, %entry ], [ %22, %if.end239.i ] |
| 203 | + %iterator_pout_cnt0.0451.i = phi i32 [ 0, %entry ], [ %45, %if.end239.i ] |
| 204 | + %13 = trunc i32 0 to i20 |
| 205 | + %14 = trunc i32 %iterator_psum_cnt1.0452.i to i20 |
| 206 | + %15 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 %14) |
| 207 | + %16 = extractvalue { ptr, i20, i20 } %15, 2 |
| 208 | + %17 = trunc i32 %dimsAI.sroa.13.0458.i to i20 |
| 209 | + %18 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 1, i20 0, i20 %17, i20 %0, i20 0, i20 0, i20 %0) |
| 210 | + %19 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %18, 5 |
| 211 | + %20 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %18, 6 |
| 212 | + br label %for.body125.i |
| 213 | + |
; Reached when the inner loop exits. Two add.3d calls produce the next values
; of the %dimsAO.* phis (%27, %29) and of %dimsW.sroa.8.0454.i (%34); %22
; (zext of %16) is the next %iterator_psum_cnt1.0452.i. %21 and %35 have no
; user in this function. The i1 argument %1 selects %if.else.i14.
| 214 | +for.cond.cleanup124.i: ; preds = %for.body125.i |
| 215 | + %21 = extractvalue { ptr, i20, i20 } %15, 1 |
| 216 | + %22 = zext i20 %16 to i32 |
| 217 | + %23 = trunc i32 %dimsAO.sroa.8.0456.i to i20 |
| 218 | + %24 = trunc i32 %dimsAO.sroa.10.0457.i to i20 |
| 219 | + %25 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 %6, i20 %7, i20 %8, i20 %9, i20 %23, i20 %10, i20 %24) |
| 220 | + %26 = extractvalue { ptr, i20, i20 } %25, 1 |
| 221 | + %27 = zext i20 %26 to i32 |
| 222 | + %28 = extractvalue { ptr, i20, i20 } %25, 2 |
| 223 | + %29 = zext i20 %28 to i32 |
| 224 | + %30 = trunc i32 %dimsW.sroa.8.0454.i to i20 |
| 225 | + %31 = trunc i32 %dimsW.sroa.10.0455.i to i20 |
| 226 | + %32 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 %11, i20 %12, i20 0, i20 %30, i20 0, i20 %31) |
| 227 | + %33 = extractvalue { ptr, i20, i20 } %32, 1 |
| 228 | + %34 = zext i20 %33 to i32 |
| 229 | + %35 = extractvalue { ptr, i20, i20 } %32, 2 |
| 230 | + br i1 %1, label %if.else.i14, label %if.end239.i |
| 231 | + |
; Inner loop: a single self-looping block controlled by
; @llvm.loop.decrement.i32(i32 0). Field 3 of the fifo pop result (%39) is
; zero-extended to %40, which becomes the next %dimsAI.sroa.13.0458.i.
| 232 | +for.body125.i: ; preds = %for.body125.i, %for.body.i |
| 233 | + %36 = trunc i32 0 to i20 |
| 234 | + %37 = trunc i32 0 to i20 |
| 235 | + %38 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 %2, i20 0, i20 %36, i20 %3, i20 %4, i20 %37, i20 %5) |
| 236 | + %39 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %38, 3 |
| 237 | + %40 = zext i20 %39 to i32 |
| 238 | + %41 = call i1 @llvm.loop.decrement.i32(i32 0) |
| 239 | + br i1 %41, label %for.body125.i, label %for.cond.cleanup124.i |
| 240 | + |
; Contains only a getelementptr whose result has no user in this function,
; then joins %if.end239.i.
| 241 | +if.else.i14: ; preds = %for.cond.cleanup124.i |
| 242 | + %add.ptr.i327.i = getelementptr i8, ptr null, i20 0 |
| 243 | + br label %if.end239.i |
| 244 | + |
; Outer loop latch: one more add.3d whose field 1 (%45 after zext) is the next
; %iterator_pout_cnt0.0451.i; %46 has no user in this function. The loop exits
; to %ret.exit when %1 is true, otherwise it branches back to %for.body.i.
| 245 | +if.end239.i: ; preds = %if.else.i14, %for.cond.cleanup124.i |
| 246 | + %42 = trunc i32 %iterator_pout_cnt0.0451.i to i20 |
| 247 | + %43 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 1, i20 0, i20 %42, i20 0, i20 0) |
| 248 | + %44 = extractvalue { ptr, i20, i20 } %43, 1 |
| 249 | + %45 = zext i20 %44 to i32 |
| 250 | + %46 = extractvalue { ptr, i20, i20 } %43, 2 |
| 251 | + br i1 %1, label %ret.exit, label %for.body.i |
| 252 | + |
| 253 | +ret.exit: ; preds = %if.end239.i |
| 254 | + ret void |
| 255 | +} |
| 256 | + |
; Declarations of the three llvm.* functions called by @heavy_3d_user, followed
; by the use-list order directives and the attribute groups they reference.
| 257 | +; Function Attrs: nounwind memory(none) |
| 258 | +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0 |
| 259 | + |
| 260 | +; Function Attrs: nounwind memory(argmem: read) |
| 261 | +declare { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5), <32 x i32>, i32, i20, i20, i20, i20, i20, i20, i20) #1 |
| 262 | + |
| 263 | +; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn |
| 264 | +declare i1 @llvm.loop.decrement.i32(i32) #2 |
| 265 | + |
; The index lists below have four and two entries, matching the four add.3d
; call sites and the two fifo.ld.pop call sites in @heavy_3d_user.
; NOTE(review): presumably kept so the use-list order of the original module is
; reproduced -- confirm before dropping them.
| 266 | +; uselistorder directives |
| 267 | +uselistorder ptr @llvm.aie2p.add.3d, { 3, 2, 1, 0 } |
| 268 | +uselistorder ptr @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5, { 1, 0 } |
| 269 | + |
; Attribute groups #0-#2 are the ones attached to the three declarations above.
| 270 | +attributes #0 = { nounwind memory(none) } |
| 271 | +attributes #1 = { nounwind memory(argmem: read) } |
| 272 | +attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn } |
0 commit comments