Skip to content

Commit 4abfc43

Browse files
Martien de Jong (martien-de-jong)
authored and committed
[AIE] Change the default of aie-addrspace-none-is-safe to true
This enables scheduling loads in A and B together if either of them doesn't have a memory bank annotation.
1 parent b1ac573 commit 4abfc43

File tree

12 files changed

+190
-221
lines changed

12 files changed

+190
-221
lines changed

llvm/lib/Target/AIE/AIEHazardRecognizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
7+
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88
//
99
//===----------------------------------------------------------------------===//
1010
//
@@ -44,7 +44,7 @@ static cl::opt<unsigned>
4444
cl::desc("Override maximum scoreboard depth to use."));
4545

4646
static cl::opt<bool> AddressSpaceNoneIsSafe(
47-
"aie-addrspace-none-is-safe", cl::Hidden, cl::init(false),
47+
"aie-addrspace-none-is-safe", cl::Hidden, cl::init(true),
4848
cl::desc("Assume that addrspace(0) doesn't cause conflicts."));
4949

5050
const AIEBaseMCFormats *FuncUnitWrapper::FormatInterface = nullptr;

llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll

Lines changed: 48 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
; See https://llvm.org/LICENSE.txt for license information.
55
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
;
7-
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
7+
; (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
88
; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s --check-prefix=ASM
99

1010
; This is a reduced version of the Add2D_0 MLLib benchmark which only contains
@@ -35,60 +35,50 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
3535
; ASM-LABEL: add2d:
3636
; ASM: .p2align 4
3737
; ASM-NEXT: // %bb.0: // %newFuncRoot
38-
; ASM-NEXT: paddb [sp], #32; nopx
39-
; ASM-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill
40-
; ASM-NEXT: paddb [p0], #40; st p6, [sp, #-28] // 4-byte Folded Spill
38+
; ASM-NEXT: paddb [p0], #40; nopx
4139
; ASM-NEXT: lda m2, [p0], #-4
4240
; ASM-NEXT: lda m3, [p0], #8
4341
; ASM-NEXT: lda m5, [p0], #8
4442
; ASM-NEXT: lda m4, [p0], #-24
45-
; ASM-NEXT: lda r4, [p0], #36
46-
; ASM-NEXT: lda r2, [p0], #-32
47-
; ASM-NEXT: lda r0, [p0], #-12; mov p6, sp
48-
; ASM-NEXT: lda r1, [p0], #40; paddb [p6], #-36
49-
; ASM-NEXT: lda p6, [p6, #0]; mov p7, sp
50-
; ASM-NEXT: paddb [p7], #-40
51-
; ASM-NEXT: lda r5, [p7, #0]
52-
; ASM-NEXT: lda m1, [p0], #36
43+
; ASM-NEXT: lda r4, [p0], #36; paddb [sp], #32
44+
; ASM-NEXT: lda r2, [p0], #-32; st p7, [sp, #-32] // 4-byte Folded Spill
45+
; ASM-NEXT: lda r0, [p0], #-12; st p6, [sp, #-28] // 4-byte Folded Spill
46+
; ASM-NEXT: lda r1, [p0], #40; mov p6, sp
47+
; ASM-NEXT: paddb [p6], #-36; mov p7, sp
48+
; ASM-NEXT: lda r5, [p6, #0]; paddb [p7], #-40
49+
; ASM-NEXT: lda p7, [p7, #0]; mov p6, sp
50+
; ASM-NEXT: lda m1, [p0], #36; paddb [p6], #-44
51+
; ASM-NEXT: lda p6, [p6, #0]
5352
; ASM-NEXT: lda m0, [p0], #-8
54-
; ASM-NEXT: lda dn0, [p0], #-8
55-
; ASM-NEXT: st r1, [p4, #0]
53+
; ASM-NEXT: lda dn0, [p0], #-8; st r1, [p4, #0]
5654
; ASM-NEXT: lda dj0, [p0], #12; nez r3, r0; mov p4, sp
57-
; ASM-NEXT: st r3, [p5, #0]
58-
; ASM-NEXT: lda dn4, [p0], #-8; paddb [p4], #-44; mov p5, sp
59-
; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-48
60-
; ASM-NEXT: lda p7, [p5, #0]; mov p5, sp
61-
; ASM-NEXT: lda dj4, [p0], #-36; paddb [p5], #-52
62-
; ASM-NEXT: lda p5, [p5, #0]
63-
; ASM-NEXT: st m1, [p6, #0]
64-
; ASM-NEXT: mov p6, r5
65-
; ASM-NEXT: nop
66-
; ASM-NEXT: st m0, [p6, #0]
67-
; ASM-NEXT: st dj0, [p4, #0]
68-
; ASM-NEXT: st dj4, [p7, #0]
69-
; ASM-NEXT: st dn0, [p5, #0]
55+
; ASM-NEXT: lda dn4, [p0], #-8; st r3, [p5, #0]
56+
; ASM-NEXT: lda dj4, [p0], #-36; paddb [p4], #-48; mov p5, r5
57+
; ASM-NEXT: lda p4, [p4, #0]; st m1, [p5, #0]
7058
; ASM-NEXT: lda r0, [p0], #-36; mov p5, sp
71-
; ASM-NEXT: lda r5, [p0, #0]; paddb [p5], #-76; mov p6, sp
72-
; ASM-NEXT: lda r9, [p5, #0]; paddb [p6], #-56; mov p5, sp
73-
; ASM-NEXT: lda r6, [p6, #0]; paddb [p5], #-80; mov p4, sp
74-
; ASM-NEXT: lda r10, [p5, #0]; paddb [p4], #-60; mov p5, sp
75-
; ASM-NEXT: lda p6, [p4, #0]; paddb [p5], #-84
76-
; ASM-NEXT: lda r11, [p5, #0]; mov p0, sp
77-
; ASM-NEXT: paddb [p0], #-72; mov p4, sp
78-
; ASM-NEXT: lda p0, [p0, #0]; paddb [p4], #-64; mov p5, sp
79-
; ASM-NEXT: lda p7, [p4, #0]; paddb [p5], #-88; mov p4, sp
80-
; ASM-NEXT: lda r12, [p5, #0]; paddb [p4], #-68; mov p5, sp
81-
; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-92
82-
; ASM-NEXT: lda r13, [p5, #0]
83-
; ASM-NEXT: mova r6, #1; add r7, r2, #-1; mov p5, r6
84-
; ASM-NEXT: mova r6, #3; ne r4, r4, r6
85-
; ASM-NEXT: ltu r7, r7, r6
86-
; ASM-NEXT: jz r7, #.LBB0_2
87-
; ASM-NEXT: st dn4, [p5, #0]; nez r0, r0 // Delay Slot 5
88-
; ASM-NEXT: st r0, [p6, #0] // Delay Slot 4
89-
; ASM-NEXT: paddb [p2], m3; st r5, [p7, #0] // Delay Slot 3
90-
; ASM-NEXT: padda [p1], m2; paddb [p2], m5; and r8, r2, r6; st r4, [p4, #0] // Delay Slot 2
91-
; ASM-NEXT: mova r6, #0; paddb [p2], m4; st r8, [p0, #0] // Delay Slot 1
59+
; ASM-NEXT: lda r5, [p0, #0]; paddb [p5], #-52
60+
; ASM-NEXT: lda p5, [p5, #0]; mov p0, sp
61+
; ASM-NEXT: st m0, [p7, #0]
62+
; ASM-NEXT: mov p7, sp
63+
; ASM-NEXT: paddb [p7], #-56; st dj0, [p6, #0]
64+
; ASM-NEXT: lda r6, [p7, #0]; mov p6, sp
65+
; ASM-NEXT: paddb [p0], #-72; mov p7, sp
66+
; ASM-NEXT: lda p0, [p0, #0]; paddb [p6], #-60; st dj4, [p4, #0]
67+
; ASM-NEXT: lda r7, [p6, #0]; mov p4, sp
68+
; ASM-NEXT: paddb [p4], #-76; mov p6, sp
69+
; ASM-NEXT: lda r11, [p4, #0]; paddb [p7], #-64; mov p4, sp
70+
; ASM-NEXT: lda p7, [p7, #0]; paddb [p6], #-68; st dn0, [p5, #0]
71+
; ASM-NEXT: lda r8, [p6, #0]; paddb [p4], #-80; nez r0, r0; mov p5, r6
72+
; ASM-NEXT: lda p6, [p4, #0]; st dn4, [p5, #0]; movx r6, #1
73+
; ASM-NEXT: ne r4, r4, r6; mov p4, sp
74+
; ASM-NEXT: mova r6, #3; paddb [p4], #-84; add r7, r2, #-1; mov p5, r7
75+
; ASM-NEXT: lda r9, [p4, #0]; ltu r7, r7, r6; mov p4, sp
76+
; ASM-NEXT: st r0, [p5, #0]; paddb [p4], #-88; jz r7, #.LBB0_2
77+
; ASM-NEXT: lda r10, [p4, #0]; mov p4, sp // Delay Slot 5
78+
; ASM-NEXT: paddb [p4], #-92; st r5, [p7, #0] // Delay Slot 4
79+
; ASM-NEXT: lda p4, [p4, #0]; paddb [p2], m3; mov p7, r8 // Delay Slot 3
80+
; ASM-NEXT: st r4, [p7, #0]; paddb [p2], m5; and r8, r2, r6 // Delay Slot 2
81+
; ASM-NEXT: padda [p1], m2; paddb [p2], m4; movx r6, #0; st r8, [p0, #0] // Delay Slot 1
9282
; ASM-NEXT: // %bb.1:
9383
; ASM-NEXT: nopb ; nopa ; nops ; j #.LBB0_5; nopv
9484
; ASM-NEXT: nopa ; nopx // Delay Slot 5
@@ -137,23 +127,19 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
137127
; ASM-NEXT: nop
138128
; ASM-NEXT: nop
139129
; ASM-NEXT: vst.srs.d8.s32 cm0, s0, [p3], #32
140-
; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32
141-
; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32; mov crUPSSign, #0
142-
; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov r6, dc0
143-
; ASM-NEXT: mov r0, dc4
130+
; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32; mov crUPSSign, #0
131+
; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32; mov r6, dc0
132+
; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov r0, dc4
144133
; ASM-NEXT: mov crSRSSign, #0
145134
; ASM-NEXT: .p2align 4
146135
; ASM-NEXT: .LBB0_5: // %for.cond.cleanup.unr-lcssa.split
147-
; ASM-NEXT: nopb ; lda p7, [sp, #-32]; nops ; nopxm ; nopv // 4-byte Folded Reload
148-
; ASM-NEXT: mov p0, r13
149-
; ASM-NEXT: st r0, [p0, #0]
150-
; ASM-NEXT: mov p0, r12
151-
; ASM-NEXT: st r6, [p0, #0]
152-
; ASM-NEXT: lda p6, [sp, #-28]; mov p0, r11 // 4-byte Folded Reload
153-
; ASM-NEXT: st p3, [p0, #0]; ret lr
154-
; ASM-NEXT: mov p0, r10 // Delay Slot 5
155-
; ASM-NEXT: st p2, [p0, #0] // Delay Slot 4
156-
; ASM-NEXT: mov p0, r9 // Delay Slot 3
136+
; ASM-NEXT: nopx ; mov p0, r10
137+
; ASM-NEXT: lda p7, [sp, #-32]; st r0, [p4, #0] // 4-byte Folded Reload
138+
; ASM-NEXT: lda p6, [sp, #-28]; st r6, [p0, #0] // 4-byte Folded Reload
139+
; ASM-NEXT: ret lr ; mov p0, r9
140+
; ASM-NEXT: st p3, [p0, #0] // Delay Slot 5
141+
; ASM-NEXT: mov p0, r11 // Delay Slot 4
142+
; ASM-NEXT: st p2, [p6, #0] // Delay Slot 3
157143
; ASM-NEXT: st p1, [p0, #0] // Delay Slot 2
158144
; ASM-NEXT: paddb [sp], #-32 // Delay Slot 1
159145
newFuncRoot:

0 commit comments

Comments
 (0)