Skip to content

Commit 6b97af9

Browse files
[AIEx] Modify Super-Reg-Rewrite pass to remove dead-MI from bundles of copies
With the new strategy, live-range splitting ends up creating bundles of copies in which some of the sub-registers are no longer used at all. When we split them in Super-Reg-Rewrite, we end up creating a live range that starts at a slot index and ends as dead on the same slot index. However, there is another register on the same slot index (since we have a MOV bundle) which actually has a valid live range.
1 parent a681b6e commit 6b97af9

File tree

3 files changed

+224
-0
lines changed

3 files changed

+224
-0
lines changed

llvm/lib/Target/AIE/AIESuperRegRewriter.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,27 @@ void AIESuperRegRewriter::rewriteSuperReg(
316316
VRM.grow();
317317
LIS.removeInterval(Reg);
318318

319+
// The liverange splitting logic sometimes produces bundles of copies when
320+
// subregisters are involved. Sometimes some of the copies are not used,
321+
// since super-reg-rewriter is going to modify them into individual virtual
322+
// registers with separate live ranges, we need to make sure we remove the
323+
// dead-MI from the bundle of copies
324+
SmallVector<int, 8> SubRegsToRemove;
325+
for (auto &[SubRegIdx, VReg] : make_early_inc_range(SubRegToVReg)) {
326+
if (MRI.use_nodbg_empty(VReg))
327+
for (auto &MI : MRI.reg_nodbg_instructions(VReg)) {
328+
if (MI.isBundled() && MI.isCopy()) {
329+
Indexes.removeSingleMachineInstrFromMaps(MI);
330+
MI.eraseFromBundle();
331+
SubRegsToRemove.push_back(SubRegIdx);
332+
}
333+
break;
334+
}
335+
}
336+
337+
for (auto SubRegIdx : SubRegsToRemove)
338+
SubRegToVReg.erase(SubRegIdx);
339+
319340
for (auto &[SubRegIdx, VReg] : SubRegToVReg) {
320341
MCRegister SubPhysReg;
321342
if (AssignPhysRegIsValid)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
;
3+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; (c) Copyright 2024-25 Advanced Micro Devices, Inc. or its affiliates
8+
; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s
9+
10+
11+
target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32"
12+
target triple = "aie2p-none-unknown-elf"
13+
14+
define void @issue_1(i1 %exitcond.not.i) {
15+
; CHECK-LABEL: issue_1:
16+
; CHECK: .p2align 4
17+
; CHECK-NEXT: // %bb.0: // %entry
18+
; CHECK-NEXT: mova dn3, #0; nopb ; movx r1, #1; mov crupsmode, #0
19+
; CHECK-NEXT: and r7, r0, r1; mov r0, dn3
20+
; CHECK-NEXT: mova r2, #0; mov r1, dn3
21+
; CHECK-NEXT: movs dn7, dn3; vbcst.16 x0, r2
22+
; CHECK-NEXT: movs dc3, dn3; mov s0, r2
23+
; CHECK-NEXT: movs dc7, dn3; mov r2, dn3
24+
; CHECK-NEXT: movs dn0, dn3; mov r3, dn3
25+
; CHECK-NEXT: movs dc0, dn3; mov r4, dn3
26+
; CHECK-NEXT: movs dc5, dn3; mov r5, dn3
27+
; CHECK-NEXT: movs dj3, dn3; mov r6, dn3
28+
; CHECK-NEXT: movs m2, dn3; mov dj6, dn3
29+
; CHECK-NEXT: movs m1, dn3; mov dj2, dn3
30+
; CHECK-NEXT: movs dn1, dn3; mov dj1, dn3
31+
; CHECK-NEXT: movs dn5, dn3; mov dj5, dn3
32+
; CHECK-NEXT: movs dn4, dn3; mov dj0, dn3
33+
; CHECK-NEXT: movs m0, dn3; mov dj4, dn3
34+
; CHECK-NEXT: .p2align 4
35+
; CHECK-NEXT: .LBB0_1: // %for.body.i
36+
; CHECK-NEXT: // =>This Loop Header: Depth=1
37+
; CHECK-NEXT: // Child Loop BB0_2 Depth 2
38+
; CHECK-NEXT: nopa ; nopb ; movs dc4, dn3; nopx ; vups.2x cml0, x0, s0, upssign0; nopv
39+
; CHECK-NEXT: .p2align 4
40+
; CHECK-NEXT: .LBB0_2: // %for.body58.i
41+
; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
42+
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
43+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov dn2, dn7; movs dc2, dc3
44+
; CHECK-NEXT: mova p0, #0; movs dc6, dc7; mov dn6, r0
45+
; CHECK-NEXT: movs dn2, r1; paddb.3d [p0], d2; jz r7, #.LBB0_2
46+
; CHECK-NEXT: mov dn6, r2 // Delay Slot 5
47+
; CHECK-NEXT: movs dc2, dc4; mov dc6, r3 // Delay Slot 4
48+
; CHECK-NEXT: paddb.3d [p0], d2 // Delay Slot 3
49+
; CHECK-NEXT: mova p0, #0; mov dc1, r5 // Delay Slot 2
50+
; CHECK-NEXT: paddb.3d [p0], d1; mov dc4, dc2 // Delay Slot 1
51+
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup57.i
52+
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
53+
; CHECK-NEXT: nopa ; nopb ; nops ; j #.LBB0_1; nopv
54+
; CHECK-NEXT: nop // Delay Slot 5
55+
; CHECK-NEXT: nop // Delay Slot 4
56+
; CHECK-NEXT: nop // Delay Slot 3
57+
; CHECK-NEXT: mova p0, #0; mov dc4, dn3 // Delay Slot 2
58+
; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 1
59+
entry:
60+
br label %for.body.i
61+
62+
for.body.i: ; preds = %for.cond.cleanup57.i, %entry
63+
%iterator_pout_cnt0.0489.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ]
64+
%Ky_cnt.0485.i = phi i32 [ 0, %entry ], [ %14, %for.cond.cleanup57.i ]
65+
%0 = tail call <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16> zeroinitializer, i32 0, i32 0)
66+
br label %for.body58.i
67+
68+
for.cond.cleanup57.i: ; preds = %for.body58.i
69+
%1 = trunc i32 %iterator_pout_cnt0.0489.i to i20
70+
%2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 0)
71+
%3 = extractvalue { ptr, i20, i20 } %2, 1
72+
%4 = zext i20 %3 to i32
73+
br label %for.body.i
74+
75+
for.body58.i: ; preds = %for.body58.i, %for.body.i
76+
%iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %10, %for.body58.i ]
77+
%Ky_cnt.1476.i = phi i32 [ %Ky_cnt.0485.i, %for.body.i ], [ %14, %for.body58.i ]
78+
%5 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0)
79+
%6 = extractvalue { ptr, i20, i20 } %5, 0
80+
%7 = trunc i32 %iterator_inner1_cnt0.1478.i to i20
81+
%8 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr %6, i20 0, i20 0, i20 0, i20 0, i20 %7, i20 0, i20 0)
82+
%9 = extractvalue { ptr, i20, i20 } %8, 1
83+
%10 = zext i20 %9 to i32
84+
%11 = trunc i32 %Ky_cnt.1476.i to i20
85+
%12 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %11)
86+
%13 = extractvalue { ptr, i20, i20 } %12, 2
87+
%14 = zext i20 %13 to i32
88+
br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i
89+
90+
; uselistorder directives
91+
uselistorder i32 %14, { 1, 0 }
92+
}
93+
94+
; Function Attrs: nounwind memory(inaccessiblemem: read)
95+
declare <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16>, i32, i32) #0
96+
97+
; Function Attrs: nounwind memory(none)
98+
declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #1
99+
100+
; uselistorder directives
101+
uselistorder ptr @llvm.aie2p.add.3d, { 3, 2, 1, 0 }
102+
103+
attributes #0 = { nounwind memory(inaccessiblemem: read) }
104+
attributes #1 = { nounwind memory(none) }
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
;
3+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; (c) Copyright 2024-25 Advanced Micro Devices, Inc. or its affiliates
8+
; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s
9+
10+
11+
target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32"
12+
target triple = "aie2p-none-unknown-elf"
13+
14+
define void @issue_2(i32 %0, i1 %exitcond.not.i) {
15+
; CHECK-LABEL: issue_2:
16+
; CHECK: .p2align 4
17+
; CHECK-NEXT: // %bb.0: // %entry
18+
; CHECK-NEXT: mova m0, #0; nopb ; nopx
19+
; CHECK-NEXT: movs dc5, m0; mov dj0, m0
20+
; CHECK-NEXT: movs dj4, m0; mov dn0, m0
21+
; CHECK-NEXT: movs dj2, m0; mov dn4, m0
22+
; CHECK-NEXT: movs dj6, m0; mov dn2, m0
23+
; CHECK-NEXT: movs dj3, m0; mov dn6, m0
24+
; CHECK-NEXT: movs dj7, m0; mov dn3, m0
25+
; CHECK-NEXT: mova r2, #1; movs dn7, m0; mov dc0, m0
26+
; CHECK-NEXT: movs dc4, m0; and r5, r1, r2; mov r2, m0
27+
; CHECK-NEXT: movs dc3, m0; mov r1, m0
28+
; CHECK-NEXT: movs dc2, m0; mov m2, m0
29+
; CHECK-NEXT: mova dn5, #1; movs dj5, m0; mov m3, m0
30+
; CHECK-NEXT: mova r3, #0; movs dn1, m0; mov m1, m0
31+
; CHECK-NEXT: .p2align 4
32+
; CHECK-NEXT: .LBB0_1: // %for.body58.i
33+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
34+
; CHECK-NEXT: jz r5, #.LBB0_1
35+
; CHECK-NEXT: nop // Delay Slot 5
36+
; CHECK-NEXT: mova p0, #0 // Delay Slot 4
37+
; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 3
38+
; CHECK-NEXT: mova p0, #0; mov dc6, r3 // Delay Slot 2
39+
; CHECK-NEXT: paddb.3d [p0], d2; or r3, r0, r0; mov dc0, dn5 // Delay Slot 1
40+
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup57.i
41+
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
42+
; CHECK-NEXT: nopa ; nopb ; movs dc7, dn5; nopx ; mov dc0, dc5; nopv
43+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r3, dn5; movs dc1, r2
44+
; CHECK-NEXT: movs dj1, r1; mov dn5, m0
45+
; CHECK-NEXT: mova p0, #0; movs dc5, m0; j #.LBB0_1
46+
; CHECK-NEXT: paddb.3d [p0], d1 // Delay Slot 5
47+
; CHECK-NEXT: mova p0, #0; movs dc2, m0; mov dn5, r3 // Delay Slot 4
48+
; CHECK-NEXT: movs dj1, m0; paddb.3d [p0], d3; mov r2, dc1 // Delay Slot 3
49+
; CHECK-NEXT: mova p0, #0; movs dc5, dc0; mov dc1, m0 // Delay Slot 2
50+
; CHECK-NEXT: mova r3, #0; paddb.3d [p0], d1; movs dc4, m0; mov dc0, m0 // Delay Slot 1
51+
entry:
52+
br label %for.body.i
53+
54+
for.body.i: ; preds = %for.cond.cleanup57.i, %entry
55+
%iterator_outer0_cnt0.0496.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ]
56+
%iterator_weights_cnt0.0493.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup57.i ]
57+
%y_cnt.0487.i = phi i32 [ 0, %entry ], [ %12, %for.cond.cleanup57.i ]
58+
br label %for.body58.i
59+
60+
for.cond.cleanup57.i: ; preds = %for.body58.i
61+
%1 = trunc i32 %iterator_outer0_cnt0.0496.i to i20
62+
%2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 1)
63+
%3 = extractvalue { ptr, i20, i20 } %2, 1
64+
%4 = zext i20 %3 to i32
65+
%5 = trunc i32 %iterator_weights_cnt0.0493.i to i20
66+
%6 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %5, i20 0, i20 0)
67+
%7 = extractvalue { ptr, i20, i20 } %6, 1
68+
%8 = zext i20 %7 to i32
69+
%9 = trunc i32 %y_cnt.0487.i to i20
70+
%10 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %9)
71+
%11 = extractvalue { ptr, i20, i20 } %10, 2
72+
%12 = zext i20 %11 to i32
73+
br label %for.body.i
74+
75+
for.body58.i: ; preds = %for.body58.i, %for.body.i
76+
%iterator_inner0_cnt0.1480.i = phi i32 [ 0, %for.body.i ], [ 1, %for.body58.i ]
77+
%iterator_inner0_cnt1.1479.i = phi i32 [ 0, %for.body.i ], [ %17, %for.body58.i ]
78+
%iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %22, %for.body58.i ]
79+
%iterator_inner1_cnt1.1477.i = phi i32 [ 0, %for.body.i ], [ %0, %for.body58.i ]
80+
%13 = trunc i32 %iterator_inner0_cnt0.1480.i to i20
81+
%14 = trunc i32 %iterator_inner0_cnt1.1479.i to i20
82+
%15 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 %14)
83+
%16 = extractvalue { ptr, i20, i20 } %15, 2
84+
%17 = zext i20 %16 to i32
85+
%18 = trunc i32 %iterator_inner1_cnt0.1478.i to i20
86+
%19 = trunc i32 %iterator_inner1_cnt1.1477.i to i20
87+
%20 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %18, i20 0, i20 %19)
88+
%21 = extractvalue { ptr, i20, i20 } %20, 1
89+
%22 = zext i20 %21 to i32
90+
br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i
91+
}
92+
93+
; Function Attrs: nounwind memory(none)
94+
declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0
95+
96+
; uselistorder directives
97+
uselistorder ptr @llvm.aie2p.add.3d, { 4, 3, 2, 1, 0 }
98+
99+
attributes #0 = { nounwind memory(none) }

0 commit comments

Comments
 (0)