Skip to content

Commit 5abdce4

Browse files
[LLVM][AArch64InstrInfo] Prevent fill folding when DstReg is SP. (#148885)
We can remove subreg COPY instructions by filling directly into the COPY's destination register. However, this is only valid when the register class of the COPY's destination is compatible with the register class of the fill load. Fixes #148659.
1 parent ade2f10 commit 5abdce4

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6288,13 +6288,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
62886288
// LDRWui %0:sub_32<def,read-undef>, %stack.0
62896289
//
62906290
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6291-
const TargetRegisterClass *FillRC;
6291+
const TargetRegisterClass *FillRC = nullptr;
62926292
switch (DstMO.getSubReg()) {
62936293
default:
6294-
FillRC = nullptr;
62956294
break;
62966295
case AArch64::sub_32:
6297-
FillRC = &AArch64::GPR32RegClass;
6296+
if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6297+
FillRC = &AArch64::GPR32RegClass;
62986298
break;
62996299
case AArch64::ssub:
63006300
FillRC = &AArch64::FPR32RegClass;

llvm/test/CodeGen/AArch64/spill-fold.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
define i64 @test_subreg_fill_fold() { ret i64 0 }
1111
define double @test_subreg_fill_fold2() { ret double 0.0 }
1212
define <4 x float> @test_subreg_fill_fold3() { ret <4 x float> undef }
13+
define i64 @test_subreg_fill_fold4() { ret i64 0 }
1314
define i64 @test_nzcv_spill_fold() { ret i64 0 }
1415
...
1516
---
@@ -121,6 +122,24 @@ body: |
121122
RET_ReallyLR implicit $s0
122123
...
123124
---
125+
# CHECK-LABEL: name: test_subreg_fill_fold4
126+
# Ensure the COPY is maintained when its result register class is not compatible
127+
# with the fill load's.
128+
name: test_subreg_fill_fold4
129+
registers:
130+
- { id: 0, class: gpr32 }
131+
- { id: 1, class: gpr64sp }
132+
body: |
133+
bb.0:
134+
%0 = COPY $wzr
135+
INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp
136+
; CHECK: %2:gpr32 = LDRWui %stack.0, 0 :: (load (s32) from %stack.0)
137+
; CHECK: undef %1.sub_32:gpr64sp = COPY %2
138+
undef %1.sub_32:gpr64sp = COPY %0
139+
$x0 = COPY %1
140+
RET_ReallyLR implicit $x0
141+
...
142+
---
124143
# CHECK-LABEL: name: test_nzcv_spill_fold
125144
# Ensure that nzcv COPY cannot be folded.
126145
name: test_nzcv_spill_fold

0 commit comments

Comments
 (0)