Skip to content

Commit 44b686e

Browse files
authored
[AArch64] Unfold adds when eliminating frame index with scalable offset (#158597)
Fixes #157252.

Peephole optimization tends to fold:
```
add %gpr1, %stack, 0
subs %gpr2, %gpr1, 0
```
to
```
adds %gpr2, %stack, 0
```
This patch undoes the fold in `rewriteAArch64FrameIndex` so that an `adds` on the stack object can be processed.
1 parent a4c5a74 commit 44b686e

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6273,6 +6273,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
62736273
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
62746274
Offset, Bytes, NumPredicateVectors, NumDataVectors);
62756275

6276+
// Insert ADDSXri for scalable offset at the end.
6277+
bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6278+
if (NeedsFinalDefNZCV)
6279+
SetNZCV = false;
6280+
62766281
// First emit non-scalable frame offsets, or a simple 'mov'.
62776282
if (Bytes || (!Offset && SrcReg != DestReg)) {
62786283
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
@@ -6292,8 +6297,6 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
62926297
FrameReg = DestReg;
62936298
}
62946299

6295-
assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
6296-
"SetNZCV not supported with SVE vectors");
62976300
assert(!(NeedsWinCFI && NumPredicateVectors) &&
62986301
"WinCFI can't allocate fractions of an SVE data vector");
62996302

@@ -6313,6 +6316,12 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
63136316
Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
63146317
FrameReg);
63156318
}
6319+
6320+
if (NeedsFinalDefNZCV)
6321+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6322+
.addReg(DestReg)
6323+
.addImm(0)
6324+
.addImm(0);
63166325
}
63176326

63186327
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -frame-pointer=none -o - %s | FileCheck %s
3+
---
4+
name: test_addsxri_scalable_offset
5+
stack:
6+
- { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
7+
- { id: 1, type: default, size: 16, alignment: 16, stack-id: scalable-vector }
8+
body: |
9+
bb.0:
10+
; CHECK-LABEL: name: test_addsxri_scalable_offset
11+
; CHECK: liveins: $fp
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
14+
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
15+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
16+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
17+
; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0
18+
; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
19+
; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
20+
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
21+
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
22+
; CHECK-NEXT: RET_ReallyLR implicit $x8
23+
$x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
24+
RET_ReallyLR implicit $x8
25+
...

0 commit comments

Comments
 (0)