Skip to content

Commit a30b328

Browse files
committed
Unfold adds
1 parent 27c6b10 commit a30b328

File tree

3 files changed

+197
-110
lines changed

3 files changed

+197
-110
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,11 +1920,6 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19201920
CmpInstr.getOperand(2).getImm() == 0) &&
19211921
"Caller guarantees that CmpInstr compares with constant 0");
19221922

1923-
// NZCV is not supported if the stack offset is scalable.
1924-
auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
1925-
if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
1926-
return false;
1927-
19281923
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
19291924
if (!NZVCUsed || NZVCUsed->C)
19301925
return false;
@@ -6569,35 +6564,69 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
65696564
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
65706565
}
65716566

6567+
// Unfold ADDSXri:
6568+
// adds %dest, %stack, c
6569+
// -->
6570+
// add %dest, %stack, 0
6571+
// adds %dest, %dest, c
6572+
static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg,
6573+
const AArch64InstrInfo *TII) {
6574+
auto *MBB = MI.getParent();
6575+
Register DestReg = MI.getOperand(0).getReg();
6576+
6577+
auto *Unfolded =
6578+
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg)
6579+
.addReg(FrameReg)
6580+
.addImm(0)
6581+
.addImm(0)
6582+
.getInstr();
6583+
6584+
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg)
6585+
.addReg(DestReg)
6586+
.addImm(MI.getOperand(2).getImm())
6587+
.addImm(MI.getOperand(3).getImm());
6588+
6589+
MI.eraseFromParent();
6590+
Unfolded->getParent()->dump();
6591+
return Unfolded;
6592+
}
6593+
65726594
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
65736595
unsigned FrameReg, StackOffset &Offset,
65746596
const AArch64InstrInfo *TII) {
65756597
unsigned Opcode = MI.getOpcode();
65766598
unsigned ImmIdx = FrameRegIdx + 1;
65776599

6600+
MachineInstr *NewMI = &MI;
6601+
if (Opcode == AArch64::ADDSXri && Offset.getScalable()) {
6602+
NewMI = unfoldAddXri(MI, FrameReg, TII);
6603+
Opcode = AArch64::ADDXri;
6604+
}
6605+
65786606
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6579-
Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6580-
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6581-
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6582-
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6583-
MI.eraseFromParent();
6607+
Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm());
6608+
emitFrameOffset(*NewMI->getParent(), *NewMI,
6609+
NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(),
6610+
FrameReg, Offset, TII, MachineInstr::NoFlags,
6611+
(Opcode == AArch64::ADDSXri));
6612+
NewMI->eraseFromParent();
65846613
Offset = StackOffset();
65856614
return true;
65866615
}
65876616

65886617
int64_t NewOffset;
65896618
unsigned UnscaledOp;
65906619
bool UseUnscaledOp;
6591-
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6620+
int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp,
65926621
&UnscaledOp, &NewOffset);
65936622
if (Status & AArch64FrameOffsetCanUpdate) {
65946623
if (Status & AArch64FrameOffsetIsLegal)
65956624
// Replace the FrameIndex with FrameReg.
6596-
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6625+
NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
65976626
if (UseUnscaledOp)
6598-
MI.setDesc(TII->get(UnscaledOp));
6627+
NewMI->setDesc(TII->get(UnscaledOp));
65996628

6600-
MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6629+
NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset);
66016630
return !Offset;
66026631
}
66036632

llvm/test/CodeGen/AArch64/pr157252.ll

Lines changed: 0 additions & 96 deletions
This file was deleted.
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s
3+
--- |
4+
define void @i(ptr %ad, ptr %0) #0 {
5+
entry:
6+
ret void
7+
}
8+
declare i32 @g(...)
9+
attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }
10+
...
11+
---
12+
name: i
13+
alignment: 4
14+
exposesReturnsTwice: false
15+
legalized: false
16+
regBankSelected: false
17+
selected: false
18+
failedISel: false
19+
tracksRegLiveness: true
20+
hasWinCFI: false
21+
noPhis: true
22+
isSSA: false
23+
noVRegs: true
24+
hasFakeUses: false
25+
callsEHReturn: false
26+
callsUnwindInit: false
27+
hasEHContTarget: false
28+
hasEHScopes: false
29+
hasEHFunclets: false
30+
isOutlined: false
31+
debugInstrRef: false
32+
failsVerification: false
33+
tracksDebugUserValues: true
34+
registers: []
35+
liveins: []
36+
frameInfo:
37+
isFrameAddressTaken: false
38+
isReturnAddressTaken: false
39+
hasStackMap: false
40+
hasPatchPoint: false
41+
stackSize: 0
42+
offsetAdjustment: 0
43+
maxAlignment: 16
44+
adjustsStack: true
45+
hasCalls: true
46+
stackProtector: ''
47+
functionContext: ''
48+
maxCallFrameSize: 0
49+
cvBytesOfCalleeSavedRegisters: 0
50+
hasOpaqueSPAdjustment: false
51+
hasVAStart: false
52+
hasMustTailInVarArgFunc: false
53+
hasTailCall: false
54+
isCalleeSavedInfoValid: false
55+
localFrameSize: 0
56+
fixedStack: []
57+
stack:
58+
- { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
59+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
60+
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
61+
debug-info-location: '' }
62+
- { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
63+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
64+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
65+
- { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
66+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
67+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
68+
- { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
69+
stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
70+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
71+
entry_values: []
72+
callSites: []
73+
debugValueSubstitutions: []
74+
constants: []
75+
machineFunctionInfo: {}
76+
body: |
77+
; CHECK-LABEL: name: i
78+
; CHECK: bb.0:
79+
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
80+
; CHECK-NEXT: liveins: $x0, $x1, $d11, $lr, $x19, $x28
81+
; CHECK-NEXT: {{ $}}
82+
; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8)
83+
; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
84+
; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
85+
; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0
86+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
87+
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
88+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
89+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8
90+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16
91+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
92+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
93+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48
94+
; CHECK-NEXT: B %bb.2
95+
; CHECK-NEXT: {{ $}}
96+
; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target):
97+
; CHECK-NEXT: successors: %bb.3(0x80000000)
98+
; CHECK-NEXT: liveins: $x0, $x1
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: renamable $w19 = MOVi32imm 1
101+
; CHECK-NEXT: B %bb.3
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: bb.2:
104+
; CHECK-NEXT: successors:
105+
; CHECK-NEXT: {{ $}}
106+
; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target):
107+
; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000)
108+
; CHECK-NEXT: liveins: $w19
109+
; CHECK-NEXT: {{ $}}
110+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
111+
; CHECK-NEXT: B %bb.4
112+
; CHECK-NEXT: {{ $}}
113+
; CHECK-NEXT: bb.4:
114+
; CHECK-NEXT: successors: %bb.3(0x80000000)
115+
; CHECK-NEXT: liveins: $w19
116+
; CHECK-NEXT: {{ $}}
117+
; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
118+
; CHECK-NEXT: $x8 = ADDXri $sp, 28, 0
119+
; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg
120+
; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv
121+
; CHECK-NEXT: B %bb.3
122+
bb.0:
123+
successors: %bb.2(0x80000000), %bb.1(0x00000000)
124+
liveins: $x0, $x1
125+
126+
B %bb.2
127+
128+
bb.1 (inlineasm-br-indirect-target):
129+
successors: %bb.3(0x80000000)
130+
liveins: $x0, $x1
131+
132+
renamable $w19 = MOVi32imm 1
133+
B %bb.3
134+
135+
bb.2:
136+
successors:
137+
138+
bb.3 (inlineasm-br-indirect-target):
139+
successors: %bb.4(0x80000000), %bb.3(0x00000000)
140+
liveins: $w19
141+
142+
INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11
143+
B %bb.4
144+
145+
bb.4:
146+
successors: %bb.3(0x80000000)
147+
liveins: $w19
148+
149+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
150+
BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
151+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
152+
dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv
153+
B %bb.3
154+
...

0 commit comments

Comments
 (0)