Skip to content

Commit 79d850a

Browse files
Yeting Kuotru
authored and committed
[RISCV] Use max pushed register to get pushed register number.
Previously we used the number of registers that both need to be saved and are pushable as the number of pushed registers. We also use the pushed register number to calculate the stack size. This is not correct, because Zcmp pushes every register from $ra up to the highest register that needs to be saved, and there is no guarantee that the registers needing to be saved form a contiguous sequence starting at $ra. Here is an example: PushPopRegs should be 6 (ra, s0-s4) instead of 1.
```
; llc -mtriple=riscv32 -mattr=+zcmp
define void @foo() {
entry:
  ; Old: .cfi_def_cfa_offset 16
  ; New: .cfi_def_cfa_offset 32
  tail call void asm sideeffect "li s4, 0", "~{s4}"()
  ret void
}
```
Reviewed By: Jim, kito-cheng
Differential Revision: https://reviews.llvm.org/D156407
(cherry picked from commit f68c687)
1 parent 44ce1f8 commit 79d850a

File tree

2 files changed

+146
-14
lines changed

2 files changed

+146
-14
lines changed

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -262,22 +262,16 @@ static unsigned getPushPopEncoding(const Register MaxReg) {
262262

263263
// Get the max reg of Push/Pop for restoring callee saved registers.
264264
static Register getMaxPushPopReg(const MachineFunction &MF,
265-
const std::vector<CalleeSavedInfo> &CSI,
266-
unsigned &PushPopRegs) {
265+
const std::vector<CalleeSavedInfo> &CSI) {
267266
Register MaxPushPopReg = RISCV::NoRegister;
268-
PushPopRegs = 0;
269267
for (auto &CS : CSI) {
270268
Register Reg = CS.getReg();
271-
if (RISCV::PGPRRegClass.contains(Reg)) {
269+
if (RISCV::PGPRRegClass.contains(Reg))
272270
MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id());
273-
PushPopRegs += 1;
274-
}
275271
}
276272
// If rlist is {ra, s0-s10}, then s11 will also be included.
277-
if (MaxPushPopReg == RISCV::X26) {
273+
if (MaxPushPopReg == RISCV::X26)
278274
MaxPushPopReg = RISCV::X27;
279-
PushPopRegs = 13;
280-
}
281275
return MaxPushPopReg;
282276
}
283277

@@ -1332,10 +1326,11 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
13321326
// Emit CM.PUSH with base SPimm & evaluate Push stack
13331327
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
13341328
if (RVFI->isPushable(*MF)) {
1335-
unsigned PushPopRegs = 0;
1336-
Register MaxReg = getMaxPushPopReg(*MF, CSI, PushPopRegs);
1337-
RVFI->setRVPushRegs(PushPopRegs);
1338-
RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushPopRegs, 16));
1329+
Register MaxReg = getMaxPushPopReg(*MF, CSI);
1330+
unsigned PushedRegNum =
1331+
getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1;
1332+
RVFI->setRVPushRegs(PushedRegNum);
1333+
RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
13391334

13401335
if (MaxReg != RISCV::NoRegister) {
13411336
// Use encoded number to represent registers to spill.
@@ -1347,7 +1342,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
13471342
PushBuilder.addImm((int64_t)RegEnc);
13481343
PushBuilder.addImm(0);
13491344

1350-
for (unsigned i = 0; i < PushPopRegs; i++)
1345+
for (unsigned i = 0; i < PushedRegNum; i++)
13511346
PushBuilder.addUse(AllPopRegs[i], RegState::Implicit);
13521347
}
13531348
} else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) {

llvm/test/CodeGen/RISCV/callee-saved-gprs.ll

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,3 +1990,140 @@ define void @caller() nounwind {
19901990
store volatile [32 x i32] %val, ptr @var
19911991
ret void
19921992
}
1993+
1994+
; This function tests if the stack size is correctly calculated when
1995+
; callee-saved registers are not a sequential list from $ra
1996+
define void @foo() {
1997+
; RV32I-LABEL: foo:
1998+
; RV32I: # %bb.0: # %entry
1999+
; RV32I-NEXT: addi sp, sp, -16
2000+
; RV32I-NEXT: .cfi_def_cfa_offset 16
2001+
; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
2002+
; RV32I-NEXT: .cfi_offset s4, -4
2003+
; RV32I-NEXT: #APP
2004+
; RV32I-NEXT: li s4, 0
2005+
; RV32I-NEXT: #NO_APP
2006+
; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
2007+
; RV32I-NEXT: addi sp, sp, 16
2008+
; RV32I-NEXT: ret
2009+
;
2010+
; RV32I-WITH-FP-LABEL: foo:
2011+
; RV32I-WITH-FP: # %bb.0: # %entry
2012+
; RV32I-WITH-FP-NEXT: addi sp, sp, -16
2013+
; RV32I-WITH-FP-NEXT: .cfi_def_cfa_offset 16
2014+
; RV32I-WITH-FP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2015+
; RV32I-WITH-FP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
2016+
; RV32I-WITH-FP-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
2017+
; RV32I-WITH-FP-NEXT: .cfi_offset ra, -4
2018+
; RV32I-WITH-FP-NEXT: .cfi_offset s0, -8
2019+
; RV32I-WITH-FP-NEXT: .cfi_offset s4, -12
2020+
; RV32I-WITH-FP-NEXT: addi s0, sp, 16
2021+
; RV32I-WITH-FP-NEXT: .cfi_def_cfa s0, 0
2022+
; RV32I-WITH-FP-NEXT: #APP
2023+
; RV32I-WITH-FP-NEXT: li s4, 0
2024+
; RV32I-WITH-FP-NEXT: #NO_APP
2025+
; RV32I-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2026+
; RV32I-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
2027+
; RV32I-WITH-FP-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
2028+
; RV32I-WITH-FP-NEXT: addi sp, sp, 16
2029+
; RV32I-WITH-FP-NEXT: ret
2030+
;
2031+
; RV32IZCMP-LABEL: foo:
2032+
; RV32IZCMP: # %bb.0: # %entry
2033+
; RV32IZCMP-NEXT: cm.push {ra, s0-s4}, -32
2034+
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 32
2035+
; RV32IZCMP-NEXT: .cfi_offset s4, -4
2036+
; RV32IZCMP-NEXT: #APP
2037+
; RV32IZCMP-NEXT: li s4, 0
2038+
; RV32IZCMP-NEXT: #NO_APP
2039+
; RV32IZCMP-NEXT: cm.popret {ra, s0-s4}, 32
2040+
;
2041+
; RV32IZCMP-WITH-FP-LABEL: foo:
2042+
; RV32IZCMP-WITH-FP: # %bb.0: # %entry
2043+
; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, -16
2044+
; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa_offset 16
2045+
; RV32IZCMP-WITH-FP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2046+
; RV32IZCMP-WITH-FP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
2047+
; RV32IZCMP-WITH-FP-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
2048+
; RV32IZCMP-WITH-FP-NEXT: .cfi_offset ra, -4
2049+
; RV32IZCMP-WITH-FP-NEXT: .cfi_offset s0, -8
2050+
; RV32IZCMP-WITH-FP-NEXT: .cfi_offset s4, -12
2051+
; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 16
2052+
; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 0
2053+
; RV32IZCMP-WITH-FP-NEXT: #APP
2054+
; RV32IZCMP-WITH-FP-NEXT: li s4, 0
2055+
; RV32IZCMP-WITH-FP-NEXT: #NO_APP
2056+
; RV32IZCMP-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2057+
; RV32IZCMP-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
2058+
; RV32IZCMP-WITH-FP-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
2059+
; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, 16
2060+
; RV32IZCMP-WITH-FP-NEXT: ret
2061+
;
2062+
; RV64I-LABEL: foo:
2063+
; RV64I: # %bb.0: # %entry
2064+
; RV64I-NEXT: addi sp, sp, -16
2065+
; RV64I-NEXT: .cfi_def_cfa_offset 16
2066+
; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
2067+
; RV64I-NEXT: .cfi_offset s4, -8
2068+
; RV64I-NEXT: #APP
2069+
; RV64I-NEXT: li s4, 0
2070+
; RV64I-NEXT: #NO_APP
2071+
; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
2072+
; RV64I-NEXT: addi sp, sp, 16
2073+
; RV64I-NEXT: ret
2074+
;
2075+
; RV64I-WITH-FP-LABEL: foo:
2076+
; RV64I-WITH-FP: # %bb.0: # %entry
2077+
; RV64I-WITH-FP-NEXT: addi sp, sp, -32
2078+
; RV64I-WITH-FP-NEXT: .cfi_def_cfa_offset 32
2079+
; RV64I-WITH-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
2080+
; RV64I-WITH-FP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
2081+
; RV64I-WITH-FP-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
2082+
; RV64I-WITH-FP-NEXT: .cfi_offset ra, -8
2083+
; RV64I-WITH-FP-NEXT: .cfi_offset s0, -16
2084+
; RV64I-WITH-FP-NEXT: .cfi_offset s4, -24
2085+
; RV64I-WITH-FP-NEXT: addi s0, sp, 32
2086+
; RV64I-WITH-FP-NEXT: .cfi_def_cfa s0, 0
2087+
; RV64I-WITH-FP-NEXT: #APP
2088+
; RV64I-WITH-FP-NEXT: li s4, 0
2089+
; RV64I-WITH-FP-NEXT: #NO_APP
2090+
; RV64I-WITH-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
2091+
; RV64I-WITH-FP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
2092+
; RV64I-WITH-FP-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
2093+
; RV64I-WITH-FP-NEXT: addi sp, sp, 32
2094+
; RV64I-WITH-FP-NEXT: ret
2095+
;
2096+
; RV64IZCMP-LABEL: foo:
2097+
; RV64IZCMP: # %bb.0: # %entry
2098+
; RV64IZCMP-NEXT: cm.push {ra, s0-s4}, -48
2099+
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 48
2100+
; RV64IZCMP-NEXT: .cfi_offset s4, -8
2101+
; RV64IZCMP-NEXT: #APP
2102+
; RV64IZCMP-NEXT: li s4, 0
2103+
; RV64IZCMP-NEXT: #NO_APP
2104+
; RV64IZCMP-NEXT: cm.popret {ra, s0-s4}, 48
2105+
;
2106+
; RV64IZCMP-WITH-FP-LABEL: foo:
2107+
; RV64IZCMP-WITH-FP: # %bb.0: # %entry
2108+
; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, -32
2109+
; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa_offset 32
2110+
; RV64IZCMP-WITH-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
2111+
; RV64IZCMP-WITH-FP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
2112+
; RV64IZCMP-WITH-FP-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
2113+
; RV64IZCMP-WITH-FP-NEXT: .cfi_offset ra, -8
2114+
; RV64IZCMP-WITH-FP-NEXT: .cfi_offset s0, -16
2115+
; RV64IZCMP-WITH-FP-NEXT: .cfi_offset s4, -24
2116+
; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 32
2117+
; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 0
2118+
; RV64IZCMP-WITH-FP-NEXT: #APP
2119+
; RV64IZCMP-WITH-FP-NEXT: li s4, 0
2120+
; RV64IZCMP-WITH-FP-NEXT: #NO_APP
2121+
; RV64IZCMP-WITH-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
2122+
; RV64IZCMP-WITH-FP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
2123+
; RV64IZCMP-WITH-FP-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
2124+
; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, 32
2125+
; RV64IZCMP-WITH-FP-NEXT: ret
2126+
entry:
2127+
tail call void asm sideeffect "li s4, 0", "~{s4}"()
2128+
ret void
2129+
}

0 commit comments

Comments
 (0)