Skip to content

Commit 386a497

Browse files
committed
[RISCV] Use Zilsd Pseudos in ISel
This is proposed as an alternative to #169529. The idea is to choose, during instruction selection, between directly generating `LD`/`SD` and generating `PseudoLD_RV32_OPT`/`PseudoSD_RV32_OPT`, based on the volatility of the access. Volatile operations always become `LD`/`SD`, whereas non-volatile operations may become a pair of `LW`/`SW` instructions, depending on register allocation, which can save some `MV` instructions. The advantage of this approach is that the pre-RA pass no longer needs to search for instructions to pair (including comparing their memory operands): we already know the accesses are paired, yet, unlike `LD`/`SD`, the pseudos do not constrain the register allocator. This PR may not be sufficient on its own - we probably still need to check that the passes between ISel and the Pre-RA Load/Store Pairing pass cope with these pseudos correctly. This also fixes a verifier error with the kill flags: in `splitLdSdIntoTwo`, the first register is no longer marked as killed when it is also the base register.
1 parent 5f777b2 commit 386a497

File tree

3 files changed

+73
-51
lines changed

3 files changed

+73
-51
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,52 +1817,77 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
18171817
case RISCVISD::LD_RV32: {
18181818
assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
18191819

1820+
auto *MemNode = cast<MemSDNode>(Node);
1821+
18201822
SDValue Base, Offset;
1821-
SDValue Chain = Node->getOperand(0);
1822-
SDValue Addr = Node->getOperand(1);
1823+
SDValue Chain = MemNode->getChain();
1824+
SDValue Addr = MemNode->getBasePtr();
18231825
SelectAddrRegImm(Addr, Base, Offset);
18241826

18251827
SDValue Ops[] = {Base, Offset, Chain};
1826-
MachineSDNode *New = CurDAG->getMachineNode(
1827-
RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1828-
SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1829-
MVT::i32, SDValue(New, 0));
1830-
SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1831-
MVT::i32, SDValue(New, 0));
1832-
CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1828+
MachineSDNode *New;
1829+
SDValue Lo, Hi, OutChain;
1830+
if (MemNode->isVolatile()) {
1831+
New = CurDAG->getMachineNode(RISCV::LD_RV32, DL,
1832+
{MVT::Untyped, MVT::Other}, Ops);
1833+
1834+
Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32,
1835+
SDValue(New, 0));
1836+
Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32,
1837+
SDValue(New, 0));
1838+
OutChain = SDValue(New, 1);
1839+
} else {
1840+
New = CurDAG->getMachineNode(RISCV::PseudoLD_RV32_OPT, DL,
1841+
{MVT::i32, MVT::i32, MVT::Other}, Ops);
1842+
Lo = SDValue(New, 0);
1843+
Hi = SDValue(New, 1);
1844+
OutChain = SDValue(New, 2);
1845+
}
1846+
1847+
CurDAG->setNodeMemRefs(New, {MemNode->getMemOperand()});
18331848
ReplaceUses(SDValue(Node, 0), Lo);
18341849
ReplaceUses(SDValue(Node, 1), Hi);
1835-
ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1850+
ReplaceUses(SDValue(Node, 2), OutChain);
18361851
CurDAG->RemoveDeadNode(Node);
18371852
return;
18381853
}
18391854
case RISCVISD::SD_RV32: {
1855+
auto *MemNode = cast<MemSDNode>(Node);
1856+
18401857
SDValue Base, Offset;
1841-
SDValue Chain = Node->getOperand(0);
1842-
SDValue Addr = Node->getOperand(3);
1858+
SDValue Chain = MemNode->getChain();
1859+
SDValue Addr = MemNode->getBasePtr();
18431860
SelectAddrRegImm(Addr, Base, Offset);
18441861

18451862
SDValue Lo = Node->getOperand(1);
18461863
SDValue Hi = Node->getOperand(2);
18471864

1848-
SDValue RegPair;
1849-
// Peephole to use X0_Pair for storing zero.
1850-
if (isNullConstant(Lo) && isNullConstant(Hi)) {
1851-
RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1852-
} else {
1853-
SDValue Ops[] = {
1854-
CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1855-
CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1856-
CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1865+
MachineSDNode *New;
1866+
if (MemNode->isVolatile()) {
1867+
SDValue RegPair;
1868+
// Peephole to use X0_Pair for storing zero.
1869+
if (isNullConstant(Lo) && isNullConstant(Hi)) {
1870+
RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1871+
} else {
1872+
SDValue Ops[] = {
1873+
CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1874+
Lo, CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1875+
Hi, CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1876+
1877+
RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1878+
MVT::Untyped, Ops),
1879+
0);
1880+
}
18571881

1858-
RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1859-
MVT::Untyped, Ops),
1860-
0);
1882+
New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1883+
{RegPair, Base, Offset, Chain});
1884+
} else {
1885+
New = CurDAG->getMachineNode(RISCV::PseudoSD_RV32_OPT, DL, MVT::Other,
1886+
{Lo, Hi, Base, Offset, Chain});
18611887
}
18621888

1863-
MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1864-
{RegPair, Base, Offset, Chain});
1865-
CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1889+
CurDAG->setNodeMemRefs(New, {MemNode->getMemOperand()});
1890+
18661891
ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
18671892
CurDAG->RemoveDeadNode(Node);
18681893
return;

llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,8 @@ void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
509509
FirstReg != SecondReg &&
510510
"First register and second register is impossible to be same register");
511511
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
512-
.addReg(FirstReg, getKillRegState(FirstOp.isKill()))
512+
.addReg(FirstReg,
513+
getKillRegState(FirstOp.isKill() && FirstReg != BaseReg))
513514
.addReg(BaseReg);
514515

515516
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))

llvm/test/CodeGen/RISCV/zilsd.ll

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
define i64 @load(ptr %a) nounwind {
1010
; CHECK-LABEL: load:
1111
; CHECK: # %bb.0:
12-
; CHECK-NEXT: mv a2, a0
13-
; CHECK-NEXT: ld a0, 80(a0)
14-
; CHECK-NEXT: ld zero, 0(a2)
12+
; CHECK-NEXT: lw a2, 80(a0)
13+
; CHECK-NEXT: lw a1, 84(a0)
14+
; CHECK-NEXT: ld zero, 0(a0)
15+
; CHECK-NEXT: mv a0, a2
1516
; CHECK-NEXT: ret
1617
%1 = getelementptr i64, ptr %a, i32 10
1718
%2 = load i64, ptr %1
@@ -44,10 +45,8 @@ define i64 @load_align4(ptr %a) nounwind {
4445
define void @store(ptr %a, i64 %b) nounwind {
4546
; CHECK-LABEL: store:
4647
; CHECK: # %bb.0:
47-
; CHECK-NEXT: mv a3, a2
48-
; CHECK-NEXT: mv a2, a1
49-
; CHECK-NEXT: sd a2, 0(a0)
50-
; CHECK-NEXT: sd a2, 88(a0)
48+
; CHECK-NEXT: sw a1, 0(a1)
49+
; CHECK-NEXT: sw a2, 4(a1)
5150
; CHECK-NEXT: ret
5251
store i64 %b, ptr %a
5352
%1 = getelementptr i64, ptr %a, i32 11
@@ -64,16 +63,14 @@ define void @store_align4(ptr %a, i64 %b) nounwind {
6463
;
6564
; FAST-LABEL: store_align4:
6665
; FAST: # %bb.0:
67-
; FAST-NEXT: mv a3, a2
68-
; FAST-NEXT: mv a2, a1
69-
; FAST-NEXT: sd a2, 88(a0)
66+
; FAST-NEXT: sw a1, 0(a1)
67+
; FAST-NEXT: sw a2, 4(a1)
7068
; FAST-NEXT: ret
7169
;
7270
; 4BYTEALIGN-LABEL: store_align4:
7371
; 4BYTEALIGN: # %bb.0:
74-
; 4BYTEALIGN-NEXT: mv a3, a2
75-
; 4BYTEALIGN-NEXT: mv a2, a1
76-
; 4BYTEALIGN-NEXT: sd a2, 88(a0)
72+
; 4BYTEALIGN-NEXT: sw a1, 0(a1)
73+
; 4BYTEALIGN-NEXT: sw a2, 4(a1)
7774
; 4BYTEALIGN-NEXT: ret
7875
%1 = getelementptr i64, ptr %a, i32 11
7976
store i64 %b, ptr %1, align 4
@@ -158,9 +155,8 @@ define void @store_unaligned(ptr %p, i64 %v) {
158155
;
159156
; FAST-LABEL: store_unaligned:
160157
; FAST: # %bb.0:
161-
; FAST-NEXT: mv a3, a2
162-
; FAST-NEXT: mv a2, a1
163-
; FAST-NEXT: sd a2, 0(a0)
158+
; FAST-NEXT: sw a1, 0(a1)
159+
; FAST-NEXT: sw a2, 4(a1)
164160
; FAST-NEXT: ret
165161
;
166162
; 4BYTEALIGN-LABEL: store_unaligned:
@@ -200,8 +196,7 @@ entry:
200196
define void @store_g() nounwind {
201197
; CHECK-LABEL: store_g:
202198
; CHECK: # %bb.0: # %entyr
203-
; CHECK-NEXT: lui a0, %hi(g)
204-
; CHECK-NEXT: sd zero, %lo(g)(a0)
199+
; CHECK-NEXT: sd zero, 0(zero)
205200
; CHECK-NEXT: ret
206201
entyr:
207202
store i64 0, ptr @g
@@ -213,11 +208,12 @@ define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
213208
; CHECK: # %bb.0: # %entry
214209
; CHECK-NEXT: lui a1, 4
215210
; CHECK-NEXT: add a0, a0, a1
216-
; CHECK-NEXT: ld a2, -384(a0)
217-
; CHECK-NEXT: addi a2, a2, 1
218-
; CHECK-NEXT: seqz a1, a2
219-
; CHECK-NEXT: add a3, a3, a1
220-
; CHECK-NEXT: sd a2, -384(a0)
211+
; CHECK-NEXT: ld a0, -384(a0)
212+
; CHECK-NEXT: addi a2, a0, 1
213+
; CHECK-NEXT: seqz a3, a2
214+
; CHECK-NEXT: add a1, a1, a3
215+
; CHECK-NEXT: sw a2, 1(a0)
216+
; CHECK-NEXT: sw a1, 5(a0)
221217
; CHECK-NEXT: ret
222218
entry:
223219
%add.ptr = getelementptr inbounds i64, ptr %p, i64 2000

0 commit comments

Comments
 (0)