-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Remove custom isel lowering of i64 to Zilsd load/store. #169067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Rely on the ZilsdOptimizer instead. Test changes are primarily because register allocation didn't work out. This is the reason we want to use the Zilsd optimizer. There's still some work to do to support X0 pair for storing zero or discarding results of a volatile load.
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesRely on the ZilsdOptimizer instead. Test changes are primarily because register allocation didn't work out. This is the reason we want to use the Zilsd optimizer. There's still some work to do to support X0 pair for storing zero or discarding results of a volatile load. Full diff: https://github.com/llvm/llvm-project/pull/169067.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5025122db3681..f8cb84b5e5493 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1814,59 +1814,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case RISCVISD::LD_RV32: {
- assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
-
- SDValue Base, Offset;
- SDValue Chain = Node->getOperand(0);
- SDValue Addr = Node->getOperand(1);
- SelectAddrRegImm(Addr, Base, Offset);
-
- SDValue Ops[] = {Base, Offset, Chain};
- MachineSDNode *New = CurDAG->getMachineNode(
- RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
- SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
- MVT::i32, SDValue(New, 0));
- SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
- MVT::i32, SDValue(New, 0));
- CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
- ReplaceUses(SDValue(Node, 0), Lo);
- ReplaceUses(SDValue(Node, 1), Hi);
- ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
- CurDAG->RemoveDeadNode(Node);
- return;
- }
- case RISCVISD::SD_RV32: {
- SDValue Base, Offset;
- SDValue Chain = Node->getOperand(0);
- SDValue Addr = Node->getOperand(3);
- SelectAddrRegImm(Addr, Base, Offset);
-
- SDValue Lo = Node->getOperand(1);
- SDValue Hi = Node->getOperand(2);
-
- SDValue RegPair;
- // Peephole to use X0_Pair for storing zero.
- if (isNullConstant(Lo) && isNullConstant(Hi)) {
- RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
- } else {
- SDValue Ops[] = {
- CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
- CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
- CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
-
- RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- MVT::Untyped, Ops),
- 0);
- }
-
- MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
- {RegPair, Base, Offset, Chain});
- CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
- ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
- CurDAG->RemoveDeadNode(Node);
- return;
- }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
@@ -3014,8 +2961,6 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
static bool isWorthFoldingAdd(SDValue Add) {
for (auto *User : Add->users()) {
if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
- User->getOpcode() != RISCVISD::LD_RV32 &&
- User->getOpcode() != RISCVISD::SD_RV32 &&
User->getOpcode() != ISD::ATOMIC_LOAD &&
User->getOpcode() != ISD::ATOMIC_STORE)
return false;
@@ -3030,9 +2975,6 @@ static bool isWorthFoldingAdd(SDValue Add) {
if (User->getOpcode() == ISD::ATOMIC_STORE &&
cast<AtomicSDNode>(User)->getVal() == Add)
return false;
- if (User->getOpcode() == RISCVISD::SD_RV32 &&
- (User->getOperand(0) == Add || User->getOperand(1) == Add))
- return false;
if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
return false;
}
@@ -3045,7 +2987,6 @@ bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
default:
return false;
case ISD::LOAD:
- case RISCVISD::LD_RV32:
case ISD::ATOMIC_LOAD:
break;
case ISD::STORE:
@@ -3053,11 +2994,6 @@ bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
if (cast<StoreSDNode>(User)->getValue() == Add)
return false;
break;
- case RISCVISD::SD_RV32:
- // Don't allow stores of Add. It must only be used as the address.
- if (User->getOperand(0) == Add || User->getOperand(1) == Add)
- return false;
- break;
case ISD::ATOMIC_STORE:
// Don't allow stores of Add. It must only be used as the address.
if (cast<AtomicSDNode>(User)->getVal() == Add)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6020fb6ca16ce..e00636c2ab23a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -342,11 +342,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
- if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i64, Custom);
- }
-
if (Subtarget.is64Bit()) {
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
@@ -8398,26 +8393,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
- if (VT == MVT::i64) {
- assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
- if (Store->isTruncatingStore())
- return SDValue();
-
- if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
- return SDValue();
-
- SDLoc DL(Op);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
- DAG.getTargetConstant(0, DL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
- DAG.getTargetConstant(1, DL, MVT::i32));
-
- return DAG.getMemIntrinsicNode(
- RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
- {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
- Store->getMemOperand());
- }
if (VT == MVT::bf16)
return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
@@ -14799,25 +14774,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// sext_inreg we emit for ADD/SUB/MUL/SLLI.
LoadSDNode *Ld = cast<LoadSDNode>(N);
- if (N->getValueType(0) == MVT::i64) {
- assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
-
- if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
- return;
-
- SDLoc DL(N);
- SDValue Result = DAG.getMemIntrinsicNode(
- RISCVISD::LD_RV32, DL,
- DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
- {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
- SDValue Lo = Result.getValue(0);
- SDValue Hi = Result.getValue(1);
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
- Results.append({Pair, Result.getValue(2)});
- return;
- }
-
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index 4fc859f2547c1..0b907adbf5c79 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -15,16 +15,6 @@
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDT_RISCV_LD_RV32
- : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;
-def SDT_RISCV_SD_RV32
- : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>;
-
-def riscv_ld_rv32 : RVSDNode<"LD_RV32", SDT_RISCV_LD_RV32,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def riscv_st_rv32 : RVSDNode<"SD_RV32", SDT_RISCV_SD_RV32,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 7f2d2dd120306..b6cf927c66dd5 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -4,12 +4,15 @@
; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,FAST %s
+; FIXME: Use ld zero, 0(a0)
define i64 @load(ptr %a) nounwind {
; CHECK-LABEL: load:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: ld a0, 80(a0)
-; CHECK-NEXT: ld zero, 0(a2)
+; CHECK-NEXT: lw a2, 80(a0)
+; CHECK-NEXT: lw a1, 84(a0)
+; CHECK-NEXT: lw zero, 4(a0)
+; CHECK-NEXT: lw zero, 0(a0)
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 10
%2 = load i64, ptr %1
@@ -20,10 +23,10 @@ define i64 @load(ptr %a) nounwind {
define void @store(ptr %a, i64 %b) nounwind {
; CHECK-LABEL: store:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: sd a2, 0(a0)
-; CHECK-NEXT: sd a2, 88(a0)
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: sw a2, 4(a0)
+; CHECK-NEXT: sw a1, 88(a0)
+; CHECK-NEXT: sw a2, 92(a0)
; CHECK-NEXT: ret
store i64 %b, ptr %a
%1 = getelementptr i64, ptr %a, i32 11
@@ -85,9 +88,8 @@ define void @store_unaligned(ptr %p, i64 %v) {
;
; FAST-LABEL: store_unaligned:
; FAST: # %bb.0:
-; FAST-NEXT: mv a3, a2
-; FAST-NEXT: mv a2, a1
-; FAST-NEXT: sd a2, 0(a0)
+; FAST-NEXT: sw a1, 0(a0)
+; FAST-NEXT: sw a2, 4(a0)
; FAST-NEXT: ret
store i64 %v, ptr %p, align 1
ret void
@@ -106,11 +108,13 @@ entry:
ret i64 %0
}
+; FIXME: Use sd zero, %lo(g)(a0)
define void @store_g() nounwind {
; CHECK-LABEL: store_g:
; CHECK: # %bb.0: # %entyr
; CHECK-NEXT: lui a0, %hi(g)
-; CHECK-NEXT: sd zero, %lo(g)(a0)
+; CHECK-NEXT: sw zero, %lo(g+4)(a0)
+; CHECK-NEXT: sw zero, %lo(g)(a0)
; CHECK-NEXT: ret
entyr:
store i64 0, ptr @g
@@ -122,10 +126,11 @@ define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ld a2, -384(a0)
+; CHECK-NEXT: lw a2, -384(a0)
+; CHECK-NEXT: lw a1, -380(a0)
; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: seqz a1, a2
-; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: seqz a3, a2
+; CHECK-NEXT: add a3, a1, a3
; CHECK-NEXT: sd a2, -384(a0)
; CHECK-NEXT: ret
entry:
|
🐧 Linux x64 Test Results
|
|
I'm not sure how I feel about this. I do see that we can still improve zilsd optimisations, to undo the regressions, but I also prefer that we keep the 64-bit operations as 64-bit operations, rather than breaking them apart during isel, only to put them back together again several passes later. I do also see that removing this code means less to maintain. |
Is using Zilsd worth the additional moves required for the even/odd pairing? |
1 similar comment
Is using Zilsd worth the additional moves required for the even/odd pairing? |
|
I also personally support this because it reduces maintenance effort and possibly enables better register allocation due to the looser constraints lol |
4vtomat
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM~
|
I think we have to keep the SelectionDAG code for volatile loads/stores. If we want to guarantee that an i64 volatile load/store stays as a 64-bit operation we can't rely on the ZilsdOptimizer. |
|
I'm going to close this and open a new PR to still lower volatile loads/stores in SelectionDAG. |
|
@christian-herber-nxp what was the reason for doing this at O0 rather than e.g. Os or Oz? |
That was a typo. It is Oz. |


Rely on the ZilsdOptimizer instead. Test changes are primarily because register allocation didn't work out. This is the reason we want to use the Zilsd optimizer.
There's still some work to do to support X0 pair for storing zero or discarding results of a volatile load.