Skip to content

Commit fefda86

Browse files
authored
[RISCV] Handle codegen for Big Endian (#172668)
- Handle BE in RISCVSubtarget; - Handle RISC-V big-endian f64; - Handle loads/stores; - Add tests for LE vs. BE
1 parent 4012c1f commit fefda86

File tree

6 files changed

+676
-15
lines changed

6 files changed

+676
-15
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8426,6 +8426,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
84268426
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
84278427
Hi.getValue(1));
84288428

8429+
// For big-endian, swap the order of Lo and Hi.
8430+
if (!Subtarget.isLittleEndian())
8431+
std::swap(Lo, Hi);
8432+
84298433
SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
84308434
return DAG.getMergeValues({Pair, Chain}, DL);
84318435
}
@@ -8498,15 +8502,21 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
84988502
SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
84998503
DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
85008504

8501-
SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8502-
Store->getPointerInfo(), Store->getBaseAlign(),
8503-
Store->getMemOperand()->getFlags());
8505+
SDValue Lo = Split.getValue(0);
8506+
SDValue Hi = Split.getValue(1);
8507+
8508+
// For big-endian, swap the order of Lo and Hi before storing.
8509+
if (!Subtarget.isLittleEndian())
8510+
std::swap(Lo, Hi);
8511+
8512+
SDValue LoStore = DAG.getStore(
8513+
Chain, DL, Lo, BasePtr, Store->getPointerInfo(),
8514+
Store->getBaseAlign(), Store->getMemOperand()->getFlags());
85048515
BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8505-
SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8506-
Store->getPointerInfo().getWithOffset(4),
8507-
Store->getBaseAlign(),
8508-
Store->getMemOperand()->getFlags());
8509-
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8516+
SDValue HiStore = DAG.getStore(
8517+
Chain, DL, Hi, BasePtr, Store->getPointerInfo().getWithOffset(4),
8518+
Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8519+
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
85108520
}
85118521
if (VT == MVT::i64) {
85128522
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
@@ -15231,8 +15241,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1523115241
Subtarget.hasStdExtDOrZdinx()) {
1523215242
SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
1523315243
DAG.getVTList(MVT::i32, MVT::i32), Op0);
15234-
SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
15235-
NewReg.getValue(0), NewReg.getValue(1));
15244+
SDValue Lo = NewReg.getValue(0);
15245+
SDValue Hi = NewReg.getValue(1);
15246+
// For big-endian, swap the order when building the i64 pair.
15247+
if (!Subtarget.isLittleEndian())
15248+
std::swap(Lo, Hi);
15249+
SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1523615250
Results.push_back(RetReg);
1523715251
} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
1523815252
isTypeLegal(Op0VT)) {
@@ -22676,6 +22690,11 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
2267622690
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
2267722691
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2267822692
MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
22693+
22694+
// For big-endian, the high part is at offset 0 and the low part at offset 4.
22695+
if (!Subtarget.isLittleEndian())
22696+
std::swap(LoReg, HiReg);
22697+
2267922698
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
2268022699
.addFrameIndex(FI)
2268122700
.addImm(0)
@@ -22700,6 +22719,8 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
2270022719
Register DstReg = MI.getOperand(0).getReg();
2270122720
Register LoReg = MI.getOperand(1).getReg();
2270222721
Register HiReg = MI.getOperand(2).getReg();
22722+
bool KillLo = MI.getOperand(1).isKill();
22723+
bool KillHi = MI.getOperand(2).isKill();
2270322724

2270422725
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
2270522726
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
@@ -22709,13 +22730,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
2270922730
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
2271022731
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2271122732
MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
22733+
22734+
// For big-endian, store the high part at offset 0 and the low part at
22735+
// offset 4.
22736+
if (!Subtarget.isLittleEndian()) {
22737+
std::swap(LoReg, HiReg);
22738+
std::swap(KillLo, KillHi);
22739+
}
22740+
2271222741
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22713-
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22742+
.addReg(LoReg, getKillRegState(KillLo))
2271422743
.addFrameIndex(FI)
2271522744
.addImm(0)
2271622745
.addMemOperand(MMOLo);
2271722746
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22718-
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22747+
.addReg(HiReg, getKillRegState(KillHi))
2271922748
.addFrameIndex(FI)
2272022749
.addImm(4)
2272122750
.addMemOperand(MMOHi);
@@ -23545,6 +23574,12 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
2354523574
RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
2354623575
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
2354723576
}
23577+
23578+
// For big-endian, swap the order of Lo and Hi when building the pair.
23579+
const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
23580+
if (!Subtarget.isLittleEndian())
23581+
std::swap(Lo, Hi);
23582+
2354823583
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
2354923584
}
2355023585

@@ -23916,6 +23951,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2391623951
SDValue Lo = SplitF64.getValue(0);
2391723952
SDValue Hi = SplitF64.getValue(1);
2391823953

23954+
// For big-endian, swap the order of Lo and Hi when passing.
23955+
if (!Subtarget.isLittleEndian())
23956+
std::swap(Lo, Hi);
23957+
2391923958
Register RegLo = VA.getLocReg();
2392023959
RegsToPass.push_back(std::make_pair(RegLo, Lo));
2392123960

@@ -24143,8 +24182,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2414324182
MVT::i32, Glue);
2414424183
Chain = RetValue2.getValue(1);
2414524184
Glue = RetValue2.getValue(2);
24146-
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
24147-
RetValue2);
24185+
24186+
// For big-endian, swap the order when building the pair.
24187+
SDValue Lo = RetValue;
24188+
SDValue Hi = RetValue2;
24189+
if (!Subtarget.isLittleEndian())
24190+
std::swap(Lo, Hi);
24191+
24192+
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
2414824193
} else
2414924194
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
2415024195

@@ -24209,6 +24254,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2420924254
DAG.getVTList(MVT::i32, MVT::i32), Val);
2421024255
SDValue Lo = SplitF64.getValue(0);
2421124256
SDValue Hi = SplitF64.getValue(1);
24257+
24258+
// For big-endian, swap the order of Lo and Hi when returning.
24259+
if (!Subtarget.isLittleEndian())
24260+
std::swap(Lo, Hi);
24261+
2421224262
Register RegLo = VA.getLocReg();
2421324263
Register RegHi = RVLocs[++i].getLocReg();
2421424264

llvm/lib/Target/RISCV/RISCVSubtarget.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
109109
unsigned RVVVectorBitsMax,
110110
const TargetMachine &TM)
111111
: RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
112-
RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
112+
IsLittleEndian(TT.isLittleEndian()), RVVVectorBitsMin(RVVVectorBitsMin),
113+
RVVVectorBitsMax(RVVVectorBitsMax),
113114
FrameLowering(
114115
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
115116
InstrInfo(*this), TLInfo(TM, *this) {

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
9797
RISCVProcFamilyEnum RISCVProcFamily = Others;
9898
RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;
9999

100+
bool IsLittleEndian = true;
101+
100102
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
101103
bool ATTRIBUTE = DEFAULT;
102104
#include "RISCVGenSubtargetInfo.inc"
@@ -220,6 +222,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
220222
}
221223

222224
bool is64Bit() const { return IsRV64; }
225+
bool isLittleEndian() const { return IsLittleEndian; }
223226
MVT getXLenVT() const {
224227
return is64Bit() ? MVT::i64 : MVT::i32;
225228
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-LE %s
3+
; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-BE %s
4+
5+
; Test operations that involve SplitF64/BuildPairF64 on RV32 with D extension
6+
; but soft-float ABI. This configuration triggers the special handling for
7+
; big-endian.
8+
9+
define double @fneg(double %a) nounwind {
10+
; RV32IFD-LE-LABEL: fneg:
11+
; RV32IFD-LE: # %bb.0:
12+
; RV32IFD-LE-NEXT: lui a2, 524288
13+
; RV32IFD-LE-NEXT: xor a1, a1, a2
14+
; RV32IFD-LE-NEXT: ret
15+
;
16+
; RV32IFD-BE-LABEL: fneg:
17+
; RV32IFD-BE: # %bb.0:
18+
; RV32IFD-BE-NEXT: lui a2, 524288
19+
; RV32IFD-BE-NEXT: xor a0, a0, a2
20+
; RV32IFD-BE-NEXT: ret
21+
%1 = fneg double %a
22+
ret double %1
23+
}
24+
25+
define double @fabs(double %a) nounwind {
26+
; RV32IFD-LE-LABEL: fabs:
27+
; RV32IFD-LE: # %bb.0:
28+
; RV32IFD-LE-NEXT: slli a1, a1, 1
29+
; RV32IFD-LE-NEXT: srli a1, a1, 1
30+
; RV32IFD-LE-NEXT: ret
31+
;
32+
; RV32IFD-BE-LABEL: fabs:
33+
; RV32IFD-BE: # %bb.0:
34+
; RV32IFD-BE-NEXT: slli a0, a0, 1
35+
; RV32IFD-BE-NEXT: srli a0, a0, 1
36+
; RV32IFD-BE-NEXT: ret
37+
%1 = call double @llvm.fabs.f64(double %a)
38+
ret double %1
39+
}
40+
41+
define double @fcopysign(double %a, double %b) nounwind {
42+
; RV32IFD-LE-LABEL: fcopysign:
43+
; RV32IFD-LE: # %bb.0:
44+
; RV32IFD-LE-NEXT: addi sp, sp, -16
45+
; RV32IFD-LE-NEXT: sw a2, 8(sp)
46+
; RV32IFD-LE-NEXT: sw a3, 12(sp)
47+
; RV32IFD-LE-NEXT: fld fa5, 8(sp)
48+
; RV32IFD-LE-NEXT: sw a0, 8(sp)
49+
; RV32IFD-LE-NEXT: sw a1, 12(sp)
50+
; RV32IFD-LE-NEXT: fld fa4, 8(sp)
51+
; RV32IFD-LE-NEXT: fsgnj.d fa5, fa4, fa5
52+
; RV32IFD-LE-NEXT: fsd fa5, 8(sp)
53+
; RV32IFD-LE-NEXT: lw a0, 8(sp)
54+
; RV32IFD-LE-NEXT: lw a1, 12(sp)
55+
; RV32IFD-LE-NEXT: addi sp, sp, 16
56+
; RV32IFD-LE-NEXT: ret
57+
;
58+
; RV32IFD-BE-LABEL: fcopysign:
59+
; RV32IFD-BE: # %bb.0:
60+
; RV32IFD-BE-NEXT: addi sp, sp, -16
61+
; RV32IFD-BE-NEXT: sw a2, 8(sp)
62+
; RV32IFD-BE-NEXT: sw a3, 12(sp)
63+
; RV32IFD-BE-NEXT: fld fa5, 8(sp)
64+
; RV32IFD-BE-NEXT: sw a0, 8(sp)
65+
; RV32IFD-BE-NEXT: sw a1, 12(sp)
66+
; RV32IFD-BE-NEXT: fld fa4, 8(sp)
67+
; RV32IFD-BE-NEXT: fsgnj.d fa5, fa4, fa5
68+
; RV32IFD-BE-NEXT: fsd fa5, 8(sp)
69+
; RV32IFD-BE-NEXT: lw a0, 8(sp)
70+
; RV32IFD-BE-NEXT: lw a1, 12(sp)
71+
; RV32IFD-BE-NEXT: addi sp, sp, 16
72+
; RV32IFD-BE-NEXT: ret
73+
%1 = call double @llvm.copysign.f64(double %a, double %b)
74+
ret double %1
75+
}
76+
77+
declare double @llvm.fabs.f64(double)
78+
declare double @llvm.copysign.f64(double, double)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32LE %s
3+
; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32BE %s
4+
5+
; Test f64 function calls with D extension and soft-float ABI
6+
; This specifically tests the LowerCall path that needs to swap Lo/Hi for BE
7+
8+
declare double @external_func(double, double)
9+
10+
define double @test_f64_call(double %a, double %b) {
11+
; RV32LE-LABEL: test_f64_call:
12+
; RV32LE: # %bb.0:
13+
; RV32LE-NEXT: addi sp, sp, -16
14+
; RV32LE-NEXT: .cfi_def_cfa_offset 16
15+
; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
16+
; RV32LE-NEXT: .cfi_offset ra, -4
17+
; RV32LE-NEXT: call external_func
18+
; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
19+
; RV32LE-NEXT: .cfi_restore ra
20+
; RV32LE-NEXT: addi sp, sp, 16
21+
; RV32LE-NEXT: .cfi_def_cfa_offset 0
22+
; RV32LE-NEXT: ret
23+
;
24+
; RV32BE-LABEL: test_f64_call:
25+
; RV32BE: # %bb.0:
26+
; RV32BE-NEXT: addi sp, sp, -16
27+
; RV32BE-NEXT: .cfi_def_cfa_offset 16
28+
; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
29+
; RV32BE-NEXT: .cfi_offset ra, -4
30+
; RV32BE-NEXT: call external_func
31+
; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
32+
; RV32BE-NEXT: .cfi_restore ra
33+
; RV32BE-NEXT: addi sp, sp, 16
34+
; RV32BE-NEXT: .cfi_def_cfa_offset 0
35+
; RV32BE-NEXT: ret
36+
%result = call double @external_func(double %a, double %b)
37+
ret double %result
38+
}
39+
40+
; Test with a computation before the call to force SplitF64
41+
define double @test_f64_call_with_fadd(double %a, double %b) {
42+
; RV32LE-LABEL: test_f64_call_with_fadd:
43+
; RV32LE: # %bb.0:
44+
; RV32LE-NEXT: addi sp, sp, -16
45+
; RV32LE-NEXT: .cfi_def_cfa_offset 16
46+
; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
47+
; RV32LE-NEXT: .cfi_offset ra, -4
48+
; RV32LE-NEXT: sw a2, 0(sp)
49+
; RV32LE-NEXT: sw a3, 4(sp)
50+
; RV32LE-NEXT: fld fa5, 0(sp)
51+
; RV32LE-NEXT: sw a0, 0(sp)
52+
; RV32LE-NEXT: sw a1, 4(sp)
53+
; RV32LE-NEXT: fld fa4, 0(sp)
54+
; RV32LE-NEXT: fadd.d fa5, fa4, fa5
55+
; RV32LE-NEXT: fsd fa5, 0(sp)
56+
; RV32LE-NEXT: lw a0, 0(sp)
57+
; RV32LE-NEXT: lw a1, 4(sp)
58+
; RV32LE-NEXT: mv a2, a0
59+
; RV32LE-NEXT: mv a3, a1
60+
; RV32LE-NEXT: call external_func
61+
; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
62+
; RV32LE-NEXT: .cfi_restore ra
63+
; RV32LE-NEXT: addi sp, sp, 16
64+
; RV32LE-NEXT: .cfi_def_cfa_offset 0
65+
; RV32LE-NEXT: ret
66+
;
67+
; RV32BE-LABEL: test_f64_call_with_fadd:
68+
; RV32BE: # %bb.0:
69+
; RV32BE-NEXT: addi sp, sp, -16
70+
; RV32BE-NEXT: .cfi_def_cfa_offset 16
71+
; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
72+
; RV32BE-NEXT: .cfi_offset ra, -4
73+
; RV32BE-NEXT: sw a2, 0(sp)
74+
; RV32BE-NEXT: sw a3, 4(sp)
75+
; RV32BE-NEXT: fld fa5, 0(sp)
76+
; RV32BE-NEXT: sw a0, 0(sp)
77+
; RV32BE-NEXT: sw a1, 4(sp)
78+
; RV32BE-NEXT: fld fa4, 0(sp)
79+
; RV32BE-NEXT: fadd.d fa5, fa4, fa5
80+
; RV32BE-NEXT: fsd fa5, 0(sp)
81+
; RV32BE-NEXT: lw a0, 0(sp)
82+
; RV32BE-NEXT: lw a1, 4(sp)
83+
; RV32BE-NEXT: mv a2, a0
84+
; RV32BE-NEXT: mv a3, a1
85+
; RV32BE-NEXT: call external_func
86+
; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
87+
; RV32BE-NEXT: .cfi_restore ra
88+
; RV32BE-NEXT: addi sp, sp, 16
89+
; RV32BE-NEXT: .cfi_def_cfa_offset 0
90+
; RV32BE-NEXT: ret
91+
%sum = fadd double %a, %b
92+
%result = call double @external_func(double %sum, double %sum)
93+
ret double %result
94+
}

0 commit comments

Comments
 (0)