Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 95 additions & 30 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8347,6 +8347,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
Hi.getValue(1));

// For big-endian, swap the order of Lo and Hi.
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
return DAG.getMergeValues({Pair, Chain}, DL);
}
Expand Down Expand Up @@ -8419,15 +8423,21 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), StoredVal);

SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
Store->getPointerInfo(), Store->getBaseAlign(),
Store->getMemOperand()->getFlags());
SDValue Lo = Split.getValue(0);
SDValue Hi = Split.getValue(1);

// For big-endian, swap the order of Lo and Hi before storing.
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

SDValue LoStore = DAG.getStore(
Chain, DL, Lo, BasePtr, Store->getPointerInfo(),
Store->getBaseAlign(), Store->getMemOperand()->getFlags());
BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
Store->getPointerInfo().getWithOffset(4),
Store->getBaseAlign(),
Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
SDValue HiStore = DAG.getStore(
Chain, DL, Hi, BasePtr, Store->getPointerInfo().getWithOffset(4),
Store->getBaseAlign(), Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
}
if (VT == MVT::i64) {
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
Expand Down Expand Up @@ -15160,8 +15170,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Subtarget.hasStdExtDOrZdinx()) {
SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Op0);
SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
NewReg.getValue(0), NewReg.getValue(1));
SDValue Lo = NewReg.getValue(0);
SDValue Hi = NewReg.getValue(1);
// For big-endian, swap the order when building the i64 pair.
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);
SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
Results.push_back(RetReg);
} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
isTypeLegal(Op0VT)) {
Expand Down Expand Up @@ -22538,14 +22552,27 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);

// For big-endian, the high part is at offset 0 and the low part at offset 4.
if (!Subtarget.isLittleEndian()) {
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we swap LoReg and HiReg instead of mostly duplicating the code?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
} else {
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
Expand All @@ -22571,16 +22598,32 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);

// For big-endian, store the high part at offset 0 and the low part at
// offset 4.
if (!Subtarget.isLittleEndian()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a lot of code duplication here. Can we use more variables to reduce it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks for the suggestion.

BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
} else {
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
}
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
Expand Down Expand Up @@ -23407,6 +23450,13 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}

// For big-endian, swap the order of Lo and Hi when building the pair.
const RISCVSubtarget &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
// TESTED with: CodeGen/RISCV/bigendian-double-bitmanip.ll
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

Expand Down Expand Up @@ -23778,6 +23828,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);

// For big-endian, swap the order of Lo and Hi when passing.
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));

Expand Down Expand Up @@ -24005,8 +24059,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
RetValue2);

// For big-endian, swap the order when building the pair.
SDValue Lo = RetValue;
SDValue Hi = RetValue2;
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
} else
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

Expand Down Expand Up @@ -24071,6 +24131,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);

// For big-endian, swap the order of Lo and Hi when returning.
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);

Register RegLo = VA.getLocReg();
Register RegHi = RVLocs[++i].getLocReg();

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
StringRef ABIName, unsigned RVVVectorBitsMin,
unsigned RVVVectorBitsMax,
const TargetMachine &TM)
: RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
: RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS), TargetTriple(TT),
RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
RISCVProcFamilyEnum RISCVProcFamily = Others;
RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;

Triple TargetTriple;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need TargetTriple?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, will remove it.


#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool ATTRIBUTE = DEFAULT;
#include "RISCVGenSubtargetInfo.inc"
Expand Down Expand Up @@ -220,6 +222,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}

/// Returns the value of the IsRV64 flag.
bool is64Bit() const {
  return IsRV64;
}
/// Returns true when the triple held in TargetTriple is little-endian.
bool isLittleEndian() const {
  const bool LittleEndian = TargetTriple.isLittleEndian();
  return LittleEndian;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we cache this in a bool in the constructor?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

/// Integer value type selected by is64Bit(): MVT::i64 when it holds,
/// MVT::i32 otherwise.
MVT getXLenVT() const {
  if (is64Bit())
    return MVT::i64;
  return MVT::i32;
}
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/RISCV/bigendian-double-bitmanip.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-LE %s
; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32IFD-BE %s

; Test operations that involve SplitF64/BuildPairF64 on RV32 with the D
; extension but a soft-float ABI. This configuration triggers the special
; handling for big-endian targets.

define double @fneg(double %a) nounwind {
; RV32IFD-LE-LABEL: fneg:
; RV32IFD-LE: # %bb.0:
; RV32IFD-LE-NEXT: lui a2, 524288
; RV32IFD-LE-NEXT: xor a1, a1, a2
; RV32IFD-LE-NEXT: ret
;
; RV32IFD-BE-LABEL: fneg:
; RV32IFD-BE: # %bb.0:
; RV32IFD-BE-NEXT: lui a2, 524288
; RV32IFD-BE-NEXT: xor a0, a0, a2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PSABI doc riscv-non-isa/riscv-elf-psabi-doc#470 says

This register-pair ordering is defined in terms of value significance and is
independent of endianness.  For example, on RV32BE a 64-bit scalar returned
in a0/a1 places bits [31:0] (the least-significant XLEN bits) in a0 and
bits [63:32] in a1; memory layout remains big-endian.

Yet this code seems to be toggling the sign bit in a0. If the psabi doc is correct, isn't the sign bit in a1?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thing is that riscv-non-isa/riscv-elf-psabi-doc#470 is not ready yet -- or at least it has not been officially merged/accepted.
And even GCC has not been updated to follow all the changes we plan for the psABI (BE is currently marked as experimental), e.g.:

$ cat test.c
  #include <stdint.h>
  double flip_sign_bit(double a) {
      union { double d; uint64_t i; } u;
      u.d = a;
      u.i ^= (1ULL << 63);
      return u.d;
  }

$ riscv64-unknown-elf-gcc -c -O2 -march=rv32gc -mabi=ilp32 -mbig-endian test.c
$ llvm-objdump -d /tmp/test.o
  flip_sign_bit:
     lui    a4, 0x80000
     mv     a5, a1
     xor    a4, a4, a0
     mv     a0, a4
     mv     a1, a5
     ret

Therefore, once the psABI is ready, we can change both GCC and LLVM. Until then, clang will emit a warning for riscvbe targets (#165599), something like:

clang: warning: big-endian RISC-V target support is experimental [-Wriscv-be-experimental]

; RV32IFD-BE-NEXT: ret
%1 = fneg double %a
ret double %1
}

; llvm.fabs.f64 on an f64 argument. The checks expect the sign bit to be
; cleared with a shift-left/shift-right-by-one pair on a single GPR: a1 in the
; little-endian run and a0 in the big-endian run.
define double @fabs(double %a) nounwind {
; RV32IFD-LE-LABEL: fabs:
; RV32IFD-LE: # %bb.0:
; RV32IFD-LE-NEXT: slli a1, a1, 1
; RV32IFD-LE-NEXT: srli a1, a1, 1
; RV32IFD-LE-NEXT: ret
;
; RV32IFD-BE-LABEL: fabs:
; RV32IFD-BE: # %bb.0:
; RV32IFD-BE-NEXT: slli a0, a0, 1
; RV32IFD-BE-NEXT: srli a0, a0, 1
; RV32IFD-BE-NEXT: ret
%1 = call double @llvm.fabs.f64(double %a)
ret double %1
}

; llvm.copysign.f64 on two f64 arguments. Both runs expect the same sequence:
; each argument's two GPR halves are stored to the stack (lower-numbered
; register at the lower offset) and reloaded with fld, the values are combined
; with fsgnj.d, and the result is written back with fsd and reloaded into
; a0/a1.
define double @fcopysign(double %a, double %b) nounwind {
; RV32IFD-LE-LABEL: fcopysign:
; RV32IFD-LE: # %bb.0:
; RV32IFD-LE-NEXT: addi sp, sp, -16
; RV32IFD-LE-NEXT: sw a2, 8(sp)
; RV32IFD-LE-NEXT: sw a3, 12(sp)
; RV32IFD-LE-NEXT: fld fa5, 8(sp)
; RV32IFD-LE-NEXT: sw a0, 8(sp)
; RV32IFD-LE-NEXT: sw a1, 12(sp)
; RV32IFD-LE-NEXT: fld fa4, 8(sp)
; RV32IFD-LE-NEXT: fsgnj.d fa5, fa4, fa5
; RV32IFD-LE-NEXT: fsd fa5, 8(sp)
; RV32IFD-LE-NEXT: lw a0, 8(sp)
; RV32IFD-LE-NEXT: lw a1, 12(sp)
; RV32IFD-LE-NEXT: addi sp, sp, 16
; RV32IFD-LE-NEXT: ret
;
; RV32IFD-BE-LABEL: fcopysign:
; RV32IFD-BE: # %bb.0:
; RV32IFD-BE-NEXT: addi sp, sp, -16
; RV32IFD-BE-NEXT: sw a2, 8(sp)
; RV32IFD-BE-NEXT: sw a3, 12(sp)
; RV32IFD-BE-NEXT: fld fa5, 8(sp)
; RV32IFD-BE-NEXT: sw a0, 8(sp)
; RV32IFD-BE-NEXT: sw a1, 12(sp)
; RV32IFD-BE-NEXT: fld fa4, 8(sp)
; RV32IFD-BE-NEXT: fsgnj.d fa5, fa4, fa5
; RV32IFD-BE-NEXT: fsd fa5, 8(sp)
; RV32IFD-BE-NEXT: lw a0, 8(sp)
; RV32IFD-BE-NEXT: lw a1, 12(sp)
; RV32IFD-BE-NEXT: addi sp, sp, 16
; RV32IFD-BE-NEXT: ret
%1 = call double @llvm.copysign.f64(double %a, double %b)
ret double %1
}

declare double @llvm.fabs.f64(double)
declare double @llvm.copysign.f64(double, double)
94 changes: 94 additions & 0 deletions llvm/test/CodeGen/RISCV/bigendian-f64-call.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32LE %s
; RUN: llc -mtriple=riscv32be -target-abi=ilp32 -mattr=+d -verify-machineinstrs < %s | FileCheck -check-prefix=RV32BE %s

; Test f64 function calls with the D extension and a soft-float ABI.
; This specifically tests the LowerCall path that needs to swap Lo/Hi for BE.

declare double @external_func(double, double)

; Forwards both f64 arguments unchanged to external_func and returns its
; result. Both runs expect only the ra spill/reload around the call, with no
; moves of the argument registers.
define double @test_f64_call(double %a, double %b) {
; RV32LE-LABEL: test_f64_call:
; RV32LE: # %bb.0:
; RV32LE-NEXT: addi sp, sp, -16
; RV32LE-NEXT: .cfi_def_cfa_offset 16
; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32LE-NEXT: .cfi_offset ra, -4
; RV32LE-NEXT: call external_func
; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32LE-NEXT: .cfi_restore ra
; RV32LE-NEXT: addi sp, sp, 16
; RV32LE-NEXT: .cfi_def_cfa_offset 0
; RV32LE-NEXT: ret
;
; RV32BE-LABEL: test_f64_call:
; RV32BE: # %bb.0:
; RV32BE-NEXT: addi sp, sp, -16
; RV32BE-NEXT: .cfi_def_cfa_offset 16
; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32BE-NEXT: .cfi_offset ra, -4
; RV32BE-NEXT: call external_func
; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32BE-NEXT: .cfi_restore ra
; RV32BE-NEXT: addi sp, sp, 16
; RV32BE-NEXT: .cfi_def_cfa_offset 0
; RV32BE-NEXT: ret
%result = call double @external_func(double %a, double %b)
ret double %result
}

; Test with a computation (fadd) before the call to force SplitF64. The sum is
; passed as both arguments of external_func. Both runs expect the same code:
; the fadd.d result is stored with fsd, reloaded into a0/a1, and copied to
; a2/a3 before the call.
define double @test_f64_call_with_fadd(double %a, double %b) {
; RV32LE-LABEL: test_f64_call_with_fadd:
; RV32LE: # %bb.0:
; RV32LE-NEXT: addi sp, sp, -16
; RV32LE-NEXT: .cfi_def_cfa_offset 16
; RV32LE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32LE-NEXT: .cfi_offset ra, -4
; RV32LE-NEXT: sw a2, 0(sp)
; RV32LE-NEXT: sw a3, 4(sp)
; RV32LE-NEXT: fld fa5, 0(sp)
; RV32LE-NEXT: sw a0, 0(sp)
; RV32LE-NEXT: sw a1, 4(sp)
; RV32LE-NEXT: fld fa4, 0(sp)
; RV32LE-NEXT: fadd.d fa5, fa4, fa5
; RV32LE-NEXT: fsd fa5, 0(sp)
; RV32LE-NEXT: lw a0, 0(sp)
; RV32LE-NEXT: lw a1, 4(sp)
; RV32LE-NEXT: mv a2, a0
; RV32LE-NEXT: mv a3, a1
; RV32LE-NEXT: call external_func
; RV32LE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32LE-NEXT: .cfi_restore ra
; RV32LE-NEXT: addi sp, sp, 16
; RV32LE-NEXT: .cfi_def_cfa_offset 0
; RV32LE-NEXT: ret
;
; RV32BE-LABEL: test_f64_call_with_fadd:
; RV32BE: # %bb.0:
; RV32BE-NEXT: addi sp, sp, -16
; RV32BE-NEXT: .cfi_def_cfa_offset 16
; RV32BE-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32BE-NEXT: .cfi_offset ra, -4
; RV32BE-NEXT: sw a2, 0(sp)
; RV32BE-NEXT: sw a3, 4(sp)
; RV32BE-NEXT: fld fa5, 0(sp)
; RV32BE-NEXT: sw a0, 0(sp)
; RV32BE-NEXT: sw a1, 4(sp)
; RV32BE-NEXT: fld fa4, 0(sp)
; RV32BE-NEXT: fadd.d fa5, fa4, fa5
; RV32BE-NEXT: fsd fa5, 0(sp)
; RV32BE-NEXT: lw a0, 0(sp)
; RV32BE-NEXT: lw a1, 4(sp)
; RV32BE-NEXT: mv a2, a0
; RV32BE-NEXT: mv a3, a1
; RV32BE-NEXT: call external_func
; RV32BE-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32BE-NEXT: .cfi_restore ra
; RV32BE-NEXT: addi sp, sp, 16
; RV32BE-NEXT: .cfi_def_cfa_offset 0
; RV32BE-NEXT: ret
%sum = fadd double %a, %b
%result = call double @external_func(double %sum, double %sum)
ret double %result
}
Loading