Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/FormatVariadic.h"

#define GET_GICOMBINER_DEPS
#include "RISCVGenPostLegalizeGICombiner.inc"
Expand Down Expand Up @@ -98,6 +99,8 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;

bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI);

private:
RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
};
Expand All @@ -122,6 +125,54 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
report_fatal_error("Invalid rule identifier");
}

/// Try to fold:
///   G_STORE (G_FCONSTANT +0.0), addr
/// into:
///   G_STORE (G_CONSTANT 0 [XLEN]), addr
///
/// Walks every instruction of \p MF and, for each G_STORE whose stored value
/// is a virtual register defined by a G_FCONSTANT of positive zero, rewrites
/// the stored operand to a freshly built XLEN-wide integer zero. The
/// G_FCONSTANT is erased once it has no remaining non-debug uses.
///
/// Only values that are at most XLEN bits wide are folded: a wider value
/// (e.g. a 64-bit FP zero when XLEN is 32) cannot be represented by a single
/// XLEN-wide integer, so such stores are left untouched.
///
/// \param MF  Function whose G_STORE instructions are scanned.
/// \param STI Subtarget providing XLEN.
/// \returns true if at least one store was rewritten.
bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF,
                                                    const RISCVSubtarget &STI) {
  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // XLEN is a property of the subtarget, so query it once up front.
  const unsigned XLen = STI.getXLen();

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.getOpcode() != TargetOpcode::G_STORE)
        continue;

      Register SrcReg = MI.getOperand(0).getReg();
      if (!SrcReg.isVirtual())
        continue;

      MachineInstr *Def = MRI.getVRegDef(SrcReg);
      if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
        continue;

      // Only +0.0 is matched; isPosZero() rejects -0.0, whose bit pattern is
      // not all zeros.
      const ConstantFP *CFP = Def->getOperand(1).getFPImm();
      if (!CFP || !CFP->getValueAPF().isPosZero())
        continue;

      // The replacement value is exactly XLEN bits wide. If the original
      // value is wider, the rewritten store would carry fewer bits than the
      // value it replaces, so do not fold in that case.
      const LLT ValTy = MRI.getType(SrcReg);
      const unsigned ValBits = ValTy.getSizeInBits();
      if (ValBits > XLen)
        continue;

      // Use XLEN-wide integer zero, materialized immediately before the store.
      MachineIRBuilder MIB(MI);
      auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0);
      MI.getOperand(0).setReg(Zero.getReg(0));

      // The FP constant may still feed other instructions; only delete it
      // when this store was its last non-debug user.
      if (MRI.use_nodbg_empty(SrcReg))
        Def->eraseFromParent();

      LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
                                   "(XLEN={1}, ValBits={2}) : \n\t{3}\n",
                                   DEBUG_TYPE, XLen, ValBits, MI));

      Changed = true;
    }
  }

  return Changed;
}

bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasFailedISel())
return false;
Expand All @@ -147,7 +198,12 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
F.hasMinSize());
RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
ST, MDT, LI);
return Impl.combineMachineInstrs();

bool TableCombChanged = Impl.combineMachineInstrs();

bool LocalChanged = combineFPZeroStore(MF, ST);

return TableCombChanged || LocalChanged;
}

char RISCVPostLegalizerCombiner::ID = 0;
Expand Down
72 changes: 28 additions & 44 deletions llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
define void @zero_f16(ptr %i) {
; RV32-LABEL: zero_f16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.h.x fa5, zero
; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.h.x fa5, zero
; RV64-NEXT: fsh fa5, 0(a0)
; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store half 0.0, ptr %i, align 4
Expand All @@ -24,14 +22,12 @@ entry:
define void @zero_bf16(ptr %i) {
; RV32-LABEL: zero_bf16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.h.x fa5, zero
; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_bf16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.h.x fa5, zero
; RV64-NEXT: fsh fa5, 0(a0)
; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store bfloat 0.0, ptr %i, align 4
Expand All @@ -41,14 +37,12 @@ entry:
define void @zero_f32(ptr %i) {
; RV32-LABEL: zero_f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.w.x fa5, zero
; RV32-NEXT: fsw fa5, 0(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.w.x fa5, zero
; RV64-NEXT: fsw fa5, 0(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store float 0.0, ptr %i, align 4
Expand All @@ -69,8 +63,7 @@ define void @zero_f64(ptr %i) {
;
; RV64-LABEL: zero_f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.d.x fa5, zero
; RV64-NEXT: fsd fa5, 0(a0)
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store double 0.0, ptr %i, align 8
Expand All @@ -80,14 +73,12 @@ entry:
define void @zero_v1f32(ptr %i) {
; RV32-LABEL: zero_v1f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.w.x fa5, zero
; RV32-NEXT: fsw fa5, 0(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v1f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.w.x fa5, zero
; RV64-NEXT: fsw fa5, 0(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x float> <float 0.0>, ptr %i, align 8
Expand All @@ -97,16 +88,14 @@ entry:
define void @zero_v2f32(ptr %i) {
; RV32-LABEL: zero_v2f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.w.x fa5, zero
; RV32-NEXT: fsw fa5, 0(a0)
; RV32-NEXT: fsw fa5, 4(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v2f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.w.x fa5, zero
; RV64-NEXT: fsw fa5, 0(a0)
; RV64-NEXT: fsw fa5, 4(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw zero, 4(a0)
; RV64-NEXT: ret
entry:
store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
Expand All @@ -116,20 +105,18 @@ entry:
define void @zero_v4f32(ptr %i) {
; RV32-LABEL: zero_v4f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: fmv.w.x fa5, zero
; RV32-NEXT: fsw fa5, 0(a0)
; RV32-NEXT: fsw fa5, 4(a0)
; RV32-NEXT: fsw fa5, 8(a0)
; RV32-NEXT: fsw fa5, 12(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw zero, 8(a0)
; RV32-NEXT: sw zero, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v4f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.w.x fa5, zero
; RV64-NEXT: fsw fa5, 0(a0)
; RV64-NEXT: fsw fa5, 4(a0)
; RV64-NEXT: fsw fa5, 8(a0)
; RV64-NEXT: fsw fa5, 12(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw zero, 4(a0)
; RV64-NEXT: sw zero, 8(a0)
; RV64-NEXT: sw zero, 12(a0)
; RV64-NEXT: ret
entry:
store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
Expand All @@ -149,8 +136,7 @@ define void @zero_v1f64(ptr %i) {
;
; RV64-LABEL: zero_v1f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.d.x fa5, zero
; RV64-NEXT: fsd fa5, 0(a0)
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x double> <double 0.0>, ptr %i, align 8
Expand All @@ -172,9 +158,8 @@ define void @zero_v2f64(ptr %i) {
;
; RV64-LABEL: zero_v2f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.d.x fa5, zero
; RV64-NEXT: fsd fa5, 0(a0)
; RV64-NEXT: fsd fa5, 8(a0)
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
entry:
store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
Expand All @@ -200,11 +185,10 @@ define void @zero_v4f64(ptr %i) {
;
; RV64-LABEL: zero_v4f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: fmv.d.x fa5, zero
; RV64-NEXT: fsd fa5, 0(a0)
; RV64-NEXT: fsd fa5, 8(a0)
; RV64-NEXT: fsd fa5, 16(a0)
; RV64-NEXT: fsd fa5, 24(a0)
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: sd zero, 16(a0)
; RV64-NEXT: sd zero, 24(a0)
; RV64-NEXT: ret
entry:
store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
Expand Down