Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/FormatVariadic.h"

#define GET_GICOMBINER_DEPS
#include "RISCVGenPostLegalizeGICombiner.inc"
Expand Down Expand Up @@ -98,6 +99,8 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;

bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI);

private:
RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
};
Expand All @@ -122,6 +125,54 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
report_fatal_error("Invalid rule identifier");
}

/// Fold stores of floating-point +0.0 into stores of an integer zero.
///
/// Rewrites
///   G_STORE (G_FCONSTANT +0.0), addr
/// into
///   G_STORE (G_CONSTANT 0 [XLEN]), addr
///
/// The replacement value is always an XLEN-wide scalar, so the fold is only
/// applied when the original stored value is no wider than XLEN; the store
/// then keeps its memory operand and becomes (at most) a truncating store.
/// Wider values (e.g. an s64 constant when XLEN is 32) are left untouched,
/// because a G_STORE whose memory size exceeds its value size is invalid.
///
/// \param MF  Function whose G_STORE instructions are scanned.
/// \param STI Subtarget used to query XLEN.
/// \returns true if at least one store was rewritten.
bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF,
                                                    const RISCVSubtarget &STI) {
  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned XLen = STI.getXLen();

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.getOpcode() != TargetOpcode::G_STORE)
        continue;

      Register SrcReg = MI.getOperand(0).getReg();
      if (!SrcReg.isVirtual())
        continue;

      MachineInstr *Def = MRI.getVRegDef(SrcReg);
      if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
        continue;

      // Only +0.0 has an all-zero bit pattern; -0.0 must not be folded.
      const auto *CFP = Def->getOperand(1).getFPImm();
      if (!CFP || !CFP->getValueAPF().isPosZero())
        continue;

      // Read the type before the defining instruction may be erased, and
      // refuse to shrink the stored value below the size being written.
      const LLT ValTy = MRI.getType(SrcReg);
      const unsigned ValBits = ValTy.getSizeInBits();
      if (ValBits > XLen)
        continue;

      // Use XLEN-wide integer zero, materialized right before the store.
      MachineIRBuilder MIB(MI);
      auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0);
      MI.getOperand(0).setReg(Zero.getReg(0));

      // Drop the FP constant once the store was its last non-debug user.
      if (MRI.use_nodbg_empty(SrcReg))
        Def->eraseFromParent();

      LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
                                   "(XLEN={1}, ValBits={2}) : \n\t{3}\n",
                                   DEBUG_TYPE, XLen, ValBits, MI));

      Changed = true;
    }
  }

  return Changed;
}

bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasFailedISel())
return false;
Expand All @@ -147,7 +198,12 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
F.hasMinSize());
RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
ST, MDT, LI);
return Impl.combineMachineInstrs();

bool TableCombChanged = Impl.combineMachineInstrs();

bool LocalChanged = combineFPZeroStore(MF, ST);

return TableCombChanged || LocalChanged;
}

char RISCVPostLegalizerCombiner::ID = 0;
Expand Down
196 changes: 196 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV32
; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV64

; Store of half 0.0: the checks below expect a single `sh` of the zero
; register on both RV32 and RV64.
define void @zero_f16(ptr %i) {
; RV32-LABEL: zero_f16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store half 0.0, ptr %i, align 4
ret void
}

; Store of bfloat 0.0: the checks below expect a single `sh` of the zero
; register on both RV32 and RV64.
define void @zero_bf16(ptr %i) {
; RV32-LABEL: zero_bf16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_bf16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store bfloat 0.0, ptr %i, align 4
ret void
}

; Store of float 0.0: the checks below expect a single `sw` of the zero
; register on both RV32 and RV64.
define void @zero_f32(ptr %i) {
; RV32-LABEL: zero_f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store float 0.0, ptr %i, align 4
ret void
}


; Store of double 0.0. RV64 checks expect a single `sd` of the zero register.
; The RV32 RUN line enables only +f,+zfh; its checks show the value being
; loaded from a constant-pool entry and written back as two word stores.
define void @zero_f64(ptr %i) {
; RV32-LABEL: zero_f64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(.LCPI3_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI3_0)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store double 0.0, ptr %i, align 8
ret void
}

; Store of a <1 x float> zero vector: the checks below expect the same single
; `sw` of the zero register as the scalar float case, on both RV32 and RV64.
define void @zero_v1f32(ptr %i) {
; RV32-LABEL: zero_v1f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v1f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x float> <float 0.0>, ptr %i, align 8
ret void
}

; Store of a <2 x float> zero vector: the checks below expect one `sw` of the
; zero register per element (offsets 0 and 4) on both RV32 and RV64.
define void @zero_v2f32(ptr %i) {
; RV32-LABEL: zero_v2f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v2f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw zero, 4(a0)
; RV64-NEXT: ret
entry:
store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
ret void
}

; Store of a <4 x float> zero vector: the checks below expect one `sw` of the
; zero register per element (offsets 0, 4, 8, 12) on both RV32 and RV64.
define void @zero_v4f32(ptr %i) {
; RV32-LABEL: zero_v4f32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw zero, 8(a0)
; RV32-NEXT: sw zero, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v4f32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw zero, 4(a0)
; RV64-NEXT: sw zero, 8(a0)
; RV64-NEXT: sw zero, 12(a0)
; RV64-NEXT: ret
entry:
store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
ret void
}

; Store of a <1 x double> zero vector. RV64 checks expect a single `sd` of the
; zero register. The RV32 RUN line enables only +f,+zfh; its checks show the
; value being loaded from a constant-pool entry and stored as two words.
define void @zero_v1f64(ptr %i) {
; RV32-LABEL: zero_v1f64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(.LCPI7_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v1f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x double> <double 0.0>, ptr %i, align 8
ret void
}

; Store of a <2 x double> zero vector. RV64 checks expect one `sd` of the zero
; register per element. The RV32 checks (RUN line enables only +f,+zfh) show a
; single constant-pool load whose two words are reused for every element.
define void @zero_v2f64(ptr %i) {
; RV32-LABEL: zero_v2f64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(.LCPI8_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v2f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
entry:
store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
ret void
}

; Store of a <4 x double> zero vector. RV64 checks expect one `sd` of the zero
; register per element. The RV32 checks (RUN line enables only +f,+zfh) show a
; single constant-pool load whose two words are reused for every element.
define void @zero_v4f64(ptr %i) {
; RV32-LABEL: zero_v4f64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(.LCPI9_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: sw a2, 16(a0)
; RV32-NEXT: sw a1, 20(a0)
; RV32-NEXT: sw a2, 24(a0)
; RV32-NEXT: sw a1, 28(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v4f64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: sd zero, 16(a0)
; RV64-NEXT: sd zero, 24(a0)
; RV64-NEXT: ret
entry:
store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
ret void
}