diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
index 67b510dc80f1e..f2b216be1db15 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/FormatVariadic.h"
 
 #define GET_GICOMBINER_DEPS
 #include "RISCVGenPostLegalizeGICombiner.inc"
@@ -42,6 +43,56 @@ namespace {
 #include "RISCVGenPostLegalizeGICombiner.inc"
 #undef GET_GICOMBINER_TYPES
 
+/// Match: G_STORE (G_FCONSTANT +0.0), addr
+/// Return the source vreg in MatchInfo if matched.
+bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          const RISCVSubtarget &STI, Register &MatchInfo) {
+  if (MI.getOpcode() != TargetOpcode::G_STORE)
+    return false;
+
+  Register SrcReg = MI.getOperand(0).getReg();
+  if (!SrcReg.isVirtual())
+    return false;
+
+  MachineInstr *Def = MRI.getVRegDef(SrcReg);
+  if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
+    return false;
+
+  auto *CFP = Def->getOperand(1).getFPImm();
+  if (!CFP || !CFP->getValueAPF().isPosZero())
+    return false;
+
+  unsigned ValBits = MRI.getType(SrcReg).getSizeInBits();
+  if ((ValBits == 16 && !STI.hasStdExtZfh()) ||
+      (ValBits == 32 && !STI.hasStdExtF()) ||
+      (ValBits == 64 && (!STI.hasStdExtD() || !STI.is64Bit())))
+    return false;
+
+  MatchInfo = SrcReg;
+  return true;
+}
+
+/// Apply: rewrite to G_STORE (G_CONSTANT 0 [XLEN]), addr
+void applyFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          MachineIRBuilder &B, const RISCVSubtarget &STI,
+                          Register &MatchInfo) {
+  const unsigned XLen = STI.getXLen();
+
+  auto Zero = B.buildConstant(LLT::scalar(XLen), 0);
+  MI.getOperand(0).setReg(Zero.getReg(0));
+
+  MachineInstr *Def = MRI.getVRegDef(MatchInfo);
+  if (Def && MRI.use_nodbg_empty(MatchInfo))
+    Def->eraseFromParent();
+
+#ifndef NDEBUG
+  unsigned ValBits = MRI.getType(MatchInfo).getSizeInBits();
+  LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
+                               "(XLEN={1}, ValBits={2}):\n {3}\n",
+                               DEBUG_TYPE, XLen, ValBits, MI));
+#endif
+}
+
 class RISCVPostLegalizerCombinerImpl : public Combiner {
 protected:
   const CombinerHelper Helper;
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 995dd0c5d82eb..a06b60d8cce07 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -19,11 +19,20 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
   "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
 }
 
+// Rule: fold store (fp +0.0) -> store (int zero [XLEN])
+def fp_zero_store_matchdata : GIDefMatchData<"Register">;
+def fold_fp_zero_store : GICombineRule<
+  (defs root:$root, fp_zero_store_matchdata:$matchinfo),
+  (match (G_STORE $src, $addr):$root,
+         [{ return matchFoldFPZeroStore(*${root}, MRI, STI, ${matchinfo}); }]),
+  (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>;
+
 // Post-legalization combines which are primarily optimizations.
 // TODO: Add more combines.
 def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl",
     [sub_to_add, combines_for_extload, redundant_and,
      identity_combines, shift_immed_chain,
-     commute_constant_to_rhs, simplify_neg_minmax]> {
+     commute_constant_to_rhs, simplify_neg_minmax,
+     fold_fp_zero_store]> {
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
new file mode 100644
index 0000000000000..bc79c6f650291
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -0,0 +1,320 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \
+; RUN:   | FileCheck %s --check-prefix=RV32F
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+d,+zfh < %s \
+; RUN:   | FileCheck %s --check-prefix=RV32D
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+f,+zfh < %s \
+; RUN:   | FileCheck %s --check-prefix=RV64F
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \
+; RUN:   | FileCheck %s --check-prefix=RV64D
+
+define void @zero_f16(ptr %i) {
+; RV32F-LABEL: zero_f16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store half 0.0, ptr %i, align 4
+  ret void
+}
+
+define void @zero_bf16(ptr %i) {
+; RV32F-LABEL: zero_bf16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_bf16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_bf16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_bf16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store bfloat 0.0, ptr %i, align 4
+  ret void
+}
+
+define void @zero_f32(ptr %i) {
+; RV32F-LABEL: zero_f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store float 0.0, ptr %i, align 4
+  ret void
+}
+
+
+define void @zero_f64(ptr %i) {
+; RV32F-LABEL: zero_f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI3_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI3_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store double 0.0, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v1f32(ptr %i) {
+; RV32F-LABEL: zero_v1f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store <1 x float> zeroinitializer, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v2f32(ptr %i) {
+; RV32F-LABEL: zero_v2f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: ret
+entry:
+  store <2 x float> zeroinitializer, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v4f32(ptr %i) {
+; RV32F-LABEL: zero_v4f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: sw zero, 8(a0)
+; RV32F-NEXT: sw zero, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: sw zero, 8(a0)
+; RV32D-NEXT: sw zero, 12(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: sw zero, 8(a0)
+; RV64F-NEXT: sw zero, 12(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: sw zero, 8(a0)
+; RV64D-NEXT: sw zero, 12(a0)
+; RV64D-NEXT: ret
+entry:
+  store <4 x float> zeroinitializer, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v1f64(ptr %i) {
+; RV32F-LABEL: zero_v1f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI7_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+  store <1 x double> zeroinitializer, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v2f64(ptr %i) {
+; RV32F-LABEL: zero_v2f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI8_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: ret
+entry:
+  store <2 x double> zeroinitializer, ptr %i, align 8
+  ret void
+}
+
+define void @zero_v4f64(ptr %i) {
+; RV32F-LABEL: zero_v4f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI9_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI9_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: sw a2, 16(a0)
+; RV32F-NEXT: sw a1, 20(a0)
+; RV32F-NEXT: sw a2, 24(a0)
+; RV32F-NEXT: sw a1, 28(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: fsd fa5, 16(a0)
+; RV32D-NEXT: fsd fa5, 24(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: sd zero, 16(a0)
+; RV64F-NEXT: sd zero, 24(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: sd zero, 16(a0)
+; RV64D-NEXT: sd zero, 24(a0)
+; RV64D-NEXT: ret
+entry:
+  store <4 x double> zeroinitializer, ptr %i, align 8
+  ret void
+}