Skip to content

Commit 1dbeb86

Browse files
committed
update
1 parent 1946132 commit 1dbeb86

File tree

2 files changed

+85
-45
lines changed

2 files changed

+85
-45
lines changed

llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/CodeGen/MachineDominators.h"
2828
#include "llvm/CodeGen/MachineFunctionPass.h"
2929
#include "llvm/CodeGen/TargetPassConfig.h"
30+
#include "llvm/Support/FormatVariadic.h"
3031

3132
#define GET_GICOMBINER_DEPS
3233
#include "RISCVGenPostLegalizeGICombiner.inc"
@@ -98,6 +99,8 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass {
9899
bool runOnMachineFunction(MachineFunction &MF) override;
99100
void getAnalysisUsage(AnalysisUsage &AU) const override;
100101

102+
bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI);
103+
101104
private:
102105
RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
103106
};
@@ -122,6 +125,54 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
122125
report_fatal_error("Invalid rule identifier");
123126
}
124127

128+
/// Try to fold:
129+
/// G_STORE (G_FCONSTANT +0.0), addr
130+
/// into:
131+
/// G_STORE (G_CONSTANT 0 [XLEN]), addr
132+
bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF,
133+
const RISCVSubtarget &STI) {
134+
bool Changed = false;
135+
MachineRegisterInfo &MRI = MF.getRegInfo();
136+
137+
for (auto &MBB : MF) {
138+
for (auto &MI : MBB) {
139+
if (MI.getOpcode() != TargetOpcode::G_STORE)
140+
continue;
141+
142+
Register SrcReg = MI.getOperand(0).getReg();
143+
if (!SrcReg.isVirtual())
144+
continue;
145+
146+
MachineInstr *Def = MRI.getVRegDef(SrcReg);
147+
if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
148+
continue;
149+
150+
auto *CFP = Def->getOperand(1).getFPImm();
151+
if (!CFP || !CFP->getValueAPF().isPosZero())
152+
continue;
153+
154+
// Use XLEN-wide integer zero
155+
MachineIRBuilder MIB(MI);
156+
const unsigned XLen = STI.getXLen();
157+
auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0);
158+
MI.getOperand(0).setReg(Zero.getReg(0));
159+
160+
LLT ValTy = MRI.getType(SrcReg);
161+
if (MRI.use_nodbg_empty(SrcReg))
162+
Def->eraseFromParent();
163+
164+
[[maybe_unused]] unsigned ValBits = ValTy.getSizeInBits();
165+
LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
166+
"(XLEN={1}, ValBits={2}) : \n\t{3}\n",
167+
DEBUG_TYPE, XLen, ValBits, MI));
168+
169+
Changed = true;
170+
}
171+
}
172+
173+
return Changed;
174+
}
175+
125176
bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
126177
if (MF.getProperties().hasFailedISel())
127178
return false;
@@ -147,7 +198,12 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
147198
F.hasMinSize());
148199
RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
149200
ST, MDT, LI);
150-
return Impl.combineMachineInstrs();
201+
202+
bool TableCombChanged = Impl.combineMachineInstrs();
203+
204+
bool LocalChanged = combineFPZeroStore(MF, ST);
205+
206+
return TableCombChanged || LocalChanged;
151207
}
152208

153209
char RISCVPostLegalizerCombiner::ID = 0;

llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll

Lines changed: 28 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@
77
define void @zero_f16(ptr %i) {
88
; RV32-LABEL: zero_f16:
99
; RV32: # %bb.0: # %entry
10-
; RV32-NEXT: fmv.h.x fa5, zero
11-
; RV32-NEXT: fsh fa5, 0(a0)
10+
; RV32-NEXT: sh zero, 0(a0)
1211
; RV32-NEXT: ret
1312
;
1413
; RV64-LABEL: zero_f16:
1514
; RV64: # %bb.0: # %entry
16-
; RV64-NEXT: fmv.h.x fa5, zero
17-
; RV64-NEXT: fsh fa5, 0(a0)
15+
; RV64-NEXT: sh zero, 0(a0)
1816
; RV64-NEXT: ret
1917
entry:
2018
store half 0.0, ptr %i, align 4
@@ -24,14 +22,12 @@ entry:
2422
define void @zero_bf16(ptr %i) {
2523
; RV32-LABEL: zero_bf16:
2624
; RV32: # %bb.0: # %entry
27-
; RV32-NEXT: fmv.h.x fa5, zero
28-
; RV32-NEXT: fsh fa5, 0(a0)
25+
; RV32-NEXT: sh zero, 0(a0)
2926
; RV32-NEXT: ret
3027
;
3128
; RV64-LABEL: zero_bf16:
3229
; RV64: # %bb.0: # %entry
33-
; RV64-NEXT: fmv.h.x fa5, zero
34-
; RV64-NEXT: fsh fa5, 0(a0)
30+
; RV64-NEXT: sh zero, 0(a0)
3531
; RV64-NEXT: ret
3632
entry:
3733
store bfloat 0.0, ptr %i, align 4
@@ -41,14 +37,12 @@ entry:
4137
define void @zero_f32(ptr %i) {
4238
; RV32-LABEL: zero_f32:
4339
; RV32: # %bb.0: # %entry
44-
; RV32-NEXT: fmv.w.x fa5, zero
45-
; RV32-NEXT: fsw fa5, 0(a0)
40+
; RV32-NEXT: sw zero, 0(a0)
4641
; RV32-NEXT: ret
4742
;
4843
; RV64-LABEL: zero_f32:
4944
; RV64: # %bb.0: # %entry
50-
; RV64-NEXT: fmv.w.x fa5, zero
51-
; RV64-NEXT: fsw fa5, 0(a0)
45+
; RV64-NEXT: sw zero, 0(a0)
5246
; RV64-NEXT: ret
5347
entry:
5448
store float 0.0, ptr %i, align 4
@@ -69,8 +63,7 @@ define void @zero_f64(ptr %i) {
6963
;
7064
; RV64-LABEL: zero_f64:
7165
; RV64: # %bb.0: # %entry
72-
; RV64-NEXT: fmv.d.x fa5, zero
73-
; RV64-NEXT: fsd fa5, 0(a0)
66+
; RV64-NEXT: sd zero, 0(a0)
7467
; RV64-NEXT: ret
7568
entry:
7669
store double 0.0, ptr %i, align 8
@@ -80,14 +73,12 @@ entry:
8073
define void @zero_v1f32(ptr %i) {
8174
; RV32-LABEL: zero_v1f32:
8275
; RV32: # %bb.0: # %entry
83-
; RV32-NEXT: fmv.w.x fa5, zero
84-
; RV32-NEXT: fsw fa5, 0(a0)
76+
; RV32-NEXT: sw zero, 0(a0)
8577
; RV32-NEXT: ret
8678
;
8779
; RV64-LABEL: zero_v1f32:
8880
; RV64: # %bb.0: # %entry
89-
; RV64-NEXT: fmv.w.x fa5, zero
90-
; RV64-NEXT: fsw fa5, 0(a0)
81+
; RV64-NEXT: sw zero, 0(a0)
9182
; RV64-NEXT: ret
9283
entry:
9384
store <1 x float> <float 0.0>, ptr %i, align 8
@@ -97,16 +88,14 @@ entry:
9788
define void @zero_v2f32(ptr %i) {
9889
; RV32-LABEL: zero_v2f32:
9990
; RV32: # %bb.0: # %entry
100-
; RV32-NEXT: fmv.w.x fa5, zero
101-
; RV32-NEXT: fsw fa5, 0(a0)
102-
; RV32-NEXT: fsw fa5, 4(a0)
91+
; RV32-NEXT: sw zero, 0(a0)
92+
; RV32-NEXT: sw zero, 4(a0)
10393
; RV32-NEXT: ret
10494
;
10595
; RV64-LABEL: zero_v2f32:
10696
; RV64: # %bb.0: # %entry
107-
; RV64-NEXT: fmv.w.x fa5, zero
108-
; RV64-NEXT: fsw fa5, 0(a0)
109-
; RV64-NEXT: fsw fa5, 4(a0)
97+
; RV64-NEXT: sw zero, 0(a0)
98+
; RV64-NEXT: sw zero, 4(a0)
11099
; RV64-NEXT: ret
111100
entry:
112101
store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
@@ -116,20 +105,18 @@ entry:
116105
define void @zero_v4f32(ptr %i) {
117106
; RV32-LABEL: zero_v4f32:
118107
; RV32: # %bb.0: # %entry
119-
; RV32-NEXT: fmv.w.x fa5, zero
120-
; RV32-NEXT: fsw fa5, 0(a0)
121-
; RV32-NEXT: fsw fa5, 4(a0)
122-
; RV32-NEXT: fsw fa5, 8(a0)
123-
; RV32-NEXT: fsw fa5, 12(a0)
108+
; RV32-NEXT: sw zero, 0(a0)
109+
; RV32-NEXT: sw zero, 4(a0)
110+
; RV32-NEXT: sw zero, 8(a0)
111+
; RV32-NEXT: sw zero, 12(a0)
124112
; RV32-NEXT: ret
125113
;
126114
; RV64-LABEL: zero_v4f32:
127115
; RV64: # %bb.0: # %entry
128-
; RV64-NEXT: fmv.w.x fa5, zero
129-
; RV64-NEXT: fsw fa5, 0(a0)
130-
; RV64-NEXT: fsw fa5, 4(a0)
131-
; RV64-NEXT: fsw fa5, 8(a0)
132-
; RV64-NEXT: fsw fa5, 12(a0)
116+
; RV64-NEXT: sw zero, 0(a0)
117+
; RV64-NEXT: sw zero, 4(a0)
118+
; RV64-NEXT: sw zero, 8(a0)
119+
; RV64-NEXT: sw zero, 12(a0)
133120
; RV64-NEXT: ret
134121
entry:
135122
store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
@@ -149,8 +136,7 @@ define void @zero_v1f64(ptr %i) {
149136
;
150137
; RV64-LABEL: zero_v1f64:
151138
; RV64: # %bb.0: # %entry
152-
; RV64-NEXT: fmv.d.x fa5, zero
153-
; RV64-NEXT: fsd fa5, 0(a0)
139+
; RV64-NEXT: sd zero, 0(a0)
154140
; RV64-NEXT: ret
155141
entry:
156142
store <1 x double> <double 0.0>, ptr %i, align 8
@@ -172,9 +158,8 @@ define void @zero_v2f64(ptr %i) {
172158
;
173159
; RV64-LABEL: zero_v2f64:
174160
; RV64: # %bb.0: # %entry
175-
; RV64-NEXT: fmv.d.x fa5, zero
176-
; RV64-NEXT: fsd fa5, 0(a0)
177-
; RV64-NEXT: fsd fa5, 8(a0)
161+
; RV64-NEXT: sd zero, 0(a0)
162+
; RV64-NEXT: sd zero, 8(a0)
178163
; RV64-NEXT: ret
179164
entry:
180165
store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
@@ -200,11 +185,10 @@ define void @zero_v4f64(ptr %i) {
200185
;
201186
; RV64-LABEL: zero_v4f64:
202187
; RV64: # %bb.0: # %entry
203-
; RV64-NEXT: fmv.d.x fa5, zero
204-
; RV64-NEXT: fsd fa5, 0(a0)
205-
; RV64-NEXT: fsd fa5, 8(a0)
206-
; RV64-NEXT: fsd fa5, 16(a0)
207-
; RV64-NEXT: fsd fa5, 24(a0)
188+
; RV64-NEXT: sd zero, 0(a0)
189+
; RV64-NEXT: sd zero, 8(a0)
190+
; RV64-NEXT: sd zero, 16(a0)
191+
; RV64-NEXT: sd zero, 24(a0)
208192
; RV64-NEXT: ret
209193
entry:
210194
store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8

0 commit comments

Comments
 (0)