Skip to content

Commit e60f49e

Browse files
committed
[RISCV] Undo fneg (fmul x, y) -> fmul x, (fneg y) transform
InstCombine will hoist an fneg through an fmul, but not through an fadd/fsub. This prevents us from matching fmsub and fnmadd in some cases.

This patch adds a DAG combine that undoes this InstCombine transform, which helps some hot loops in 508.namd_r:

```diff
@@ -983,18 +983,15 @@
 fld ft2, 48(a5)
 fld ft3, 64(a5)
 fld ft4, 72(a5)
- fneg.d fa0, fa0
- fneg.d ft0, ft0
- fneg.d ft2, ft2
 fmul.d fa3, ft5, fa3
 fmul.d fa0, fa3, fa0
 fmul.d ft0, fa3, ft0
 fmul.d fa3, fa3, ft2
 fld ft2, 0(s1)
 fmul.d fa4, ft5, fa4
- fmadd.d fa2, fa4, fa2, fa0
- fmadd.d ft6, fa4, ft6, ft0
- fmadd.d fa4, fa4, ft1, fa3
+ fmsub.d fa2, fa4, fa2, fa0
+ fmsub.d ft6, fa4, ft6, ft0
+ fmsub.d fa4, fa4, ft1, fa3
```

This gives a [1.77% improvement in both instruction count and runtime on 508.namd_r](https://lnt.lukelau.me/db_default/v4/nts/profile/1/1022/1021).

This also causes some more fnegs to be sunk after a bitcast to integer, so they're now done as an xor. From glancing at some of the schedules for WriteFSGN, my guess is that this is also profitable.
1 parent 8d9e2be commit e60f49e

File tree

5 files changed

+95
-65
lines changed

5 files changed

+95
-65
lines changed

llvm/include/llvm/CodeGen/SDPatternMatch.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,10 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_Cttz(const Opnd &Op) {
10761076
return UnaryOpc_match<Opnd>(ISD::CTTZ, Op);
10771077
}
10781078

1079+
/// Pattern-match helper for floating-point negation: returns a
/// UnaryOpc_match constructed with the ISD::FNEG opcode and \p Op, in the same
/// way m_Cttz does for ISD::CTTZ.
template <typename Opnd> inline UnaryOpc_match<Opnd> m_FNeg(const Opnd &Op) {
1080+
return UnaryOpc_match<Opnd>(ISD::FNEG, Op);
1081+
}
1082+
10791083
// === Constants ===
10801084
struct ConstantInt_match {
10811085
APInt *BindVal;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20250,6 +20250,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2025020250
return V;
2025120251
break;
2025220252
case ISD::FMUL: {
20253+
using namespace SDPatternMatch;
20254+
SDLoc DL(N);
20255+
EVT VT = N->getValueType(0);
20256+
SDValue X, Y;
20257+
// InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20258+
// hoistFNegAboveFMulFDiv.
20259+
// Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20260+
if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
20261+
return DAG.getNode(ISD::FNEG, DL, VT,
20262+
DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20263+
2025320264
// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
2025420265
SDValue N0 = N->getOperand(0);
2025520266
SDValue N1 = N->getOperand(1);
@@ -20260,7 +20271,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2026020271
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
2026120272
if (!C || !C->getValueAPF().isExactlyValue(+1.0))
2026220273
return SDValue();
20263-
EVT VT = N->getValueType(0);
2026420274
if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
2026520275
return SDValue();
2026620276
SDValue Sign = N0->getOperand(1);

llvm/test/CodeGen/RISCV/double-arith.ll

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -613,23 +613,20 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
613613
define double @fmsub_d_fmul_fneg(double %a, double %b, double %c, double %d) nounwind {
614614
; CHECKIFD-LABEL: fmsub_d_fmul_fneg:
615615
; CHECKIFD: # %bb.0:
616-
; CHECKIFD-NEXT: fneg.d fa5, fa3
617-
; CHECKIFD-NEXT: fmul.d fa5, fa2, fa5
618-
; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa5
616+
; CHECKIFD-NEXT: fmul.d fa5, fa2, fa3
617+
; CHECKIFD-NEXT: fmsub.d fa0, fa0, fa1, fa5
619618
; CHECKIFD-NEXT: ret
620619
;
621620
; RV32IZFINXZDINX-LABEL: fmsub_d_fmul_fneg:
622621
; RV32IZFINXZDINX: # %bb.0:
623-
; RV32IZFINXZDINX-NEXT: fneg.d a6, a6
624622
; RV32IZFINXZDINX-NEXT: fmul.d a4, a4, a6
625-
; RV32IZFINXZDINX-NEXT: fmadd.d a0, a0, a2, a4
623+
; RV32IZFINXZDINX-NEXT: fmsub.d a0, a0, a2, a4
626624
; RV32IZFINXZDINX-NEXT: ret
627625
;
628626
; RV64IZFINXZDINX-LABEL: fmsub_d_fmul_fneg:
629627
; RV64IZFINXZDINX: # %bb.0:
630-
; RV64IZFINXZDINX-NEXT: fneg.d a3, a3
631628
; RV64IZFINXZDINX-NEXT: fmul.d a2, a2, a3
632-
; RV64IZFINXZDINX-NEXT: fmadd.d a0, a0, a1, a2
629+
; RV64IZFINXZDINX-NEXT: fmsub.d a0, a0, a1, a2
633630
; RV64IZFINXZDINX-NEXT: ret
634631
;
635632
; RV32I-LABEL: fmsub_d_fmul_fneg:
@@ -963,23 +960,20 @@ define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
963960
define double @fnmadd_d_fmul_fneg(double %a, double %b, double %c, double %d) nounwind {
964961
; CHECKIFD-LABEL: fnmadd_d_fmul_fneg:
965962
; CHECKIFD: # %bb.0:
966-
; CHECKIFD-NEXT: fneg.d fa5, fa0
967-
; CHECKIFD-NEXT: fmul.d fa5, fa1, fa5
968-
; CHECKIFD-NEXT: fmadd.d fa0, fa2, fa3, fa5
963+
; CHECKIFD-NEXT: fmul.d fa5, fa1, fa0
964+
; CHECKIFD-NEXT: fmsub.d fa0, fa2, fa3, fa5
969965
; CHECKIFD-NEXT: ret
970966
;
971967
; RV32IZFINXZDINX-LABEL: fnmadd_d_fmul_fneg:
972968
; RV32IZFINXZDINX: # %bb.0:
973-
; RV32IZFINXZDINX-NEXT: fneg.d a0, a0
974969
; RV32IZFINXZDINX-NEXT: fmul.d a0, a2, a0
975-
; RV32IZFINXZDINX-NEXT: fmadd.d a0, a4, a6, a0
970+
; RV32IZFINXZDINX-NEXT: fmsub.d a0, a4, a6, a0
976971
; RV32IZFINXZDINX-NEXT: ret
977972
;
978973
; RV64IZFINXZDINX-LABEL: fnmadd_d_fmul_fneg:
979974
; RV64IZFINXZDINX: # %bb.0:
980-
; RV64IZFINXZDINX-NEXT: fneg.d a0, a0
981975
; RV64IZFINXZDINX-NEXT: fmul.d a0, a1, a0
982-
; RV64IZFINXZDINX-NEXT: fmadd.d a0, a2, a3, a0
976+
; RV64IZFINXZDINX-NEXT: fmsub.d a0, a2, a3, a0
983977
; RV64IZFINXZDINX-NEXT: ret
984978
;
985979
; RV32I-LABEL: fnmadd_d_fmul_fneg:

llvm/test/CodeGen/RISCV/float-arith.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -553,16 +553,14 @@ define float @fmsub_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwind
553553
;
554554
; CHECKIF-LABEL: fmsub_s_fmul_fneg:
555555
; CHECKIF: # %bb.0:
556-
; CHECKIF-NEXT: fneg.s fa5, fa3
557-
; CHECKIF-NEXT: fmul.s fa5, fa2, fa5
558-
; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa5
556+
; CHECKIF-NEXT: fmul.s fa5, fa2, fa3
557+
; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5
559558
; CHECKIF-NEXT: ret
560559
;
561560
; CHECKIZFINX-LABEL: fmsub_s_fmul_fneg:
562561
; CHECKIZFINX: # %bb.0:
563-
; CHECKIZFINX-NEXT: fneg.s a3, a3
564562
; CHECKIZFINX-NEXT: fmul.s a2, a2, a3
565-
; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
563+
; CHECKIZFINX-NEXT: fmsub.s a0, a0, a1, a2
566564
; CHECKIZFINX-NEXT: ret
567565
;
568566
; RV32I-LABEL: fmsub_s_fmul_fneg:
@@ -847,16 +845,14 @@ define float @fnmadd_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwin
847845
;
848846
; CHECKIF-LABEL: fnmadd_s_fmul_fneg:
849847
; CHECKIF: # %bb.0:
850-
; CHECKIF-NEXT: fneg.s fa5, fa0
851-
; CHECKIF-NEXT: fmul.s fa5, fa1, fa5
852-
; CHECKIF-NEXT: fmadd.s fa0, fa2, fa3, fa5
848+
; CHECKIF-NEXT: fmul.s fa5, fa1, fa0
849+
; CHECKIF-NEXT: fmsub.s fa0, fa2, fa3, fa5
853850
; CHECKIF-NEXT: ret
854851
;
855852
; CHECKIZFINX-LABEL: fnmadd_s_fmul_fneg:
856853
; CHECKIZFINX: # %bb.0:
857-
; CHECKIZFINX-NEXT: fneg.s a0, a0
858854
; CHECKIZFINX-NEXT: fmul.s a0, a1, a0
859-
; CHECKIZFINX-NEXT: fmadd.s a0, a2, a3, a0
855+
; CHECKIZFINX-NEXT: fmsub.s a0, a2, a3, a0
860856
; CHECKIZFINX-NEXT: ret
861857
;
862858
; RV32I-LABEL: fnmadd_s_fmul_fneg:

0 commit comments

Comments
 (0)