Skip to content

Commit 1e1302b

Browse files
committed
Fix mul for negative zero
1 parent d9d67c4 commit 1e1302b

File tree

3 files changed

+7
-5
lines changed

3 files changed

+7
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2548,11 +2548,13 @@ SDValue NVPTXTargetLowering::LowerFMUL(SDValue Op, SelectionDAG &DAG) const {
25482548
return PromoteBinOpToF32(Op.getNode(), DAG);
25492549
}
25502550

2551-
// FMUL(a, b) -> FMA(a, b, 0.0)
2551+
// FMUL(a, b) -> FMA(a, b, -0.0)
2552+
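// NOTE: The additive identity for floats is -0.0, not +0.0: x + (-0.0) == x for every x, whereas (-0.0) + (+0.0) == +0.0 would drop the sign of a negative-zero product.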
25522553
SDLoc DL(Op);
25532554
auto VT = Op.getValueType();
2554-
auto Zero = DAG.getConstantFP(0.0, DL, VT);
2555-
SmallVector<SDValue, 3> Operands{Op->getOperand(0), Op->getOperand(1), Zero};
2555+
auto NegZero = DAG.getConstantFP(-0.0, DL, VT);
2556+
SmallVector<SDValue, 3> Operands{Op->getOperand(0), Op->getOperand(1),
2557+
NegZero};
25562558
return DAG.getNode(ISD::FMA, DL, VT, Operands);
25572559
}
25582560

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
385385
; SM80-NEXT: // %bb.0:
386386
; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_1];
387387
; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_0];
388-
; SM80-NEXT: mov.b32 %r3, 0;
388+
; SM80-NEXT: mov.b32 %r3, -2147450880;
389389
; SM80-NEXT: fma.rn.bf16x2 %r4, %r2, %r1, %r3;
390390
; SM80-NEXT: st.param.b32 [func_retval0], %r4;
391391
; SM80-NEXT: ret;

llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
107107
; SM80-NEXT: // %bb.0:
108108
; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_1];
109109
; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_0];
110-
; SM80-NEXT: mov.b32 %r3, 0;
110+
; SM80-NEXT: mov.b32 %r3, -2147450880;
111111
; SM80-NEXT: fma.rn.bf16x2 %r4, %r2, %r1, %r3;
112112
; SM80-NEXT: st.param.b32 [func_retval0], %r4;
113113
; SM80-NEXT: ret;

0 commit comments

Comments
 (0)