Skip to content

Commit 92be640

Browse files
author
Cameron McInally
committed
[FPEnv][AMDGPU] Disable FSUB(-0,X)->FNEG(X) DAGCombine when subnormals are flushed
This patch disables the FSUB(-0,X)->FNEG(X) DAG combine when we're flushing subnormals. It requires updating the existing AMDGPU tests to use the fneg IR instruction, in place of the old fsub(-0,X) canonical form, since AMDGPU is the only backend currently checking the DenormalMode flags. Note that this will require follow-up optimizations to make sure the FSUB(-0,X) form is handled appropriately. Differential Revision: https://reviews.llvm.org/D93243
1 parent de6d43f commit 92be640

16 files changed

+67
-64
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13367,18 +13367,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1336713367
}
1336813368

1336913369
// (fsub -0.0, N1) -> -N1
13370-
// NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
13371-
// FSUB does not specify the sign bit of a NaN. Also note that for
13372-
// the same reason, the inverse transform is not safe, unless fast math
13373-
// flags are in play.
1337413370
if (N0CFP && N0CFP->isZero()) {
1337513371
if (N0CFP->isNegative() ||
1337613372
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13377-
if (SDValue NegN1 =
13378-
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13379-
return NegN1;
13380-
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13381-
return DAG.getNode(ISD::FNEG, DL, VT, N1);
13373+
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13374+
// flushed to zero, unless all users treat denorms as zero (DAZ).
13375+
// FIXME: This transform will change the sign of a NaN and the behavior
13376+
// of a signaling NaN. It is only valid when a NoNaN flag is present.
13377+
DenormalMode DenormMode = DAG.getDenormalMode(VT);
13378+
if (DenormMode == DenormalMode::getIEEE()) {
13379+
if (SDValue NegN1 =
13380+
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13381+
return NegN1;
13382+
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13383+
return DAG.getNode(ISD::FNEG, DL, VT, N1);
13384+
}
1338213385
}
1338313386
}
1338413387

llvm/test/CodeGen/AMDGPU/clamp-modifier.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ define amdgpu_kernel void @v_clamp_add_neg_src_f32(float addrspace(1)* %out, flo
6262
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
6363
%a = load float, float addrspace(1)* %gep0
6464
%floor = call float @llvm.floor.f32(float %a)
65-
%neg.floor = fsub float -0.0, %floor
65+
%neg.floor = fneg float %floor
6666
%max = call float @llvm.maxnum.f32(float %neg.floor, float 0.0)
6767
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
6868
store float %clamp, float addrspace(1)* %out.gep

llvm/test/CodeGen/AMDGPU/clamp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrs
2525
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
2626
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
2727
%a = load float, float addrspace(1)* %gep0
28-
%fneg.a = fsub float -0.0, %a
28+
%fneg.a = fneg float %a
2929
%max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
3030
%med = call float @llvm.minnum.f32(float %max, float 1.0)
3131

@@ -42,7 +42,7 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad
4242
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
4343
%a = load float, float addrspace(1)* %gep0
4444
%fabs.a = call float @llvm.fabs.f32(float %a)
45-
%fneg.fabs.a = fsub float -0.0, %fabs.a
45+
%fneg.fabs.a = fneg float %fabs.a
4646

4747
%max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
4848
%med = call float @llvm.minnum.f32(float %max, float 1.0)

llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
5656
; GCN: global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
5757
define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
5858
%load = load float, float addrspace(1)* %arg, align 4
59-
%neg = fsub float -0.000000e+00, %load
59+
%neg = fneg float %load
6060
%div = fdiv float 1.000000e+00, %neg, !fpmath !0
6161
store float %div, float addrspace(1)* %arg, align 4
6262
ret void
@@ -188,7 +188,7 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
188188
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
189189
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
190190
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
191-
%neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
191+
%neg = fneg <4 x float> %load
192192
%div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
193193
store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
194194
ret void
@@ -226,7 +226,7 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
226226
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
227227
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
228228
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
229-
%neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
229+
%neg = fneg <4 x float> %load
230230
%div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
231231
store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
232232
ret void
@@ -372,7 +372,7 @@ define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
372372
; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]+:[0-9]+\]}}
373373
define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
374374
%load = load float, float addrspace(1)* %arg, align 4
375-
%neg = fsub float -0.000000e+00, %load, !fpmath !0
375+
%neg = fneg float %load, !fpmath !0
376376
%div = fdiv fast float 1.000000e+00, %neg
377377
store float %div, float addrspace(1)* %arg, align 4
378378
ret void

llvm/test/CodeGen/AMDGPU/fma-combine.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
647647
%r1 = load volatile float, float addrspace(1)* %gep.0
648648
%r2 = load volatile float, float addrspace(1)* %gep.1
649649

650-
%r1.fneg = fsub float -0.000000e+00, %r1
650+
%r1.fneg = fneg float %r1
651651

652652
%r3 = tail call float @llvm.fma.f32(float -2.0, float %r1.fneg, float %r2)
653653
store float %r3, float addrspace(1)* %gep.out
@@ -669,7 +669,7 @@ define amdgpu_kernel void @fma_2.0_neg_a_b_f32(float addrspace(1)* %out, float a
669669
%r1 = load volatile float, float addrspace(1)* %gep.0
670670
%r2 = load volatile float, float addrspace(1)* %gep.1
671671

672-
%r1.fneg = fsub float -0.000000e+00, %r1
672+
%r1.fneg = fneg float %r1
673673

674674
%r3 = tail call float @llvm.fma.f32(float 2.0, float %r1.fneg, float %r2)
675675
store float %r3, float addrspace(1)* %gep.out

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out,
482482
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
483483
%a = load volatile float, float addrspace(1)* %a.gep
484484
%min = call float @llvm.minnum.f32(float %a, float %a)
485-
%min.fneg = fsub float -0.0, %min
485+
%min.fneg = fneg float %min
486486
store float %min.fneg, float addrspace(1)* %out.gep
487487
ret void
488488
}
@@ -493,7 +493,7 @@ define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out,
493493
; GCN-NEXT: ; return
494494
define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
495495
%min = call float @llvm.minnum.f32(float %a, float %a)
496-
%min.fneg = fsub float -0.0, %min
496+
%min.fneg = fneg float %min
497497
ret float %min.fneg
498498
}
499499

@@ -887,7 +887,7 @@ define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out,
887887
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
888888
%a = load volatile float, float addrspace(1)* %a.gep
889889
%max = call float @llvm.maxnum.f32(float %a, float %a)
890-
%max.fneg = fsub float -0.0, %max
890+
%max.fneg = fneg float %max
891891
store float %max.fneg, float addrspace(1)* %out.gep
892892
ret void
893893
}
@@ -898,7 +898,7 @@ define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out,
898898
; GCN-NEXT: ; return
899899
define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
900900
%max = call float @llvm.maxnum.f32(float %a, float %a)
901-
%max.fneg = fsub float -0.0, %max
901+
%max.fneg = fneg float %max
902902
ret float %max.fneg
903903
}
904904

@@ -2039,7 +2039,7 @@ define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float
20392039
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
20402040
%a = load volatile float, float addrspace(1)* %a.gep
20412041
%sin = call float @llvm.amdgcn.sin.f32(float %a)
2042-
%fneg = fsub float -0.0, %sin
2042+
%fneg = fneg float %sin
20432043
store float %fneg, float addrspace(1)* %out.gep
20442044
ret void
20452045
}
@@ -2059,7 +2059,7 @@ define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addr
20592059
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
20602060
%a = load volatile float, float addrspace(1)* %a.gep
20612061
%trunc = call float @llvm.trunc.f32(float %a)
2062-
%fneg = fsub float -0.0, %trunc
2062+
%fneg = fneg float %trunc
20632063
store float %fneg, float addrspace(1)* %out.gep
20642064
ret void
20652065
}
@@ -2086,7 +2086,7 @@ define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addr
20862086
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
20872087
%a = load volatile float, float addrspace(1)* %a.gep
20882088
%round = call float @llvm.round.f32(float %a)
2089-
%fneg = fsub float -0.0, %round
2089+
%fneg = fneg float %round
20902090
store float %fneg, float addrspace(1)* %out.gep
20912091
ret void
20922092
}
@@ -2106,7 +2106,7 @@ define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrs
21062106
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
21072107
%a = load volatile float, float addrspace(1)* %a.gep
21082108
%rint = call float @llvm.rint.f32(float %a)
2109-
%fneg = fsub float -0.0, %rint
2109+
%fneg = fneg float %rint
21102110
store float %fneg, float addrspace(1)* %out.gep
21112111
ret void
21122112
}
@@ -2126,7 +2126,7 @@ define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float
21262126
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
21272127
%a = load volatile float, float addrspace(1)* %a.gep
21282128
%nearbyint = call float @llvm.nearbyint.f32(float %a)
2129-
%fneg = fsub float -0.0, %nearbyint
2129+
%fneg = fneg float %nearbyint
21302130
store float %fneg, float addrspace(1)* %out.gep
21312131
ret void
21322132
}
@@ -2146,7 +2146,7 @@ define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, flo
21462146
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
21472147
%a = load volatile float, float addrspace(1)* %a.gep
21482148
%trunc = call float @llvm.canonicalize.f32(float %a)
2149-
%fneg = fsub float -0.0, %trunc
2149+
%fneg = fneg float %trunc
21502150
store float %fneg, float addrspace(1)* %out.gep
21512151
ret void
21522152
}
@@ -2170,7 +2170,7 @@ define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float
21702170
%a = load volatile float, float addrspace(1)* %a.gep
21712171
%b = load volatile float, float addrspace(1)* %b.gep
21722172
%mul = fmul float %a, %b
2173-
%fneg = fsub float -0.0, %mul
2173+
%fneg = fneg float %mul
21742174
%intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
21752175
%intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
21762176
store volatile float %intrp0, float addrspace(1)* %out.gep
@@ -2193,7 +2193,7 @@ define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float
21932193
%a = load volatile float, float addrspace(1)* %a.gep
21942194
%b = load volatile float, float addrspace(1)* %b.gep
21952195
%mul = fmul float %a, %b
2196-
%fneg = fsub float -0.0, %mul
2196+
%fneg = fneg float %mul
21972197
%intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
21982198
%intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
21992199
store volatile float %intrp0, float addrspace(1)* %out.gep
@@ -2230,7 +2230,7 @@ define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float
22302230
%b = load volatile float, float addrspace(1)* %b.gep
22312231
%c = load volatile float, float addrspace(1)* %c.gep
22322232
%mul = fmul float %a, %b
2233-
%fneg = fsub float -0.0, %mul
2233+
%fneg = fneg float %mul
22342234
%cmp0 = icmp eq i32 %d, 0
22352235
br i1 %cmp0, label %if, label %endif
22362236

@@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float
22662266
%b = load volatile float, float addrspace(1)* %b.gep
22672267
%c = load volatile float, float addrspace(1)* %c.gep
22682268
%mul = fmul float %a, %b
2269-
%fneg = fsub float -0.0, %mul
2269+
%fneg = fneg float %mul
22702270
call void asm sideeffect "; use $0", "v"(float %fneg) #0
22712271
store volatile float %fneg, float addrspace(1)* %out.gep
22722272
ret void
@@ -2295,7 +2295,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)
22952295
%b = load volatile float, float addrspace(1)* %b.gep
22962296
%c = load volatile float, float addrspace(1)* %c.gep
22972297
%mul = fmul float %a, %b
2298-
%fneg = fsub float -0.0, %mul
2298+
%fneg = fneg float %mul
22992299
call void asm sideeffect "; use $0", "v"(float %fneg) #0
23002300
store volatile float %mul, float addrspace(1)* %out.gep
23012301
ret void
@@ -2328,7 +2328,7 @@ define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %o
23282328
%b = load volatile float, float addrspace(1)* %b.gep
23292329
%c = load volatile float, float addrspace(1)* %c.gep
23302330

2331-
%fneg.a = fsub float -0.0, %a
2331+
%fneg.a = fneg float %a
23322332
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
23332333
%fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
23342334

@@ -2360,7 +2360,7 @@ define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %o
23602360
%b = load volatile float, float addrspace(1)* %b.gep
23612361
%c = load volatile float, float addrspace(1)* %c.gep
23622362

2363-
%fneg.a = fsub float -0.0, %a
2363+
%fneg.a = fneg float %a
23642364
%mul0 = fmul float %fneg.a, %b
23652365
%mul1 = fmul float %fneg.a, %c
23662366

@@ -2391,7 +2391,7 @@ define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)*
23912391
%b = load volatile float, float addrspace(1)* %b.gep
23922392
%c = load volatile float, float addrspace(1)* %c.gep
23932393

2394-
%fneg.a = fsub float -0.0, %a
2394+
%fneg.a = fneg float %a
23952395
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
23962396
%mul1 = fmul float %fneg.a, %c
23972397

@@ -2433,7 +2433,7 @@ define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(
24332433
%d = load volatile float, float addrspace(1)* %d.gep
24342434

24352435
%fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2436-
%fneg.fma0 = fsub float -0.0, %fma0
2436+
%fneg.fma0 = fneg float %fma0
24372437
%mul1 = fmul float %fneg.fma0, %c
24382438
%mul2 = fmul float %fneg.fma0, %d
24392439

@@ -2501,7 +2501,7 @@ define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)*
25012501
%d = load volatile float, float addrspace(1)* %d.gep
25022502

25032503
%trunc.a = call float @llvm.trunc.f32(float %a)
2504-
%trunc.fneg.a = fsub float -0.0, %trunc.a
2504+
%trunc.fneg.a = fneg float %trunc.a
25052505
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
25062506
store volatile float %fma0, float addrspace(1)* %out
25072507
ret void
@@ -2531,7 +2531,7 @@ define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %
25312531
%d = load volatile float, float addrspace(1)* %d.gep
25322532

25332533
%trunc.a = call float @llvm.trunc.f32(float %a)
2534-
%trunc.fneg.a = fsub float -0.0, %trunc.a
2534+
%trunc.fneg.a = fneg float %trunc.a
25352535
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
25362536
%mul1 = fmul float %trunc.a, %d
25372537
store volatile float %fma0, float addrspace(1)* %out

llvm/test/CodeGen/AMDGPU/fpext-free.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
288288
entry:
289289
%mul = fmul half %x, %y
290290
%mul.ext = fpext half %mul to float
291-
%neg.mul.ext = fsub float -0.0, %mul.ext
291+
%neg.mul.ext = fneg float %mul.ext
292292
%add = fsub float %neg.mul.ext, %z
293293
ret float %add
294294
}

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
9999
float addrspace(1)* %a) {
100100
entry:
101101
%a.val = load float, float addrspace(1)* %a
102-
%a.fneg = fsub float -0.0, %a.val
102+
%a.fneg = fneg float %a.val
103103
%r.val = fptrunc float %a.fneg to half
104104
store half %r.val, half addrspace(1)* %r
105105
ret void
@@ -132,7 +132,7 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
132132
entry:
133133
%a.val = load float, float addrspace(1)* %a
134134
%a.fabs = call float @llvm.fabs.f32(float %a.val)
135-
%a.fneg.fabs = fsub float -0.0, %a.fabs
135+
%a.fneg.fabs = fneg float %a.fabs
136136
%r.val = fptrunc float %a.fneg.fabs to half
137137
store half %r.val, half addrspace(1)* %r
138138
ret void

llvm/test/CodeGen/AMDGPU/known-never-snan.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ define float @v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32(float %a) #0 {
2626
; GCN-NEXT: v_med3_f32 v0, -v0, 2.0, 4.0
2727
; GCN-NEXT: s_setpc_b64 s[30:31]
2828
%a.nnan.add = fdiv nnan float 1.0, %a, !fpmath !0
29-
%known.not.snan = fsub float -0.0, %a.nnan.add
29+
%known.not.snan = fneg float %a.nnan.add
3030
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
3131
%med = call float @llvm.minnum.f32(float %max, float 4.0)
3232
ret float %med

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ define amdgpu_kernel void @mad_f32_neg_b(
7474
%a.val = load float, float addrspace(1)* %a
7575
%b.val = load float, float addrspace(1)* %b
7676
%c.val = load float, float addrspace(1)* %c
77-
%neg.b = fsub float -0.0, %b.val
77+
%neg.b = fneg float %b.val
7878
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
7979
store float %r.val, float addrspace(1)* %r
8080
ret void
@@ -107,7 +107,7 @@ define amdgpu_kernel void @mad_f32_neg_abs_b(
107107
%b.val = load float, float addrspace(1)* %b
108108
%c.val = load float, float addrspace(1)* %c
109109
%abs.b = call float @llvm.fabs.f32(float %b.val)
110-
%neg.abs.b = fsub float -0.0, %abs.b
110+
%neg.abs.b = fneg float %abs.b
111111
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
112112
store float %r.val, float addrspace(1)* %r
113113
ret void

0 commit comments

Comments
 (0)