Skip to content

Commit b230ed7

Browse files
[InstSimplify] Enable FAdd simplifications when user can ignore sign bit
When the result of an FAdd is used by fabs, the sign bit of a floating-point zero can safely be ignored. This patch enables the instruction simplification that folds fadd x, 0 ==> x in that case. The fold would otherwise not apply, because the compiler cannot prove that x isn't -0 (under default rounding, -0 + 0 evaluates to +0, so replacing the fadd with x would change the sign of the zero). But if the result of the fadd is used by fabs, the sign of a zero result does not matter, so the optimization can still be performed. Fixes #154238
1 parent ce5a115 commit b230ed7

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5710,7 +5710,9 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
57105710
// fadd X, 0 ==> X, when we know X is not -0
57115711
if (canIgnoreSNaN(ExBehavior, FMF))
57125712
if (match(Op1, m_PosZeroFP()) &&
5713-
(FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
5713+
(FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
5714+
(Q.CxtI && !Q.CxtI->use_empty() &&
5715+
canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
57145716
return Op0;
57155717

57165718
if (!isDefaultFPEnvironment(ExBehavior, Rounding))

llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,8 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
363363
}
364364

365365
; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
366-
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
367-
; GCN-NOT: v_mul
368-
; GCN-NOT: v_max
366+
; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
367+
; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
369368
; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
370369
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
371370
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
3+
define float @src(float %arg1) {
4+
; CHECK-LABEL: define float @src(
5+
; CHECK-SAME: float [[ARG1:%.*]]) {
6+
; CHECK-NEXT: [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
7+
; CHECK-NEXT: ret float [[V3]]
8+
;
9+
%v2 = fadd float %arg1, 0.000000e+00
10+
%v3 = call float @llvm.fabs.f32(float %v2)
11+
ret float %v3
12+
}

0 commit comments

Comments
 (0)