-
Notifications
You must be signed in to change notification settings - Fork 15k
[GlobalIsel] Remove NoNaNsFPMath uses #163484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
ea79ea0 to
4492973
Compare
|
@llvm/pr-subscribers-backend-amdgpu Author: None (paperchalice) Changes: Users should use the `nnan` fast-math flag instead. Patch is 45.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163484.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6ad7213..ca82857319abc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 26b9d996fc284..8705647e36fe1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; global nnan function attribute always forces clamp combine
-define float @test_min_max_global_nnan(float %a) #3 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -223,11 +223,11 @@ define float @test_min_max_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
- %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #3 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -244,7 +244,7 @@ define float @test_max_min_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 1.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
ret float %fmed
}
@@ -414,5 +414,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index d2c93e75cbed6..696a87b9d0b4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
-define float @test_min_max_global_nnan(float %a) #2 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -254,12 +254,12 @@ define float @test_min_max_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
%fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #2 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,8 +281,8 @@ define float @test_max_min_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
ret float %fmed
}
@@ -560,4 +560,3 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 9e152253bb6ca..3145a272ae48f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -10,7 +10,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -342,7 +342,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -453,7 +453,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -569,7 +569,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -740,7 +740,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -955,14 +955,14 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
%outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep0
- %max = call float @llvm.maxnum.f32(float %a, float 2.0)
- %med = call float @llvm.minnum.f32(float %max, float 4.0)
+ %max = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call nnan float @llvm.minnum.f32(float %max, float 4.0)
store float %med, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -1297,10 +1297,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1487,10 +1487,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%b.fneg = fsub float -0.0, %b
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1677,10 +1677,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%c.fneg = fsub float -0.0, %c
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1872,14 +1872,14 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %b.fabs = call float @llvm.fabs.f32(float %b)
- %c.fabs = call float @llvm.fabs.f32(float %c)
+ %b.fabs = call nnan float @llvm.fabs.f32(float %b)
+ %c.fabs = call nnan float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
@@ -2082,16 +2082,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
%c.fabs = call float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2418,7 +2418,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2878,10 +2878,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3030,10 +3030,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3220,10 +3220,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3372,10 +3372,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3524,10 +3524,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3676,10 +3676,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float...
[truncated]
|
|
@llvm/pr-subscribers-llvm-globalisel Author: None (paperchalice) Changes: Users should use the `nnan` fast-math flag instead. Patch is 45.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163484.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6ad7213..ca82857319abc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- const TargetMachine& TM = DefMI->getMF()->getTarget();
- if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 26b9d996fc284..8705647e36fe1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; global nnan function attribute always forces clamp combine
-define float @test_min_max_global_nnan(float %a) #3 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -223,11 +223,11 @@ define float @test_min_max_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
- %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #3 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -244,7 +244,7 @@ define float @test_max_min_global_nnan(float %a) #3 {
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 1.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
ret float %fmed
}
@@ -414,5 +414,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index d2c93e75cbed6..696a87b9d0b4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
-define float @test_min_max_global_nnan(float %a) #2 {
+define float @test_min_max_global_nnan(float %a) {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -254,12 +254,12 @@ define float @test_min_max_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
%fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #2 {
+define float @test_max_min_global_nnan(float %a) {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,8 +281,8 @@ define float @test_max_min_global_nnan(float %a) #2 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
- %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
ret float %fmed
}
@@ -560,4 +560,3 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = {"amdgpu-ieee"="true"}
attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 9e152253bb6ca..3145a272ae48f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -10,7 +10,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -342,7 +342,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -453,7 +453,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -569,7 +569,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -740,7 +740,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -955,14 +955,14 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
%outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep0
- %max = call float @llvm.maxnum.f32(float %a, float 2.0)
- %med = call float @llvm.minnum.f32(float %max, float 4.0)
+ %max = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call nnan float @llvm.minnum.f32(float %max, float 4.0)
store float %med, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -1297,10 +1297,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1487,10 +1487,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%b.fneg = fsub float -0.0, %b
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1677,10 +1677,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%c.fneg = fsub float -0.0, %c
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -1872,14 +1872,14 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %b.fabs = call float @llvm.fabs.f32(float %b)
- %c.fabs = call float @llvm.fabs.f32(float %c)
+ %b.fabs = call nnan float @llvm.fabs.f32(float %b)
+ %c.fabs = call nnan float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
@@ -2082,16 +2082,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
%c.fabs = call float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
- %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2418,7 +2418,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2878,10 +2878,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3030,10 +3030,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3220,10 +3220,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
%a.fneg = fsub float -0.0, %a
- %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
- %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
- %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
- %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp1 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+ %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3372,10 +3372,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3524,10 +3524,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
- %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
- %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
- %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, ptr addrspace(1) %outgep
ret void
}
@@ -3676,10 +3676,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
%a = load volatile float, ptr addrspace(1) %gep0
%b = load volatile float, ptr addrspace(1) %gep1
%c = load volatile float, ptr addrspace(1) %gep2
- %tmp0 = call float...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/134/builds/28149 Here is the relevant piece of the build log for reference: |
Users should use `nnan` instead. This is the GlobalISel part.