[FPEnv][AMDGPU] Correct strictfp tests.

kpneal · kpneal · commit d15c454bedc0 · 2024-02-05T09:29:31.000-05:00
Correct AMDGPU strictfp tests to follow the rules documented in the LangRef: https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics These tests needed the strictfp attribute added to function calls and some declarations. Some of the tests now pass with D146845, others get farther along and fail with D146845. The tests revealed that further work is required in mostly AMDGPU atomics to get the tests passing. Since I was here anyway I removed the strictfp attribute from some constrained intrinsic declarations. They have this attribute by default. Test changes verified with D146845.
diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
   ret <2 x half> %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
   ret float %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
   ret float %mul
 }
 
-define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
+attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
+attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
+attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB3_2
 ; GFX1132-DPP-NEXT:  .LBB3_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
   ret void
 }
@@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    global_atomic_add_f32 v4, v0, s[0:1]
 ; GFX1132-DPP-NEXT:  .LBB6_2:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
   ret void
 }
@@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB8_2
 ; GFX1132-DPP-NEXT:  .LBB8_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB3_2
 ; GFX1132-DPP-NEXT:  .LBB3_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
   ret void
 }
@@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB6_2
 ; GFX1132-DPP-NEXT:  .LBB6_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
   ret void
 }
@@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
 ; GFX1132-DPP-NEXT:    s_cbranch_execnz .LBB8_2
 ; GFX1132-DPP-NEXT:  .LBB8_3:
 ; GFX1132-DPP-NEXT:    s_endpgm
-  %divValue = call float @div.float.value()
+  %divValue = call float @div.float.value() strictfp
   %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
@@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {
 ; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  %fpmode = call i32 @llvm.get.fpmode.i32()
+  %fpmode = call i32 @llvm.get.fpmode.i32() strictfp
   ret i32 %fpmode
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -374,24 +374,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
 ; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1011-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
 ; GFX1011-NEXT:    s_setpc_b64 s[30:31]
-  %abs.arg = call float @llvm.fabs.f32(float %arg)
+  %abs.arg = call float @llvm.fabs.f32(float %arg) #0
   %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret void
 }
 
-declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
 
-declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
 
-declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
 
-declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fabs.f32(float)
 
 attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
@@ -1,17 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
 
-declare void @f16_user(half)
-declare half @f16_result()
+declare void @f16_user(half) #0
+declare half @f16_result() #0
 
-declare void @v2f16_user(<2 x half>)
-declare <2 x half> @v2f16_result()
+declare void @v2f16_user(<2 x half>) #0
+declare <2 x half> @v2f16_result() #0
 
-declare void @v4f16_user(<4 x half>)
-declare <4 x half> @v4f16_result()
+declare void @v4f16_user(<4 x half>) #0
+declare <4 x half> @v4f16_result() #0
 
-declare void @v8f16_user(<8 x half>)
-declare <8 x half> @v8f16_result()
+declare void @v8f16_user(<8 x half>) #0
+declare <8 x half> @v8f16_result() #0
 
 define void @f16_arg(half %arg, ptr %ptr) #0 {
 ; GFX7-LABEL: f16_arg:

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {`
`76`	`76`	`; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0`
`77`	`77`	`; GFX11-NEXT: v_mov_b32_e32 v0, s0`
`78`	`78`	`; GFX11-NEXT: s_setpc_b64 s[30:31]`
`79`		`- %fpmode = call i32 @llvm.get.fpmode.i32()`
	`79`	`+ %fpmode = call i32 @llvm.get.fpmode.i32() strictfp`
`80`	`80`	`ret i32 %fpmode`
`81`	`81`	`}`
`82`	`82`