Skip to content

Commit d15c454

Browse files
committed
[FPEnv][AMDGPU] Correct strictfp tests.
Correct the AMDGPU strictfp tests to follow the rules documented in the LangRef (https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics). These tests needed the strictfp attribute added to function calls and to some declarations. With D146845 applied, some of the tests now pass, while others get farther along but still fail. The tests revealed that further work is required, mostly in AMDGPU atomics, to get all of the tests passing. Since I was here anyway, I also removed the strictfp attribute from some constrained intrinsic declarations; they have this attribute by default. Test changes were verified with D146845.
1 parent 1af0536 commit d15c454

File tree

6 files changed

+32
-30
lines changed

6 files changed

+32
-30
lines changed

llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
943943
ret <2 x half> %mul
944944
}
945945

946-
define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
946+
define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
947947
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
948948
; CHECK: ; %bb.0:
949949
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
955955
ret float %mul
956956
}
957957

958-
define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
958+
define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
959959
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
960960
; CHECK: ; %bb.0:
961961
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
967967
ret float %mul
968968
}
969969

970-
define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
970+
define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
971971
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
972972
; CHECK: ; %bb.0:
973973
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
13161316
attributes #0 = { "denormal-fp-math"="ieee,ieee" }
13171317
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
13181318
attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
1319+
attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
1320+
attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
1321+
attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
22762276
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
22772277
; GFX1132-DPP-NEXT: .LBB3_3:
22782278
; GFX1132-DPP-NEXT: s_endpgm
2279-
%divValue = call float @div.float.value()
2279+
%divValue = call float @div.float.value() strictfp
22802280
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
22812281
ret void
22822282
}
@@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
41744174
; GFX1132-DPP-NEXT: global_atomic_add_f32 v4, v0, s[0:1]
41754175
; GFX1132-DPP-NEXT: .LBB6_2:
41764176
; GFX1132-DPP-NEXT: s_endpgm
4177-
%divValue = call float @div.float.value()
4177+
%divValue = call float @div.float.value() strictfp
41784178
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
41794179
ret void
41804180
}
@@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
54035403
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
54045404
; GFX1132-DPP-NEXT: .LBB8_3:
54055405
; GFX1132-DPP-NEXT: s_endpgm
5406-
%divValue = call float @div.float.value()
5406+
%divValue = call float @div.float.value() strictfp
54075407
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4
54085408
ret void
54095409
}

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
23802380
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
23812381
; GFX1132-DPP-NEXT: .LBB3_3:
23822382
; GFX1132-DPP-NEXT: s_endpgm
2383-
%divValue = call float @div.float.value()
2383+
%divValue = call float @div.float.value() strictfp
23842384
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
23852385
ret void
23862386
}
@@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
43824382
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
43834383
; GFX1132-DPP-NEXT: .LBB6_3:
43844384
; GFX1132-DPP-NEXT: s_endpgm
4385-
%divValue = call float @div.float.value()
4385+
%divValue = call float @div.float.value() strictfp
43864386
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
43874387
ret void
43884388
}
@@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
56115611
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
56125612
; GFX1132-DPP-NEXT: .LBB8_3:
56135613
; GFX1132-DPP-NEXT: s_endpgm
5614-
%divValue = call float @div.float.value()
5614+
%divValue = call float @div.float.value() strictfp
56155615
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4
56165616
ret void
56175617
}

llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {
7676
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
7777
; GFX11-NEXT: v_mov_b32_e32 v0, s0
7878
; GFX11-NEXT: s_setpc_b64 s[30:31]
79-
%fpmode = call i32 @llvm.get.fpmode.i32()
79+
%fpmode = call i32 @llvm.get.fpmode.i32() strictfp
8080
ret i32 %fpmode
8181
}
8282

llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -374,24 +374,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
374374
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375375
; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
376376
; GFX1011-NEXT: s_setpc_b64 s[30:31]
377-
%abs.arg = call float @llvm.fabs.f32(float %arg)
377+
%abs.arg = call float @llvm.fabs.f32(float %arg) #0
378378
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
379379
ret void
380380
}
381381

382-
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
383-
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
384-
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
382+
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
383+
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
384+
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
385385

386-
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
387-
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
388-
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
386+
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
387+
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
388+
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
389389

390-
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
391-
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
392-
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
390+
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
391+
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
392+
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
393393

394-
declare float @llvm.fabs.f32(float) #1
394+
declare float @llvm.fabs.f32(float)
395395

396396
attributes #0 = { strictfp }
397-
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }

llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
33

4-
declare void @f16_user(half)
5-
declare half @f16_result()
4+
declare void @f16_user(half) #0
5+
declare half @f16_result() #0
66

7-
declare void @v2f16_user(<2 x half>)
8-
declare <2 x half> @v2f16_result()
7+
declare void @v2f16_user(<2 x half>) #0
8+
declare <2 x half> @v2f16_result() #0
99

10-
declare void @v4f16_user(<4 x half>)
11-
declare <4 x half> @v4f16_result()
10+
declare void @v4f16_user(<4 x half>) #0
11+
declare <4 x half> @v4f16_result() #0
1212

13-
declare void @v8f16_user(<8 x half>)
14-
declare <8 x half> @v8f16_result()
13+
declare void @v8f16_user(<8 x half>) #0
14+
declare <8 x half> @v8f16_result() #0
1515

1616
define void @f16_arg(half %arg, ptr %ptr) #0 {
1717
; GFX7-LABEL: f16_arg:

0 commit comments

Comments
 (0)