Skip to content

Commit a7f9a4d

Browse files
authored
[AMDGPU] Update strict floating point tests to be more comprehensive (#169578)
1 parent 4e7c65e commit a7f9a4d

File tree

9 files changed: 1768 additions and 334 deletions.

llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll

Lines changed: 533 additions & 144 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll

Lines changed: 444 additions & 72 deletions
Large diffs are not rendered by default.
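llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll (path inferred from the f64 fadd tests below and the sibling f16/f32 entries above; the file header is missing from this capture)

Lines changed: 28 additions & 60 deletions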
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
4-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
4+
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
6+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
7+
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
9+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
510

611
define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
712
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
813
; GCN: ; %bb.0:
914
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
1116
; GCN-NEXT: s_setpc_b64 s[30:31]
12-
;
13-
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_strict:
14-
; GFX10: ; %bb.0:
15-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
17-
; GFX10-NEXT: s_setpc_b64 s[30:31]
1817
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
1918
ret double %val
2019
}
@@ -25,12 +24,6 @@ define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
2524
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2625
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
2726
; GCN-NEXT: s_setpc_b64 s[30:31]
28-
;
29-
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_ignore:
30-
; GFX10: ; %bb.0:
31-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
33-
; GFX10-NEXT: s_setpc_b64 s[30:31]
3427
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
3528
ret double %val
3629
}
@@ -41,12 +34,6 @@ define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
4134
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
4336
; GCN-NEXT: s_setpc_b64 s[30:31]
44-
;
45-
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
46-
; GFX10: ; %bb.0:
47-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
49-
; GFX10-NEXT: s_setpc_b64 s[30:31]
5037
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
5138
ret double %val
5239
}
@@ -58,13 +45,6 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2
5845
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
5946
; GCN-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
6047
; GCN-NEXT: s_setpc_b64 s[30:31]
61-
;
62-
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
63-
; GFX10: ; %bb.0:
64-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
66-
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
67-
; GFX10-NEXT: s_setpc_b64 s[30:31]
6848
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
6949
ret <2 x double> %val
7050
}
@@ -76,13 +56,6 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2
7656
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
7757
; GCN-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
7858
; GCN-NEXT: s_setpc_b64 s[30:31]
79-
;
80-
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
81-
; GFX10: ; %bb.0:
82-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
84-
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
85-
; GFX10-NEXT: s_setpc_b64 s[30:31]
8659
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
8760
ret <2 x double> %val
8861
}
@@ -94,13 +67,6 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <
9467
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
9568
; GCN-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
9669
; GCN-NEXT: s_setpc_b64 s[30:31]
97-
;
98-
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
99-
; GFX10: ; %bb.0:
100-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
102-
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
103-
; GFX10-NEXT: s_setpc_b64 s[30:31]
10470
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
10571
ret <2 x double> %val
10672
}
@@ -113,30 +79,29 @@ define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3
11379
; GCN-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
11480
; GCN-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
11581
; GCN-NEXT: s_setpc_b64 s[30:31]
116-
;
117-
; GFX10-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
118-
; GFX10: ; %bb.0:
119-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120-
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7]
121-
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
122-
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
123-
; GFX10-NEXT: s_setpc_b64 s[30:31]
12482
%val = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
12583
ret <3 x double> %val
12684
}
12785

12886
define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
129-
; GCN-LABEL: s_constained_fadd_f64_fpexcept_strict:
130-
; GCN: ; %bb.0:
131-
; GCN-NEXT: v_mov_b32_e32 v0, s4
132-
; GCN-NEXT: v_mov_b32_e32 v1, s5
133-
; GCN-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
134-
; GCN-NEXT: ; return to shader part epilog
87+
; GCN-SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
88+
; GCN-SDAG: ; %bb.0:
89+
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, s4
90+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s5
91+
; GCN-SDAG-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
92+
; GCN-SDAG-NEXT: ; return to shader part epilog
93+
;
94+
; GCN-GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
95+
; GCN-GISEL: ; %bb.0:
96+
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s4
97+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s5
98+
; GCN-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
99+
; GCN-GISEL-NEXT: ; return to shader part epilog
135100
;
136-
; GFX10-LABEL: s_constained_fadd_f64_fpexcept_strict:
137-
; GFX10: ; %bb.0:
138-
; GFX10-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
139-
; GFX10-NEXT: ; return to shader part epilog
101+
; GFX10PLUS-LABEL: s_constained_fadd_f64_fpexcept_strict:
102+
; GFX10PLUS: ; %bb.0:
103+
; GFX10PLUS-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
104+
; GFX10PLUS-NEXT: ; return to shader part epilog
140105
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
141106
%cast = bitcast double %val to <2 x float>
142107
ret <2 x float> %cast
@@ -148,3 +113,6 @@ declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3
148113

149114
attributes #0 = { strictfp }
150115
attributes #1 = { inaccessiblememonly nounwind willreturn }
116+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
117+
; GFX10: {{.*}}
118+
; GFX11: {{.*}}

0 commit comments

Comments
 (0)