Skip to content

Commit f8f0c85

Browse files
committed
[AMDGPU] Update strict floating point tests to be more comprehensive
1 parent 4822f49 commit f8f0c85

File tree

9 files changed: 2118 additions and 137 deletions.

llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll

Lines changed: 684 additions & 108 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll

Lines changed: 577 additions & 17 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll

Lines changed: 60 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
4-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
4+
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7+
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
9+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
510

611
define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
712
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
@@ -15,6 +20,12 @@ define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
1520
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
1722
; GFX10-NEXT: s_setpc_b64 s[30:31]
23+
;
24+
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_strict:
25+
; GFX11: ; %bb.0:
26+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
28+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1829
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
1930
ret double %val
2031
}
@@ -31,6 +42,12 @@ define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
3142
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3243
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
3344
; GFX10-NEXT: s_setpc_b64 s[30:31]
45+
;
46+
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_ignore:
47+
; GFX11: ; %bb.0:
48+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
50+
; GFX11-NEXT: s_setpc_b64 s[30:31]
3451
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
3552
ret double %val
3653
}
@@ -47,6 +64,12 @@ define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
4764
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4865
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
4966
; GFX10-NEXT: s_setpc_b64 s[30:31]
67+
;
68+
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
69+
; GFX11: ; %bb.0:
70+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
72+
; GFX11-NEXT: s_setpc_b64 s[30:31]
5073
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
5174
ret double %val
5275
}
@@ -65,6 +88,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2
6588
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
6689
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
6790
; GFX10-NEXT: s_setpc_b64 s[30:31]
91+
;
92+
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
93+
; GFX11: ; %bb.0:
94+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
96+
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
97+
; GFX11-NEXT: s_setpc_b64 s[30:31]
6898
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
6999
ret <2 x double> %val
70100
}
@@ -83,6 +113,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2
83113
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
84114
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
85115
; GFX10-NEXT: s_setpc_b64 s[30:31]
116+
;
117+
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
118+
; GFX11: ; %bb.0:
119+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
121+
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
122+
; GFX11-NEXT: s_setpc_b64 s[30:31]
86123
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
87124
ret <2 x double> %val
88125
}
@@ -101,6 +138,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <
101138
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
102139
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
103140
; GFX10-NEXT: s_setpc_b64 s[30:31]
141+
;
142+
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
143+
; GFX11: ; %bb.0:
144+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
146+
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
147+
; GFX11-NEXT: s_setpc_b64 s[30:31]
104148
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
105149
ret <2 x double> %val
106150
}
@@ -121,6 +165,14 @@ define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3
121165
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
122166
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
123167
; GFX10-NEXT: s_setpc_b64 s[30:31]
168+
;
169+
; GFX11-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
170+
; GFX11: ; %bb.0:
171+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172+
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7]
173+
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
174+
; GFX11-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
175+
; GFX11-NEXT: s_setpc_b64 s[30:31]
124176
%val = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
125177
ret <3 x double> %val
126178
}
@@ -137,6 +189,11 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
137189
; GFX10: ; %bb.0:
138190
; GFX10-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
139191
; GFX10-NEXT: ; return to shader part epilog
192+
;
193+
; GFX11-LABEL: s_constained_fadd_f64_fpexcept_strict:
194+
; GFX11: ; %bb.0:
195+
; GFX11-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
196+
; GFX11-NEXT: ; return to shader part epilog
140197
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
141198
%cast = bitcast double %val to <2 x float>
142199
ret <2 x float> %cast

llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,20 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
5-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
6-
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
4+
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
6+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
7+
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-SDAG %s
9+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s
10+
11+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-TRUE16 %s
12+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-FAKE16 %s
13+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 %s
14+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 %s
15+
16+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
17+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
718

819
define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
920
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:

0 commit comments

Comments
 (0)