22; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN %s
33
44; TODO: Add global-isel when it can support bf16
5+
56define amdgpu_ps float @v_test_cvt_bf16_f32_v (bfloat %v ) {
67; GCN-LABEL: v_test_cvt_bf16_f32_v:
78; GCN: ; %bb.0:
@@ -10,6 +11,7 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_v(bfloat %v) {
1011 %cvt = fpext bfloat %v to float
1112 ret float %cvt
1213}
14+
1315define amdgpu_ps float @v_test_cvt_bf16_f32_s (bfloat inreg %v ) {
1416; GCN-LABEL: v_test_cvt_bf16_f32_s:
1517; GCN: ; %bb.0:
@@ -19,6 +21,7 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_s(bfloat inreg %v) {
1921 %cvt = fpext bfloat %v to float
2022 ret float %cvt
2123}
24+
2225define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v (<2 x float > %src ) {
2326; GCN-LABEL: v_test_cvt_v2f32_v2bf16_v:
2427; GCN: ; %bb.0:
@@ -42,6 +45,7 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) {
4245 %cast = bitcast <2 x bfloat> %res to float
4346 ret float %cast
4447}
48+
4549define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s (<2 x float > inreg %src ) {
4650; GCN-LABEL: v_test_cvt_v2f32_v2bf16_s:
4751; GCN: ; %bb.0:
@@ -66,6 +70,7 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) {
6670 %cast = bitcast <2 x bfloat> %res to float
6771 ret float %cast
6872}
73+
6974define amdgpu_ps float @v_test_cvt_f32_bf16_v (float %src ) {
7075; GCN-LABEL: v_test_cvt_f32_bf16_v:
7176; GCN: ; %bb.0:
@@ -82,6 +87,7 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) {
8287 %ext = fpext bfloat %trunc to float
8388 ret float %ext
8489}
90+
8591define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v (<2 x double > %src ) {
8692; GCN-LABEL: v_test_cvt_v2f64_v2bf16_v:
8793; GCN: ; %bb.0:
@@ -128,6 +134,7 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
128134 %cast = bitcast <2 x bfloat> %res to float
129135 ret float %cast
130136}
137+
131138define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16 (float %a , float %b ) {
132139; GCN-LABEL: fptrunc_f32_f32_to_v2bf16:
133140; GCN: ; %bb.0: ; %entry
@@ -155,6 +162,7 @@ entry:
155162 %ret = bitcast <2 x bfloat> %v2.2 to float
156163 ret float %ret
157164}
165+
158166define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16_mods (float %a , float %b ) {
159167; GCN-LABEL: fptrunc_f32_f32_to_v2bf16_mods:
160168; GCN: ; %bb.0: ; %entry
@@ -186,6 +194,7 @@ entry:
186194 %ret = bitcast <2 x bfloat> %v2.2 to float
187195 ret float %ret
188196}
197+
189198define amdgpu_ps void @fptrunc_f32_to_bf16 (float %a , ptr %out ) {
190199; GCN-LABEL: fptrunc_f32_to_bf16:
191200; GCN: ; %bb.0: ; %entry
@@ -205,6 +214,7 @@ entry:
205214 store bfloat %a.cvt , ptr %out
206215 ret void
207216}
217+
208218define amdgpu_ps void @fptrunc_f32_to_bf16_abs (float %a , ptr %out ) {
209219; GCN-LABEL: fptrunc_f32_to_bf16_abs:
210220; GCN: ; %bb.0: ; %entry
@@ -226,6 +236,7 @@ entry:
226236 store bfloat %a.cvt , ptr %out
227237 ret void
228238}
239+
229240define amdgpu_ps void @fptrunc_f32_to_bf16_neg (float %a , ptr %out ) {
230241; GCN-LABEL: fptrunc_f32_to_bf16_neg:
231242; GCN: ; %bb.0: ; %entry
@@ -247,6 +258,7 @@ entry:
247258 store bfloat %a.cvt , ptr %out
248259 ret void
249260}
261+
250262define amdgpu_ps void @fptrunc_f64_to_bf16 (double %a , ptr %out ) {
251263; GCN-LABEL: fptrunc_f64_to_bf16:
252264; GCN: ; %bb.0: ; %entry
@@ -276,6 +288,7 @@ entry:
276288 store bfloat %a.cvt , ptr %out
277289 ret void
278290}
291+
279292define amdgpu_ps void @fptrunc_f64_to_bf16_neg (double %a , ptr %out ) {
280293; GCN-LABEL: fptrunc_f64_to_bf16_neg:
281294; GCN: ; %bb.0: ; %entry
@@ -307,6 +320,7 @@ entry:
307320 store bfloat %a.cvt , ptr %out
308321 ret void
309322}
323+
310324define amdgpu_ps void @fptrunc_f64_to_bf16_abs (double %a , ptr %out ) {
311325; GCN-LABEL: fptrunc_f64_to_bf16_abs:
312326; GCN: ; %bb.0: ; %entry
@@ -341,4 +355,3 @@ entry:
341355
342356declare float @llvm.fabs.f32 (float )
343357declare double @llvm.fabs.f64 (double )
344-
0 commit comments