1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
23; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
34
@@ -6,36 +7,48 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; Declarations of the sin/cos intrinsics called by test_sin and test_cos below;
; both take and return <2 x bfloat>.
; NOTE(review): the intrinsic names carry an `.f16` suffix although the operand
; type is <2 x bfloat> -- confirm this naming is intentional.
67declare <2 x bfloat> @llvm.sin.f16 (<2 x bfloat> %a ) #0
78declare <2 x bfloat> @llvm.cos.f16 (<2 x bfloat> %a ) #0
89
9- ; CHECK-LABEL: test_sin(
10- ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0];
11- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
12- ; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
13- ; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
14- ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
15- ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
16- ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
17- ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
18- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
19- ; CHECK: st.param.b32 [func_retval0], [[R]];
20- ; CHECK: ret;
; test_sin: calls @llvm.sin.f16 on a <2 x bfloat> argument and returns the result.
; The CHECK-NEXT lines pin the exact PTX sequence: the packed b32 parameter is
; split into two b16 registers, each lane goes through cvt.f32.bf16 ->
; sin.approx.f32 -> cvt.rn.bf16.f32, and the two results are repacked into one
; b32 return value. Lane order is preserved: %rs4 (computed from %rs1) is packed
; first and %rs3 (computed from %rs2) second.
; The file header marks these assertions as autogenerated by
; utils/update_llc_test_checks.py.
2110define <2 x bfloat> @test_sin (<2 x bfloat> %a ) #0 #1 {
11+ ; CHECK-LABEL: test_sin(
12+ ; CHECK: {
13+ ; CHECK-NEXT: .reg .b16 %rs<5>;
14+ ; CHECK-NEXT: .reg .b32 %r<3>;
15+ ; CHECK-NEXT: .reg .f32 %f<5>;
16+ ; CHECK-EMPTY:
17+ ; CHECK-NEXT: // %bb.0:
18+ ; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0];
19+ ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
20+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
21+ ; CHECK-NEXT: sin.approx.f32 %f2, %f1;
22+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
23+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
24+ ; CHECK-NEXT: sin.approx.f32 %f4, %f3;
25+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
26+ ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
27+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
28+ ; CHECK-NEXT: ret;
2229 %r = call <2 x bfloat> @llvm.sin.f16 (<2 x bfloat> %a )
2330 ret <2 x bfloat> %r
2431}
2532
26- ; CHECK-LABEL: test_cos(
27- ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0];
28- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
29- ; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
30- ; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
31- ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
32- ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
33- ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
34- ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
35- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
36- ; CHECK: st.param.b32 [func_retval0], [[R]];
37- ; CHECK: ret;
; test_cos: calls @llvm.cos.f16 on a <2 x bfloat> argument and returns the result.
; The CHECK-NEXT lines pin the exact PTX sequence: the packed b32 parameter is
; split into two b16 registers, each lane goes through cvt.f32.bf16 ->
; cos.approx.f32 -> cvt.rn.bf16.f32, and the two results are repacked into one
; b32 return value. Lane order is preserved: %rs4 (computed from %rs1) is packed
; first and %rs3 (computed from %rs2) second.
; The file header marks these assertions as autogenerated by
; utils/update_llc_test_checks.py.
3833define <2 x bfloat> @test_cos (<2 x bfloat> %a ) #0 #1 {
34+ ; CHECK-LABEL: test_cos(
35+ ; CHECK: {
36+ ; CHECK-NEXT: .reg .b16 %rs<5>;
37+ ; CHECK-NEXT: .reg .b32 %r<3>;
38+ ; CHECK-NEXT: .reg .f32 %f<5>;
39+ ; CHECK-EMPTY:
40+ ; CHECK-NEXT: // %bb.0:
41+ ; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0];
42+ ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
43+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
44+ ; CHECK-NEXT: cos.approx.f32 %f2, %f1;
45+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
46+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
47+ ; CHECK-NEXT: cos.approx.f32 %f4, %f3;
48+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
49+ ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
50+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
51+ ; CHECK-NEXT: ret;
3952 %r = call <2 x bfloat> @llvm.cos.f16 (<2 x bfloat> %a )
4053 ret <2 x bfloat> %r
4154}
0 commit comments