1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
23; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
34
45
5- ; CHECK-LABEL: cvt_rn_bf16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2bf16x2.rn and returns the
; <2 x bfloat> result; the CHECK lines expect cvt.rn.bf16x2.f32 in the output.
66define <2 x bfloat> @cvt_rn_bf16x2_f32 (float %f1 , float %f2 ) {
7-
8- ; CHECK: cvt.rn.bf16x2.f32
9- %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn (float %f1 , float %f2 );
10-
11- ret <2 x bfloat> %val
7+ ; CHECK-LABEL: cvt_rn_bf16x2_f32(
8+ ; CHECK: {
9+ ; CHECK-NEXT: .reg .b32 %r<2>;
10+ ; CHECK-NEXT: .reg .f32 %f<3>;
11+ ; CHECK-EMPTY:
12+ ; CHECK-NEXT: // %bb.0:
13+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_bf16x2_f32_param_0];
14+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_bf16x2_f32_param_1];
15+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f1, %f2;
16+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
17+ ; CHECK-NEXT: ret;
18+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn (float %f1 , float %f2 )
19+ ret <2 x bfloat> %val
1220}
1321
14- ; CHECK-LABEL: cvt_rn_relu_bf16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2bf16x2.rn.relu and returns the
; <2 x bfloat> result; the CHECK lines expect cvt.rn.relu.bf16x2.f32.
1522define <2 x bfloat> @cvt_rn_relu_bf16x2_f32 (float %f1 , float %f2 ) {
16-
17- ; CHECK: cvt.rn.relu.bf16x2.f32
18- %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu (float %f1 , float %f2 );
19-
20- ret <2 x bfloat> %val
23+ ; CHECK-LABEL: cvt_rn_relu_bf16x2_f32(
24+ ; CHECK: {
25+ ; CHECK-NEXT: .reg .b32 %r<2>;
26+ ; CHECK-NEXT: .reg .f32 %f<3>;
27+ ; CHECK-EMPTY:
28+ ; CHECK-NEXT: // %bb.0:
29+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_bf16x2_f32_param_0];
30+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_relu_bf16x2_f32_param_1];
31+ ; CHECK-NEXT: cvt.rn.relu.bf16x2.f32 %r1, %f1, %f2;
32+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
33+ ; CHECK-NEXT: ret;
34+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu (float %f1 , float %f2 )
35+ ret <2 x bfloat> %val
2136}
2237
23- ; CHECK-LABEL: cvt_rz_bf16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2bf16x2.rz and returns the
; <2 x bfloat> result; the CHECK lines expect cvt.rz.bf16x2.f32 in the output.
2438define <2 x bfloat> @cvt_rz_bf16x2_f32 (float %f1 , float %f2 ) {
25-
26- ; CHECK: cvt.rz.bf16x2.f32
27- %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz (float %f1 , float %f2 );
28-
29- ret <2 x bfloat> %val
39+ ; CHECK-LABEL: cvt_rz_bf16x2_f32(
40+ ; CHECK: {
41+ ; CHECK-NEXT: .reg .b32 %r<2>;
42+ ; CHECK-NEXT: .reg .f32 %f<3>;
43+ ; CHECK-EMPTY:
44+ ; CHECK-NEXT: // %bb.0:
45+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_bf16x2_f32_param_0];
46+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_bf16x2_f32_param_1];
47+ ; CHECK-NEXT: cvt.rz.bf16x2.f32 %r1, %f1, %f2;
48+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
49+ ; CHECK-NEXT: ret;
50+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz (float %f1 , float %f2 )
51+ ret <2 x bfloat> %val
3052}
3153
32- ; CHECK-LABEL: cvt_rz_relu_bf16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2bf16x2.rz.relu and returns the
; <2 x bfloat> result; the CHECK lines expect cvt.rz.relu.bf16x2.f32.
3354define <2 x bfloat> @cvt_rz_relu_bf16x2_f32 (float %f1 , float %f2 ) {
34-
35- ; CHECK: cvt.rz.relu.bf16x2.f32
36- %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu (float %f1 , float %f2 );
37-
38- ret <2 x bfloat> %val
55+ ; CHECK-LABEL: cvt_rz_relu_bf16x2_f32(
56+ ; CHECK: {
57+ ; CHECK-NEXT: .reg .b32 %r<2>;
58+ ; CHECK-NEXT: .reg .f32 %f<3>;
59+ ; CHECK-EMPTY:
60+ ; CHECK-NEXT: // %bb.0:
61+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_bf16x2_f32_param_0];
62+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_relu_bf16x2_f32_param_1];
63+ ; CHECK-NEXT: cvt.rz.relu.bf16x2.f32 %r1, %f1, %f2;
64+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
65+ ; CHECK-NEXT: ret;
66+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu (float %f1 , float %f2 )
67+ ret <2 x bfloat> %val
3968}
4069
4170declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn (float , float )
4271declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu (float , float )
4372declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz (float , float )
4473declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu (float , float )
4574
46- ; CHECK-LABEL: cvt_rn_f16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2f16x2.rn and returns the
; <2 x half> result; the CHECK lines expect cvt.rn.f16x2.f32 in the output.
4775define <2 x half > @cvt_rn_f16x2_f32 (float %f1 , float %f2 ) {
48-
49- ; CHECK: cvt.rn.f16x2.f32
50- %val = call <2 x half > @llvm.nvvm.ff2f16x2.rn (float %f1 , float %f2 );
51-
52- ret <2 x half > %val
76+ ; CHECK-LABEL: cvt_rn_f16x2_f32(
77+ ; CHECK: {
78+ ; CHECK-NEXT: .reg .b32 %r<2>;
79+ ; CHECK-NEXT: .reg .f32 %f<3>;
80+ ; CHECK-EMPTY:
81+ ; CHECK-NEXT: // %bb.0:
82+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_f16x2_f32_param_0];
83+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_f16x2_f32_param_1];
84+ ; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f1, %f2;
85+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
86+ ; CHECK-NEXT: ret;
87+ %val = call <2 x half > @llvm.nvvm.ff2f16x2.rn (float %f1 , float %f2 )
88+ ret <2 x half > %val
5389}
5490
55- ; CHECK-LABEL: cvt_rn_relu_f16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2f16x2.rn.relu and returns the
; <2 x half> result; the CHECK lines expect cvt.rn.relu.f16x2.f32.
5691define <2 x half > @cvt_rn_relu_f16x2_f32 (float %f1 , float %f2 ) {
57-
58- ; CHECK: cvt.rn.relu.f16x2.f32
59- %val = call <2 x half > @llvm.nvvm.ff2f16x2.rn.relu (float %f1 , float %f2 );
60-
61- ret <2 x half > %val
92+ ; CHECK-LABEL: cvt_rn_relu_f16x2_f32(
93+ ; CHECK: {
94+ ; CHECK-NEXT: .reg .b32 %r<2>;
95+ ; CHECK-NEXT: .reg .f32 %f<3>;
96+ ; CHECK-EMPTY:
97+ ; CHECK-NEXT: // %bb.0:
98+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_f16x2_f32_param_0];
99+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_relu_f16x2_f32_param_1];
100+ ; CHECK-NEXT: cvt.rn.relu.f16x2.f32 %r1, %f1, %f2;
101+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
102+ ; CHECK-NEXT: ret;
103+ %val = call <2 x half > @llvm.nvvm.ff2f16x2.rn.relu (float %f1 , float %f2 )
104+ ret <2 x half > %val
62105}
63106
64- ; CHECK-LABEL: cvt_rz_f16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2f16x2.rz and returns the
; <2 x half> result; the CHECK lines expect cvt.rz.f16x2.f32 in the output.
65107define <2 x half > @cvt_rz_f16x2_f32 (float %f1 , float %f2 ) {
66-
67- ; CHECK: cvt.rz.f16x2.f32
68- %val = call <2 x half > @llvm.nvvm.ff2f16x2.rz (float %f1 , float %f2 );
69-
70- ret <2 x half > %val
108+ ; CHECK-LABEL: cvt_rz_f16x2_f32(
109+ ; CHECK: {
110+ ; CHECK-NEXT: .reg .b32 %r<2>;
111+ ; CHECK-NEXT: .reg .f32 %f<3>;
112+ ; CHECK-EMPTY:
113+ ; CHECK-NEXT: // %bb.0:
114+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_f16x2_f32_param_0];
115+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_f16x2_f32_param_1];
116+ ; CHECK-NEXT: cvt.rz.f16x2.f32 %r1, %f1, %f2;
117+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
118+ ; CHECK-NEXT: ret;
119+ %val = call <2 x half > @llvm.nvvm.ff2f16x2.rz (float %f1 , float %f2 )
120+ ret <2 x half > %val
71121}
72122
73- ; CHECK-LABEL: cvt_rz_relu_f16x2_f32
; Passes both float arguments to @llvm.nvvm.ff2f16x2.rz.relu and returns the
; <2 x half> result; the CHECK lines expect cvt.rz.relu.f16x2.f32.
74123define <2 x half > @cvt_rz_relu_f16x2_f32 (float %f1 , float %f2 ) {
75-
76- ; CHECK: cvt.rz.relu.f16x2.f32
77- %val = call <2 x half > @llvm.nvvm.ff2f16x2.rz.relu (float %f1 , float %f2 );
78-
79- ret <2 x half > %val
124+ ; CHECK-LABEL: cvt_rz_relu_f16x2_f32(
125+ ; CHECK: {
126+ ; CHECK-NEXT: .reg .b32 %r<2>;
127+ ; CHECK-NEXT: .reg .f32 %f<3>;
128+ ; CHECK-EMPTY:
129+ ; CHECK-NEXT: // %bb.0:
130+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_f16x2_f32_param_0];
131+ ; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_relu_f16x2_f32_param_1];
132+ ; CHECK-NEXT: cvt.rz.relu.f16x2.f32 %r1, %f1, %f2;
133+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
134+ ; CHECK-NEXT: ret;
135+ %val = call <2 x half > @llvm.nvvm.ff2f16x2.rz.relu (float %f1 , float %f2 )
136+ ret <2 x half > %val
80137}
81138
82139declare <2 x half > @llvm.nvvm.ff2f16x2.rn (float , float )
83140declare <2 x half > @llvm.nvvm.ff2f16x2.rn.relu (float , float )
84141declare <2 x half > @llvm.nvvm.ff2f16x2.rz (float , float )
85142declare <2 x half > @llvm.nvvm.ff2f16x2.rz.relu (float , float )
86143
87- ; CHECK-LABEL: cvt_rn_bf16_f32
; Passes the float argument to @llvm.nvvm.f2bf16.rn and returns the bfloat
; result; the CHECK lines expect cvt.rn.bf16.f32 in the output.
88144define bfloat @cvt_rn_bf16_f32 (float %f1 ) {
89-
90- ; CHECK: cvt.rn.bf16.f32
91- %val = call bfloat @llvm.nvvm.f2bf16.rn (float %f1 );
92-
93- ret bfloat %val
145+ ; CHECK-LABEL: cvt_rn_bf16_f32(
146+ ; CHECK: {
147+ ; CHECK-NEXT: .reg .b16 %rs<2>;
148+ ; CHECK-NEXT: .reg .f32 %f<2>;
149+ ; CHECK-EMPTY:
150+ ; CHECK-NEXT: // %bb.0:
151+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_bf16_f32_param_0];
152+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs1, %f1;
153+ ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
154+ ; CHECK-NEXT: ret;
155+ %val = call bfloat @llvm.nvvm.f2bf16.rn (float %f1 )
156+ ret bfloat %val
94157}
95158
96- ; CHECK-LABEL: cvt_rn_relu_bf16_f32
; Passes the float argument to @llvm.nvvm.f2bf16.rn.relu and returns the bfloat
; result; the CHECK lines expect cvt.rn.relu.bf16.f32 in the output.
97159define bfloat @cvt_rn_relu_bf16_f32 (float %f1 ) {
98-
99- ; CHECK: cvt.rn.relu.bf16.f32
100- %val = call bfloat @llvm.nvvm.f2bf16.rn.relu (float %f1 );
101-
102- ret bfloat %val
160+ ; CHECK-LABEL: cvt_rn_relu_bf16_f32(
161+ ; CHECK: {
162+ ; CHECK-NEXT: .reg .b16 %rs<2>;
163+ ; CHECK-NEXT: .reg .f32 %f<2>;
164+ ; CHECK-EMPTY:
165+ ; CHECK-NEXT: // %bb.0:
166+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_bf16_f32_param_0];
167+ ; CHECK-NEXT: cvt.rn.relu.bf16.f32 %rs1, %f1;
168+ ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
169+ ; CHECK-NEXT: ret;
170+ %val = call bfloat @llvm.nvvm.f2bf16.rn.relu (float %f1 )
171+ ret bfloat %val
103172}
104173
105- ; CHECK-LABEL: cvt_rz_bf16_f32
; Passes the float argument to @llvm.nvvm.f2bf16.rz and returns the bfloat
; result; the CHECK lines expect cvt.rz.bf16.f32 in the output.
106174define bfloat @cvt_rz_bf16_f32 (float %f1 ) {
107-
108- ; CHECK: cvt.rz.bf16.f32
109- %val = call bfloat @llvm.nvvm.f2bf16.rz (float %f1 );
110-
111- ret bfloat %val
175+ ; CHECK-LABEL: cvt_rz_bf16_f32(
176+ ; CHECK: {
177+ ; CHECK-NEXT: .reg .b16 %rs<2>;
178+ ; CHECK-NEXT: .reg .f32 %f<2>;
179+ ; CHECK-EMPTY:
180+ ; CHECK-NEXT: // %bb.0:
181+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_bf16_f32_param_0];
182+ ; CHECK-NEXT: cvt.rz.bf16.f32 %rs1, %f1;
183+ ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
184+ ; CHECK-NEXT: ret;
185+ %val = call bfloat @llvm.nvvm.f2bf16.rz (float %f1 )
186+ ret bfloat %val
112187}
113188
114- ; CHECK-LABEL: cvt_rz_relu_bf16_f32
; Passes the float argument to @llvm.nvvm.f2bf16.rz.relu and returns the bfloat
; result; the CHECK lines expect cvt.rz.relu.bf16.f32 in the output.
115189define bfloat @cvt_rz_relu_bf16_f32 (float %f1 ) {
116-
117- ; CHECK: cvt.rz.relu.bf16.f32
118- %val = call bfloat @llvm.nvvm.f2bf16.rz.relu (float %f1 );
119-
120- ret bfloat %val
190+ ; CHECK-LABEL: cvt_rz_relu_bf16_f32(
191+ ; CHECK: {
192+ ; CHECK-NEXT: .reg .b16 %rs<2>;
193+ ; CHECK-NEXT: .reg .f32 %f<2>;
194+ ; CHECK-EMPTY:
195+ ; CHECK-NEXT: // %bb.0:
196+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_bf16_f32_param_0];
197+ ; CHECK-NEXT: cvt.rz.relu.bf16.f32 %rs1, %f1;
198+ ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
199+ ; CHECK-NEXT: ret;
200+ %val = call bfloat @llvm.nvvm.f2bf16.rz.relu (float %f1 )
201+ ret bfloat %val
121202}
122203
123204declare bfloat @llvm.nvvm.f2bf16.rn (float )
124205declare bfloat @llvm.nvvm.f2bf16.rn.relu (float )
125206declare bfloat @llvm.nvvm.f2bf16.rz (float )
126207declare bfloat @llvm.nvvm.f2bf16.rz.relu (float )
127208
128- ; CHECK-LABEL: cvt_rna_tf32_f32
; Passes the float argument to @llvm.nvvm.f2tf32.rna and returns the i32
; result; the CHECK lines expect cvt.rna.tf32.f32 in the output.
129209define i32 @cvt_rna_tf32_f32 (float %f1 ) {
130-
131- ; CHECK: cvt.rna.tf32.f32
132- %val = call i32 @llvm.nvvm.f2tf32.rna (float %f1 );
133-
134- ret i32 %val
210+ ; CHECK-LABEL: cvt_rna_tf32_f32(
211+ ; CHECK: {
212+ ; CHECK-NEXT: .reg .b32 %r<2>;
213+ ; CHECK-NEXT: .reg .f32 %f<2>;
214+ ; CHECK-EMPTY:
215+ ; CHECK-NEXT: // %bb.0:
216+ ; CHECK-NEXT: ld.param.f32 %f1, [cvt_rna_tf32_f32_param_0];
217+ ; CHECK-NEXT: cvt.rna.tf32.f32 %r1, %f1;
218+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
219+ ; CHECK-NEXT: ret;
220+ %val = call i32 @llvm.nvvm.f2tf32.rna (float %f1 )
221+ ret i32 %val
135222}
136223
137224declare i32 @llvm.nvvm.f2tf32.rna (float )
138225
139226
140227define <2 x bfloat> @fold_ff2bf16x2 (float %a , float %b ) {
141- ; CHECK-LABEL: fold_ff2bf16x2
142- ; CHECK: cvt.rn.bf16x2.f32
228+ ; CHECK-LABEL: fold_ff2bf16x2(
229+ ; CHECK: {
230+ ; CHECK-NEXT: .reg .b32 %r<2>;
231+ ; CHECK-NEXT: .reg .f32 %f<3>;
232+ ; CHECK-EMPTY:
233+ ; CHECK-NEXT: // %bb.0:
234+ ; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2bf16x2_param_0];
235+ ; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2bf16x2_param_1];
236+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f1, %f2;
237+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
238+ ; CHECK-NEXT: ret;
143239 %ah = fptrunc float %a to bfloat
144240 %bh = fptrunc float %b to bfloat
145241 %v0 = insertelement <2 x bfloat> poison, bfloat %ah , i64 0
@@ -148,8 +244,17 @@ define <2 x bfloat> @fold_ff2bf16x2(float %a, float %b) {
148244}
149245
150246define <2 x half > @fold_ff2f16x2 (float %a , float %b ) {
151- ; CHECK-LABEL: fold_ff2f16x2
152- ; CHECK: cvt.rn.f16x2.f32
247+ ; CHECK-LABEL: fold_ff2f16x2(
248+ ; CHECK: {
249+ ; CHECK-NEXT: .reg .b32 %r<2>;
250+ ; CHECK-NEXT: .reg .f32 %f<3>;
251+ ; CHECK-EMPTY:
252+ ; CHECK-NEXT: // %bb.0:
253+ ; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2f16x2_param_0];
254+ ; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2f16x2_param_1];
255+ ; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f1, %f2;
256+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
257+ ; CHECK-NEXT: ret;
153258 %ah = fptrunc float %a to half
154259 %bh = fptrunc float %b to half
155260 %v0 = insertelement <2 x half > poison, half %ah , i64 0
0 commit comments