Skip to content

Commit 3f8f50d

Browse files
committed
address comments
1 parent 45db970 commit 3f8f50d

File tree

1 file changed

+187
-82
lines changed

1 file changed

+187
-82
lines changed

llvm/test/CodeGen/NVPTX/convert-sm80.ll

Lines changed: 187 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,145 +1,241 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
23
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
34

45

5-
; CHECK-LABEL: cvt_rn_bf16x2_f32
66
; Lowering test for @llvm.nvvm.ff2bf16x2.rn: both float parameters are loaded,
; the autogenerated assertions expect a single `cvt.rn.bf16x2.f32` consuming
; them, and its 32-bit result register is stored to func_retval0.
define <2 x bfloat> @cvt_rn_bf16x2_f32(float %f1, float %f2) {
7-
8-
; CHECK: cvt.rn.bf16x2.f32
9-
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float %f1, float %f2);
10-
11-
ret <2 x bfloat> %val
7+
; CHECK-LABEL: cvt_rn_bf16x2_f32(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b32 %r<2>;
10+
; CHECK-NEXT: .reg .f32 %f<3>;
11+
; CHECK-EMPTY:
12+
; CHECK-NEXT: // %bb.0:
13+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_bf16x2_f32_param_0];
14+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_bf16x2_f32_param_1];
15+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f1, %f2;
16+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
17+
; CHECK-NEXT: ret;
18+
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float %f1, float %f2)
19+
ret <2 x bfloat> %val
1220
}
1321

14-
; CHECK-LABEL: cvt_rn_relu_bf16x2_f32
1522
; Lowering test for @llvm.nvvm.ff2bf16x2.rn.relu: both float parameters are
; loaded, the autogenerated assertions expect a single `cvt.rn.relu.bf16x2.f32`
; consuming them, and its 32-bit result register is stored to func_retval0.
define <2 x bfloat> @cvt_rn_relu_bf16x2_f32(float %f1, float %f2) {
16-
17-
; CHECK: cvt.rn.relu.bf16x2.f32
18-
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float %f1, float %f2);
19-
20-
ret <2 x bfloat> %val
23+
; CHECK-LABEL: cvt_rn_relu_bf16x2_f32(
24+
; CHECK: {
25+
; CHECK-NEXT: .reg .b32 %r<2>;
26+
; CHECK-NEXT: .reg .f32 %f<3>;
27+
; CHECK-EMPTY:
28+
; CHECK-NEXT: // %bb.0:
29+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_bf16x2_f32_param_0];
30+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_relu_bf16x2_f32_param_1];
31+
; CHECK-NEXT: cvt.rn.relu.bf16x2.f32 %r1, %f1, %f2;
32+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
33+
; CHECK-NEXT: ret;
34+
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float %f1, float %f2)
35+
ret <2 x bfloat> %val
2136
}
2237

23-
; CHECK-LABEL: cvt_rz_bf16x2_f32
2438
; Lowering test for @llvm.nvvm.ff2bf16x2.rz: both float parameters are loaded,
; the autogenerated assertions expect a single `cvt.rz.bf16x2.f32` consuming
; them, and its 32-bit result register is stored to func_retval0.
define <2 x bfloat> @cvt_rz_bf16x2_f32(float %f1, float %f2) {
25-
26-
; CHECK: cvt.rz.bf16x2.f32
27-
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float %f1, float %f2);
28-
29-
ret <2 x bfloat> %val
39+
; CHECK-LABEL: cvt_rz_bf16x2_f32(
40+
; CHECK: {
41+
; CHECK-NEXT: .reg .b32 %r<2>;
42+
; CHECK-NEXT: .reg .f32 %f<3>;
43+
; CHECK-EMPTY:
44+
; CHECK-NEXT: // %bb.0:
45+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_bf16x2_f32_param_0];
46+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_bf16x2_f32_param_1];
47+
; CHECK-NEXT: cvt.rz.bf16x2.f32 %r1, %f1, %f2;
48+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
49+
; CHECK-NEXT: ret;
50+
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float %f1, float %f2)
51+
ret <2 x bfloat> %val
3052
}
3153

32-
; CHECK-LABEL: cvt_rz_relu_bf16x2_f32
3354
; Lowering test for @llvm.nvvm.ff2bf16x2.rz.relu: both float parameters are
; loaded, the autogenerated assertions expect a single `cvt.rz.relu.bf16x2.f32`
; consuming them, and its 32-bit result register is stored to func_retval0.
define <2 x bfloat> @cvt_rz_relu_bf16x2_f32(float %f1, float %f2) {
34-
35-
; CHECK: cvt.rz.relu.bf16x2.f32
36-
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float %f1, float %f2);
37-
38-
ret <2 x bfloat> %val
55+
; CHECK-LABEL: cvt_rz_relu_bf16x2_f32(
56+
; CHECK: {
57+
; CHECK-NEXT: .reg .b32 %r<2>;
58+
; CHECK-NEXT: .reg .f32 %f<3>;
59+
; CHECK-EMPTY:
60+
; CHECK-NEXT: // %bb.0:
61+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_bf16x2_f32_param_0];
62+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_relu_bf16x2_f32_param_1];
63+
; CHECK-NEXT: cvt.rz.relu.bf16x2.f32 %r1, %f1, %f2;
64+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
65+
; CHECK-NEXT: ret;
66+
%val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float %f1, float %f2)
67+
ret <2 x bfloat> %val
3968
}
4069

4170
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float, float)
4271
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float, float)
4372
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float, float)
4473
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float, float)
4574

46-
; CHECK-LABEL: cvt_rn_f16x2_f32
4775
; Lowering test for @llvm.nvvm.ff2f16x2.rn: both float parameters are loaded,
; the autogenerated assertions expect a single `cvt.rn.f16x2.f32` consuming
; them, and its 32-bit result register is stored to func_retval0.
define <2 x half> @cvt_rn_f16x2_f32(float %f1, float %f2) {
48-
49-
; CHECK: cvt.rn.f16x2.f32
50-
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rn(float %f1, float %f2);
51-
52-
ret <2 x half> %val
76+
; CHECK-LABEL: cvt_rn_f16x2_f32(
77+
; CHECK: {
78+
; CHECK-NEXT: .reg .b32 %r<2>;
79+
; CHECK-NEXT: .reg .f32 %f<3>;
80+
; CHECK-EMPTY:
81+
; CHECK-NEXT: // %bb.0:
82+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_f16x2_f32_param_0];
83+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_f16x2_f32_param_1];
84+
; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f1, %f2;
85+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
86+
; CHECK-NEXT: ret;
87+
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rn(float %f1, float %f2)
88+
ret <2 x half> %val
5389
}
5490

55-
; CHECK-LABEL: cvt_rn_relu_f16x2_f32
5691
; Lowering test for @llvm.nvvm.ff2f16x2.rn.relu: both float parameters are
; loaded, the autogenerated assertions expect a single `cvt.rn.relu.f16x2.f32`
; consuming them, and its 32-bit result register is stored to func_retval0.
define <2 x half> @cvt_rn_relu_f16x2_f32(float %f1, float %f2) {
57-
58-
; CHECK: cvt.rn.relu.f16x2.f32
59-
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float %f1, float %f2);
60-
61-
ret <2 x half> %val
92+
; CHECK-LABEL: cvt_rn_relu_f16x2_f32(
93+
; CHECK: {
94+
; CHECK-NEXT: .reg .b32 %r<2>;
95+
; CHECK-NEXT: .reg .f32 %f<3>;
96+
; CHECK-EMPTY:
97+
; CHECK-NEXT: // %bb.0:
98+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_f16x2_f32_param_0];
99+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rn_relu_f16x2_f32_param_1];
100+
; CHECK-NEXT: cvt.rn.relu.f16x2.f32 %r1, %f1, %f2;
101+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
102+
; CHECK-NEXT: ret;
103+
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float %f1, float %f2)
104+
ret <2 x half> %val
62105
}
63106

64-
; CHECK-LABEL: cvt_rz_f16x2_f32
65107
; Lowering test for @llvm.nvvm.ff2f16x2.rz: both float parameters are loaded,
; the autogenerated assertions expect a single `cvt.rz.f16x2.f32` consuming
; them, and its 32-bit result register is stored to func_retval0.
define <2 x half> @cvt_rz_f16x2_f32(float %f1, float %f2) {
66-
67-
; CHECK: cvt.rz.f16x2.f32
68-
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rz(float %f1, float %f2);
69-
70-
ret <2 x half> %val
108+
; CHECK-LABEL: cvt_rz_f16x2_f32(
109+
; CHECK: {
110+
; CHECK-NEXT: .reg .b32 %r<2>;
111+
; CHECK-NEXT: .reg .f32 %f<3>;
112+
; CHECK-EMPTY:
113+
; CHECK-NEXT: // %bb.0:
114+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_f16x2_f32_param_0];
115+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_f16x2_f32_param_1];
116+
; CHECK-NEXT: cvt.rz.f16x2.f32 %r1, %f1, %f2;
117+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
118+
; CHECK-NEXT: ret;
119+
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rz(float %f1, float %f2)
120+
ret <2 x half> %val
71121
}
72122

73-
; CHECK-LABEL: cvt_rz_relu_f16x2_f32
74123
; Lowering test for @llvm.nvvm.ff2f16x2.rz.relu: both float parameters are
; loaded, the autogenerated assertions expect a single `cvt.rz.relu.f16x2.f32`
; consuming them, and its 32-bit result register is stored to func_retval0.
define <2 x half> @cvt_rz_relu_f16x2_f32(float %f1, float %f2) {
75-
76-
; CHECK: cvt.rz.relu.f16x2.f32
77-
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float %f1, float %f2);
78-
79-
ret <2 x half> %val
124+
; CHECK-LABEL: cvt_rz_relu_f16x2_f32(
125+
; CHECK: {
126+
; CHECK-NEXT: .reg .b32 %r<2>;
127+
; CHECK-NEXT: .reg .f32 %f<3>;
128+
; CHECK-EMPTY:
129+
; CHECK-NEXT: // %bb.0:
130+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_f16x2_f32_param_0];
131+
; CHECK-NEXT: ld.param.f32 %f2, [cvt_rz_relu_f16x2_f32_param_1];
132+
; CHECK-NEXT: cvt.rz.relu.f16x2.f32 %r1, %f1, %f2;
133+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
134+
; CHECK-NEXT: ret;
135+
%val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float %f1, float %f2)
136+
ret <2 x half> %val
80137
}
81138

82139
declare <2 x half> @llvm.nvvm.ff2f16x2.rn(float, float)
83140
declare <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float, float)
84141
declare <2 x half> @llvm.nvvm.ff2f16x2.rz(float, float)
85142
declare <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float, float)
86143

87-
; CHECK-LABEL: cvt_rn_bf16_f32
88144
; Lowering test for @llvm.nvvm.f2bf16.rn: the single float parameter is loaded,
; the autogenerated assertions expect one `cvt.rn.bf16.f32` writing a 16-bit
; register, and that register is stored to func_retval0.
define bfloat @cvt_rn_bf16_f32(float %f1) {
89-
90-
; CHECK: cvt.rn.bf16.f32
91-
%val = call bfloat @llvm.nvvm.f2bf16.rn(float %f1);
92-
93-
ret bfloat %val
145+
; CHECK-LABEL: cvt_rn_bf16_f32(
146+
; CHECK: {
147+
; CHECK-NEXT: .reg .b16 %rs<2>;
148+
; CHECK-NEXT: .reg .f32 %f<2>;
149+
; CHECK-EMPTY:
150+
; CHECK-NEXT: // %bb.0:
151+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_bf16_f32_param_0];
152+
; CHECK-NEXT: cvt.rn.bf16.f32 %rs1, %f1;
153+
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
154+
; CHECK-NEXT: ret;
155+
%val = call bfloat @llvm.nvvm.f2bf16.rn(float %f1)
156+
ret bfloat %val
94157
}
95158

96-
; CHECK-LABEL: cvt_rn_relu_bf16_f32
97159
; Lowering test for @llvm.nvvm.f2bf16.rn.relu: the single float parameter is
; loaded, the autogenerated assertions expect one `cvt.rn.relu.bf16.f32`
; writing a 16-bit register, and that register is stored to func_retval0.
define bfloat @cvt_rn_relu_bf16_f32(float %f1) {
98-
99-
; CHECK: cvt.rn.relu.bf16.f32
100-
%val = call bfloat @llvm.nvvm.f2bf16.rn.relu(float %f1);
101-
102-
ret bfloat %val
160+
; CHECK-LABEL: cvt_rn_relu_bf16_f32(
161+
; CHECK: {
162+
; CHECK-NEXT: .reg .b16 %rs<2>;
163+
; CHECK-NEXT: .reg .f32 %f<2>;
164+
; CHECK-EMPTY:
165+
; CHECK-NEXT: // %bb.0:
166+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_relu_bf16_f32_param_0];
167+
; CHECK-NEXT: cvt.rn.relu.bf16.f32 %rs1, %f1;
168+
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
169+
; CHECK-NEXT: ret;
170+
%val = call bfloat @llvm.nvvm.f2bf16.rn.relu(float %f1)
171+
ret bfloat %val
103172
}
104173

105-
; CHECK-LABEL: cvt_rz_bf16_f32
106174
; Lowering test for @llvm.nvvm.f2bf16.rz: the single float parameter is loaded,
; the autogenerated assertions expect one `cvt.rz.bf16.f32` writing a 16-bit
; register, and that register is stored to func_retval0.
define bfloat @cvt_rz_bf16_f32(float %f1) {
107-
108-
; CHECK: cvt.rz.bf16.f32
109-
%val = call bfloat @llvm.nvvm.f2bf16.rz(float %f1);
110-
111-
ret bfloat %val
175+
; CHECK-LABEL: cvt_rz_bf16_f32(
176+
; CHECK: {
177+
; CHECK-NEXT: .reg .b16 %rs<2>;
178+
; CHECK-NEXT: .reg .f32 %f<2>;
179+
; CHECK-EMPTY:
180+
; CHECK-NEXT: // %bb.0:
181+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_bf16_f32_param_0];
182+
; CHECK-NEXT: cvt.rz.bf16.f32 %rs1, %f1;
183+
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
184+
; CHECK-NEXT: ret;
185+
%val = call bfloat @llvm.nvvm.f2bf16.rz(float %f1)
186+
ret bfloat %val
112187
}
113188

114-
; CHECK-LABEL: cvt_rz_relu_bf16_f32
115189
; Lowering test for @llvm.nvvm.f2bf16.rz.relu: the single float parameter is
; loaded, the autogenerated assertions expect one `cvt.rz.relu.bf16.f32`
; writing a 16-bit register, and that register is stored to func_retval0.
define bfloat @cvt_rz_relu_bf16_f32(float %f1) {
116-
117-
; CHECK: cvt.rz.relu.bf16.f32
118-
%val = call bfloat @llvm.nvvm.f2bf16.rz.relu(float %f1);
119-
120-
ret bfloat %val
190+
; CHECK-LABEL: cvt_rz_relu_bf16_f32(
191+
; CHECK: {
192+
; CHECK-NEXT: .reg .b16 %rs<2>;
193+
; CHECK-NEXT: .reg .f32 %f<2>;
194+
; CHECK-EMPTY:
195+
; CHECK-NEXT: // %bb.0:
196+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_bf16_f32_param_0];
197+
; CHECK-NEXT: cvt.rz.relu.bf16.f32 %rs1, %f1;
198+
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
199+
; CHECK-NEXT: ret;
200+
%val = call bfloat @llvm.nvvm.f2bf16.rz.relu(float %f1)
201+
ret bfloat %val
121202
}
122203

123204
declare bfloat @llvm.nvvm.f2bf16.rn(float)
124205
declare bfloat @llvm.nvvm.f2bf16.rn.relu(float)
125206
declare bfloat @llvm.nvvm.f2bf16.rz(float)
126207
declare bfloat @llvm.nvvm.f2bf16.rz.relu(float)
127208

128-
; CHECK-LABEL: cvt_rna_tf32_f32
129209
; Lowering test for @llvm.nvvm.f2tf32.rna: the single float parameter is
; loaded, the autogenerated assertions expect one `cvt.rna.tf32.f32` writing a
; 32-bit register, and that register is stored to func_retval0 as the i32
; return value.
define i32 @cvt_rna_tf32_f32(float %f1) {
130-
131-
; CHECK: cvt.rna.tf32.f32
132-
%val = call i32 @llvm.nvvm.f2tf32.rna(float %f1);
133-
134-
ret i32 %val
210+
; CHECK-LABEL: cvt_rna_tf32_f32(
211+
; CHECK: {
212+
; CHECK-NEXT: .reg .b32 %r<2>;
213+
; CHECK-NEXT: .reg .f32 %f<2>;
214+
; CHECK-EMPTY:
215+
; CHECK-NEXT: // %bb.0:
216+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_rna_tf32_f32_param_0];
217+
; CHECK-NEXT: cvt.rna.tf32.f32 %r1, %f1;
218+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
219+
; CHECK-NEXT: ret;
220+
%val = call i32 @llvm.nvvm.f2tf32.rna(float %f1)
221+
ret i32 %val
135222
}
136223

137224
declare i32 @llvm.nvvm.f2tf32.rna(float)
138225

139226

140227
define <2 x bfloat> @fold_ff2bf16x2(float %a, float %b) {
141-
; CHECK-LABEL: fold_ff2bf16x2
142-
; CHECK: cvt.rn.bf16x2.f32
228+
; CHECK-LABEL: fold_ff2bf16x2(
229+
; CHECK: {
230+
; CHECK-NEXT: .reg .b32 %r<2>;
231+
; CHECK-NEXT: .reg .f32 %f<3>;
232+
; CHECK-EMPTY:
233+
; CHECK-NEXT: // %bb.0:
234+
; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2bf16x2_param_0];
235+
; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2bf16x2_param_1];
236+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f1, %f2;
237+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
238+
; CHECK-NEXT: ret;
143239
%ah = fptrunc float %a to bfloat
144240
%bh = fptrunc float %b to bfloat
145241
%v0 = insertelement <2 x bfloat> poison, bfloat %ah, i64 0
@@ -148,8 +244,17 @@ define <2 x bfloat> @fold_ff2bf16x2(float %a, float %b) {
148244
}
149245

150246
define <2 x half> @fold_ff2f16x2(float %a, float %b) {
151-
; CHECK-LABEL: fold_ff2f16x2
152-
; CHECK: cvt.rn.f16x2.f32
247+
; CHECK-LABEL: fold_ff2f16x2(
248+
; CHECK: {
249+
; CHECK-NEXT: .reg .b32 %r<2>;
250+
; CHECK-NEXT: .reg .f32 %f<3>;
251+
; CHECK-EMPTY:
252+
; CHECK-NEXT: // %bb.0:
253+
; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2f16x2_param_0];
254+
; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2f16x2_param_1];
255+
; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f1, %f2;
256+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
257+
; CHECK-NEXT: ret;
153258
%ah = fptrunc float %a to half
154259
%bh = fptrunc float %b to half
155260
%v0 = insertelement <2 x half> poison, half %ah, i64 0

0 commit comments

Comments
 (0)