@@ -288,3 +288,85 @@ define <2 x bfloat> @cvt_bf16x2_ue8m0x2(i16 %in) {
288288 %val = call <2 x bfloat> @llvm.nvvm.ue8m0x2.to.bf16x2 (i16 %in )
289289 ret <2 x bfloat> %val
290290}
291+
; Lowering test for @llvm.nvvm.ff.to.e2m1x2.rn.satfinite: both float
; arguments are handed to the intrinsic and its i16 result is returned.
; The expectations below require one cvt.rn.satfinite.e2m1x2.f32 that writes
; a brace-scoped .b8 register (%e2m1x2_out); that byte is widened with
; cvt.u16.u8 and then cvt.u32.u16 before being stored as the return value.
; NOTE(review): the expectation lines look script-generated; presumably they
; should be regenerated rather than hand-edited - TODO confirm.
292+ define i16 @cvt_rn_sf_e2m1x2_f32 (float %f1 , float %f2 ) {
293+ ; CHECK-LABEL: cvt_rn_sf_e2m1x2_f32(
294+ ; CHECK: {
295+ ; CHECK-NEXT: .reg .b16 %rs<2>;
296+ ; CHECK-NEXT: .reg .b32 %r<2>;
297+ ; CHECK-NEXT: .reg .b32 %f<3>;
298+ ; CHECK-EMPTY:
299+ ; CHECK-NEXT: // %bb.0:
300+ ; CHECK-NEXT: ld.param.b32 %f1, [cvt_rn_sf_e2m1x2_f32_param_0];
301+ ; CHECK-NEXT: ld.param.b32 %f2, [cvt_rn_sf_e2m1x2_f32_param_1];
302+ ; CHECK-NEXT: {
303+ ; CHECK-NEXT: .reg .b8 %e2m1x2_out;
304+ ; CHECK-NEXT: cvt.rn.satfinite.e2m1x2.f32 %e2m1x2_out, %f1, %f2;
305+ ; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
306+ ; CHECK-NEXT: }
307+ ; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
308+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
309+ ; CHECK-NEXT: ret;
310+ %val = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite (float %f1 , float %f2 )
311+ ret i16 %val
312+ }
313+
; Lowering test for @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite: both float
; arguments are handed to the intrinsic and its i16 result is returned.
; The expectations below require one cvt.rn.satfinite.relu.e2m1x2.f32 (note
; that the PTX modifier order is satfinite-then-relu, the reverse of the
; intrinsic name) writing a brace-scoped .b8 register (%e2m1x2_out), which is
; widened with cvt.u16.u8 and cvt.u32.u16 before being stored as the return
; value.
314+ define i16 @cvt_rn_relu_sf_e2m1x2_f32 (float %f1 , float %f2 ) {
315+ ; CHECK-LABEL: cvt_rn_relu_sf_e2m1x2_f32(
316+ ; CHECK: {
317+ ; CHECK-NEXT: .reg .b16 %rs<2>;
318+ ; CHECK-NEXT: .reg .b32 %r<2>;
319+ ; CHECK-NEXT: .reg .b32 %f<3>;
320+ ; CHECK-EMPTY:
321+ ; CHECK-NEXT: // %bb.0:
322+ ; CHECK-NEXT: ld.param.b32 %f1, [cvt_rn_relu_sf_e2m1x2_f32_param_0];
323+ ; CHECK-NEXT: ld.param.b32 %f2, [cvt_rn_relu_sf_e2m1x2_f32_param_1];
324+ ; CHECK-NEXT: {
325+ ; CHECK-NEXT: .reg .b8 %e2m1x2_out;
326+ ; CHECK-NEXT: cvt.rn.satfinite.relu.e2m1x2.f32 %e2m1x2_out, %f1, %f2;
327+ ; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
328+ ; CHECK-NEXT: }
329+ ; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
330+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
331+ ; CHECK-NEXT: ret;
332+ %val = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite (float %f1 , float %f2 )
333+ ret i16 %val
334+ }
335+
; Lowering test for @llvm.nvvm.e2m1x2.to.f16x2.rn: an i16 argument is passed
; to the intrinsic and the resulting <2 x half> is returned.
; The expectations below require the 16-bit parameter to be narrowed into a
; brace-scoped .b8 register (%e2m1x2_in) with cvt.u8.u16, then converted by
; cvt.rn.f16x2.e2m1x2 into a 32-bit register that is stored as the return
; value.
336+ define <2 x half > @cvt_rn_f16x2_e2m1x2 (i16 %in ) {
337+ ; CHECK-LABEL: cvt_rn_f16x2_e2m1x2(
338+ ; CHECK: {
339+ ; CHECK-NEXT: .reg .b16 %rs<2>;
340+ ; CHECK-NEXT: .reg .b32 %r<2>;
341+ ; CHECK-EMPTY:
342+ ; CHECK-NEXT: // %bb.0:
343+ ; CHECK-NEXT: ld.param.b16 %rs1, [cvt_rn_f16x2_e2m1x2_param_0];
344+ ; CHECK-NEXT: {
345+ ; CHECK-NEXT: .reg .b8 %e2m1x2_in;
346+ ; CHECK-NEXT: cvt.u8.u16 %e2m1x2_in, %rs1;
347+ ; CHECK-NEXT: cvt.rn.f16x2.e2m1x2 %r1, %e2m1x2_in;
348+ ; CHECK-NEXT: }
349+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
350+ ; CHECK-NEXT: ret;
351+ %val = call <2 x half > @llvm.nvvm.e2m1x2.to.f16x2.rn (i16 %in )
352+ ret <2 x half > %val
353+ }
354+
; Lowering test for @llvm.nvvm.e2m1x2.to.f16x2.rn.relu: an i16 argument is
; passed to the intrinsic and the resulting <2 x half> is returned.
; The expectations below require the 16-bit parameter to be narrowed into a
; brace-scoped .b8 register (%e2m1x2_in) with cvt.u8.u16, then converted by
; cvt.rn.relu.f16x2.e2m1x2 into a 32-bit register that is stored as the
; return value.
355+ define <2 x half > @cvt_rn_relu_f16x2_e2m1x2 (i16 %in ) {
356+ ; CHECK-LABEL: cvt_rn_relu_f16x2_e2m1x2(
357+ ; CHECK: {
358+ ; CHECK-NEXT: .reg .b16 %rs<2>;
359+ ; CHECK-NEXT: .reg .b32 %r<2>;
360+ ; CHECK-EMPTY:
361+ ; CHECK-NEXT: // %bb.0:
362+ ; CHECK-NEXT: ld.param.b16 %rs1, [cvt_rn_relu_f16x2_e2m1x2_param_0];
363+ ; CHECK-NEXT: {
364+ ; CHECK-NEXT: .reg .b8 %e2m1x2_in;
365+ ; CHECK-NEXT: cvt.u8.u16 %e2m1x2_in, %rs1;
366+ ; CHECK-NEXT: cvt.rn.relu.f16x2.e2m1x2 %r1, %e2m1x2_in;
367+ ; CHECK-NEXT: }
368+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
369+ ; CHECK-NEXT: ret;
370+ %val = call <2 x half > @llvm.nvvm.e2m1x2.to.f16x2.rn.relu (i16 %in )
371+ ret <2 x half > %val
372+ }
0 commit comments