@@ -22,20 +22,18 @@ define <2 x bfloat> @test_ret_const() #0 {
2222define <2 x bfloat> @test_fadd_imm_0 (<2 x bfloat> %a ) #0 {
2323; SM80-LABEL: test_fadd_imm_0(
2424; SM80: {
25- ; SM80-NEXT: .reg .b16 %rs<5 >;
25+ ; SM80-NEXT: .reg .b16 %rs<3 >;
2626; SM80-NEXT: .reg .b32 %r<3>;
2727; SM80-NEXT: .reg .f32 %f<5>;
2828; SM80-EMPTY:
2929; SM80-NEXT: // %bb.0:
3030; SM80-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0];
3131; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
32- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
33- ; SM80-NEXT: add.rn.f32 %f2, %f1, 0f40000000;
34- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
35- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
36- ; SM80-NEXT: add.rn.f32 %f4, %f3, 0f3F800000;
37- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
38- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
32+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
33+ ; SM80-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
34+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
35+ ; SM80-NEXT: add.rn.f32 %f4, %f3, 0f40000000;
36+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
3937; SM80-NEXT: st.param.b32 [func_retval0], %r2;
4038; SM80-NEXT: ret;
4139;
@@ -84,24 +82,22 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
8482define <2 x bfloat> @test_fsubx2 (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
8583; SM80-LABEL: test_fsubx2(
8684; SM80: {
87- ; SM80-NEXT: .reg .b16 %rs<7 >;
85+ ; SM80-NEXT: .reg .b16 %rs<5 >;
8886; SM80-NEXT: .reg .b32 %r<4>;
8987; SM80-NEXT: .reg .f32 %f<7>;
9088; SM80-EMPTY:
9189; SM80-NEXT: // %bb.0:
9290; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
9391; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
9492; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
95- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
93+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
9694; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
97- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
95+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
9896; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
99- ; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
100- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
101- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
97+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
98+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
10299; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
103- ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
104- ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
100+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
105101; SM80-NEXT: st.param.b32 [func_retval0], %r3;
106102; SM80-NEXT: ret;
107103;
@@ -122,24 +118,22 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
122118define <2 x bfloat> @test_fmulx2 (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
123119; SM80-LABEL: test_fmulx2(
124120; SM80: {
125- ; SM80-NEXT: .reg .b16 %rs<7 >;
121+ ; SM80-NEXT: .reg .b16 %rs<5 >;
126122; SM80-NEXT: .reg .b32 %r<4>;
127123; SM80-NEXT: .reg .f32 %f<7>;
128124; SM80-EMPTY:
129125; SM80-NEXT: // %bb.0:
130126; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
131127; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
132128; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
133- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
129+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
134130; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
135- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
131+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
136132; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
137- ; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
138- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
139- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
133+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
134+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
140135; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
141- ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
142- ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
136+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
143137; SM80-NEXT: st.param.b32 [func_retval0], %r3;
144138; SM80-NEXT: ret;
145139;
@@ -160,24 +154,22 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
160154define <2 x bfloat> @test_fdiv (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
161155; CHECK-LABEL: test_fdiv(
162156; CHECK: {
163- ; CHECK-NEXT: .reg .b16 %rs<7 >;
157+ ; CHECK-NEXT: .reg .b16 %rs<5 >;
164158; CHECK-NEXT: .reg .b32 %r<4>;
165159; CHECK-NEXT: .reg .f32 %f<7>;
166160; CHECK-EMPTY:
167161; CHECK-NEXT: // %bb.0:
168162; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
169163; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
170164; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2;
171- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
165+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
172166; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1;
173- ; CHECK-NEXT: cvt.f32.bf16 %f2, %rs4 ;
167+ ; CHECK-NEXT: cvt.f32.bf16 %f2, %rs3 ;
174168; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1;
175- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
176- ; CHECK-NEXT: cvt.f32.bf16 %f4, %rs1;
177- ; CHECK-NEXT: cvt.f32.bf16 %f5, %rs3;
169+ ; CHECK-NEXT: cvt.f32.bf16 %f4, %rs2;
170+ ; CHECK-NEXT: cvt.f32.bf16 %f5, %rs4;
178171; CHECK-NEXT: div.rn.f32 %f6, %f5, %f4;
179- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
180- ; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5};
172+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
181173; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
182174; CHECK-NEXT: ret;
183175 %r = fdiv <2 x bfloat> %a , %b
@@ -418,15 +410,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b,
418410define <2 x bfloat> @test_fptrunc_2xfloat (<2 x float > %a ) #0 {
419411; CHECK-LABEL: test_fptrunc_2xfloat(
420412; CHECK: {
421- ; CHECK-NEXT: .reg .b16 %rs<3>;
422413; CHECK-NEXT: .reg .b32 %r<2>;
423414; CHECK-NEXT: .reg .f32 %f<3>;
424415; CHECK-EMPTY:
425416; CHECK-NEXT: // %bb.0:
426417; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_fptrunc_2xfloat_param_0];
427- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs1, %f2;
428- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs2, %f1;
429- ; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
418+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f2, %f1;
430419; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
431420; CHECK-NEXT: ret;
432421 %r = fptrunc <2 x float > %a to <2 x bfloat>
@@ -503,20 +492,18 @@ declare <2 x bfloat> @llvm.fmuladd.f16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bf
503492define <2 x bfloat> @test_sqrt (<2 x bfloat> %a ) #0 {
504493; CHECK-LABEL: test_sqrt(
505494; CHECK: {
506- ; CHECK-NEXT: .reg .b16 %rs<5 >;
495+ ; CHECK-NEXT: .reg .b16 %rs<3 >;
507496; CHECK-NEXT: .reg .b32 %r<3>;
508497; CHECK-NEXT: .reg .f32 %f<5>;
509498; CHECK-EMPTY:
510499; CHECK-NEXT: // %bb.0:
511500; CHECK-NEXT: ld.param.b32 %r1, [test_sqrt_param_0];
512501; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
513- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
502+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
514503; CHECK-NEXT: sqrt.rn.f32 %f2, %f1;
515- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
516- ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
504+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs2;
517505; CHECK-NEXT: sqrt.rn.f32 %f4, %f3;
518- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
519- ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
506+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
520507; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
521508; CHECK-NEXT: ret;
522509 %r = call <2 x bfloat> @llvm.sqrt.f16 (<2 x bfloat> %a )
@@ -556,33 +543,29 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
556543define <2 x bfloat> @test_fabs_add (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
557544; SM80-LABEL: test_fabs_add(
558545; SM80: {
559- ; SM80-NEXT: .reg .b16 %rs<11 >;
546+ ; SM80-NEXT: .reg .b16 %rs<7 >;
560547; SM80-NEXT: .reg .b32 %r<6>;
561548; SM80-NEXT: .reg .f32 %f<11>;
562549; SM80-EMPTY:
563550; SM80-NEXT: // %bb.0:
564551; SM80-NEXT: ld.param.b32 %r1, [test_fabs_add_param_1];
565552; SM80-NEXT: ld.param.b32 %r2, [test_fabs_add_param_0];
566553; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
567- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
554+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
568555; SM80-NEXT: add.rn.f32 %f2, %f1, %f1;
569- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
570- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
556+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
571557; SM80-NEXT: add.rn.f32 %f4, %f3, %f3;
572- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
573- ; SM80-NEXT: mov.b32 %r3, {%rs4, %rs3};
558+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f4, %f2;
574559; SM80-NEXT: abs.bf16x2 %r4, %r3;
575- ; SM80-NEXT: mov.b32 {%rs5 , %rs6 }, %r4;
576- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs6 ;
577- ; SM80-NEXT: mov.b32 {%rs7 , %rs8 }, %r1;
578- ; SM80-NEXT: cvt.f32.bf16 %f6, %rs8 ;
560+ ; SM80-NEXT: mov.b32 {%rs3 , %rs4 }, %r4;
561+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
562+ ; SM80-NEXT: mov.b32 {%rs5 , %rs6 }, %r1;
563+ ; SM80-NEXT: cvt.f32.bf16 %f6, %rs5 ;
579564; SM80-NEXT: add.rn.f32 %f7, %f5, %f6;
580- ; SM80-NEXT: cvt.rn.bf16.f32 %rs9, %f7;
581- ; SM80-NEXT: cvt.f32.bf16 %f8, %rs5;
582- ; SM80-NEXT: cvt.f32.bf16 %f9, %rs7;
565+ ; SM80-NEXT: cvt.f32.bf16 %f8, %rs4;
566+ ; SM80-NEXT: cvt.f32.bf16 %f9, %rs6;
583567; SM80-NEXT: add.rn.f32 %f10, %f8, %f9;
584- ; SM80-NEXT: cvt.rn.bf16.f32 %rs10, %f10;
585- ; SM80-NEXT: mov.b32 %r5, {%rs10, %rs9};
568+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r5, %f10, %f7;
586569; SM80-NEXT: st.param.b32 [func_retval0], %r5;
587570; SM80-NEXT: ret;
588571;
@@ -637,20 +620,18 @@ define <2 x bfloat> @test_maxnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
637620define <2 x bfloat> @test_floor (<2 x bfloat> %a ) #0 {
638621; SM80-LABEL: test_floor(
639622; SM80: {
640- ; SM80-NEXT: .reg .b16 %rs<5 >;
623+ ; SM80-NEXT: .reg .b16 %rs<3 >;
641624; SM80-NEXT: .reg .b32 %r<3>;
642625; SM80-NEXT: .reg .f32 %f<5>;
643626; SM80-EMPTY:
644627; SM80-NEXT: // %bb.0:
645628; SM80-NEXT: ld.param.b32 %r1, [test_floor_param_0];
646629; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
647- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
630+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
648631; SM80-NEXT: cvt.rmi.f32.f32 %f2, %f1;
649- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
650- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
632+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
651633; SM80-NEXT: cvt.rmi.f32.f32 %f4, %f3;
652- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
653- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
634+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
654635; SM80-NEXT: st.param.b32 [func_retval0], %r2;
655636; SM80-NEXT: ret;
656637;
@@ -674,20 +655,18 @@ define <2 x bfloat> @test_floor(<2 x bfloat> %a) #0 {
674655define <2 x bfloat> @test_ceil (<2 x bfloat> %a ) #0 {
675656; SM80-LABEL: test_ceil(
676657; SM80: {
677- ; SM80-NEXT: .reg .b16 %rs<5 >;
658+ ; SM80-NEXT: .reg .b16 %rs<3 >;
678659; SM80-NEXT: .reg .b32 %r<3>;
679660; SM80-NEXT: .reg .f32 %f<5>;
680661; SM80-EMPTY:
681662; SM80-NEXT: // %bb.0:
682663; SM80-NEXT: ld.param.b32 %r1, [test_ceil_param_0];
683664; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
684- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
665+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
685666; SM80-NEXT: cvt.rpi.f32.f32 %f2, %f1;
686- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
687- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
667+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
688668; SM80-NEXT: cvt.rpi.f32.f32 %f4, %f3;
689- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
690- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
669+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
691670; SM80-NEXT: st.param.b32 [func_retval0], %r2;
692671; SM80-NEXT: ret;
693672;
@@ -711,20 +690,18 @@ define <2 x bfloat> @test_ceil(<2 x bfloat> %a) #0 {
711690define <2 x bfloat> @test_trunc (<2 x bfloat> %a ) #0 {
712691; SM80-LABEL: test_trunc(
713692; SM80: {
714- ; SM80-NEXT: .reg .b16 %rs<5 >;
693+ ; SM80-NEXT: .reg .b16 %rs<3 >;
715694; SM80-NEXT: .reg .b32 %r<3>;
716695; SM80-NEXT: .reg .f32 %f<5>;
717696; SM80-EMPTY:
718697; SM80-NEXT: // %bb.0:
719698; SM80-NEXT: ld.param.b32 %r1, [test_trunc_param_0];
720699; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
721- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
700+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
722701; SM80-NEXT: cvt.rzi.f32.f32 %f2, %f1;
723- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
724- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
702+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
725703; SM80-NEXT: cvt.rzi.f32.f32 %f4, %f3;
726- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
727- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
704+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
728705; SM80-NEXT: st.param.b32 [func_retval0], %r2;
729706; SM80-NEXT: ret;
730707;
@@ -748,20 +725,18 @@ define <2 x bfloat> @test_trunc(<2 x bfloat> %a) #0 {
748725define <2 x bfloat> @test_rint (<2 x bfloat> %a ) #0 {
749726; SM80-LABEL: test_rint(
750727; SM80: {
751- ; SM80-NEXT: .reg .b16 %rs<5 >;
728+ ; SM80-NEXT: .reg .b16 %rs<3 >;
752729; SM80-NEXT: .reg .b32 %r<3>;
753730; SM80-NEXT: .reg .f32 %f<5>;
754731; SM80-EMPTY:
755732; SM80-NEXT: // %bb.0:
756733; SM80-NEXT: ld.param.b32 %r1, [test_rint_param_0];
757734; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
758- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
735+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
759736; SM80-NEXT: cvt.rni.f32.f32 %f2, %f1;
760- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
761- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
737+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
762738; SM80-NEXT: cvt.rni.f32.f32 %f4, %f3;
763- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
764- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
739+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
765740; SM80-NEXT: st.param.b32 [func_retval0], %r2;
766741; SM80-NEXT: ret;
767742;
@@ -786,14 +761,14 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
786761; CHECK-LABEL: test_round(
787762; CHECK: {
788763; CHECK-NEXT: .reg .pred %p<5>;
789- ; CHECK-NEXT: .reg .b16 %rs<5 >;
764+ ; CHECK-NEXT: .reg .b16 %rs<3 >;
790765; CHECK-NEXT: .reg .b32 %r<9>;
791766; CHECK-NEXT: .reg .f32 %f<17>;
792767; CHECK-EMPTY:
793768; CHECK-NEXT: // %bb.0:
794769; CHECK-NEXT: ld.param.b32 %r1, [test_round_param_0];
795770; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
796- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
771+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
797772; CHECK-NEXT: mov.b32 %r2, %f1;
798773; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
799774; CHECK-NEXT: or.b32 %r4, %r3, 1056964608;
@@ -806,8 +781,7 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
806781; CHECK-NEXT: cvt.rzi.f32.f32 %f7, %f1;
807782; CHECK-NEXT: setp.lt.f32 %p2, %f5, 0f3F000000;
808783; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2;
809- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f8;
810- ; CHECK-NEXT: cvt.f32.bf16 %f9, %rs1;
784+ ; CHECK-NEXT: cvt.f32.bf16 %f9, %rs2;
811785; CHECK-NEXT: mov.b32 %r5, %f9;
812786; CHECK-NEXT: and.b32 %r6, %r5, -2147483648;
813787; CHECK-NEXT: or.b32 %r7, %r6, 1056964608;
@@ -820,8 +794,7 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
820794; CHECK-NEXT: cvt.rzi.f32.f32 %f15, %f9;
821795; CHECK-NEXT: setp.lt.f32 %p4, %f13, 0f3F000000;
822796; CHECK-NEXT: selp.f32 %f16, %f15, %f14, %p4;
823- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f16;
824- ; CHECK-NEXT: mov.b32 %r8, {%rs4, %rs3};
797+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r8, %f16, %f8;
825798; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
826799; CHECK-NEXT: ret;
827800 %r = call <2 x bfloat> @llvm.round.f16 (<2 x bfloat> %a )
0 commit comments