@@ -212,12 +212,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
212212; SM80-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
213213; SM80-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
214214; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
215- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
215+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
216216; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
217- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
217+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
218218; SM80-NEXT: add.rn.f32 %f3, %f2, %f1;
219- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
220- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
219+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
220+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
221221; SM80-NEXT: add.rn.f32 %f6, %f5, %f4;
222222; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
223223; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -233,12 +233,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
233233; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
234234; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
235235; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
236- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
236+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
237237; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
238- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
238+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
239239; SM80-FTZ-NEXT: add.rn.ftz.f32 %f3, %f2, %f1;
240- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
241- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
240+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
241+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
242242; SM80-FTZ-NEXT: add.rn.ftz.f32 %f6, %f5, %f4;
243243; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
244244; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -315,12 +315,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
315315; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
316316; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
317317; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
318- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
318+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
319319; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
320- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
320+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
321321; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
322- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
323- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
322+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
323+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
324324; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
325325; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
326326; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -336,12 +336,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
336336; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
337337; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
338338; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
339- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
339+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
340340; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
341- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
341+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
342342; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f3, %f2, %f1;
343- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
344- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
343+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
344+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
345345; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f6, %f5, %f4;
346346; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
347347; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -418,12 +418,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
418418; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
419419; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
420420; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
421- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
421+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
422422; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
423- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
423+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
424424; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
425- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
426- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
425+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
426+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
427427; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
428428; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
429429; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -439,12 +439,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
439439; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
440440; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
441441; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
442- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
442+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
443443; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
444- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
444+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
445445; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f3, %f2, %f1;
446- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
447- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
446+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
447+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
448448; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f6, %f5, %f4;
449449; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
450450; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -521,12 +521,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
521521; SM80-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
522522; SM80-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
523523; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
524- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
524+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
525525; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
526- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
526+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
527527; SM80-NEXT: div.rn.f32 %f3, %f2, %f1;
528- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
529- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
528+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
529+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
530530; SM80-NEXT: div.rn.f32 %f6, %f5, %f4;
531531; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
532532; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -542,12 +542,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
542542; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
543543; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
544544; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
545- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
545+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
546546; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
547- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
547+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
548548; SM80-FTZ-NEXT: div.rn.ftz.f32 %f3, %f2, %f1;
549- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
550- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
549+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
550+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
551551; SM80-FTZ-NEXT: div.rn.ftz.f32 %f6, %f5, %f4;
552552; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
553553; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -563,12 +563,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
563563; SM90-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
564564; SM90-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
565565; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r2;
566- ; SM90-NEXT: cvt.f32.bf16 %f1, %rs2 ;
566+ ; SM90-NEXT: cvt.f32.bf16 %f1, %rs1 ;
567567; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r1;
568- ; SM90-NEXT: cvt.f32.bf16 %f2, %rs4 ;
568+ ; SM90-NEXT: cvt.f32.bf16 %f2, %rs3 ;
569569; SM90-NEXT: div.rn.f32 %f3, %f2, %f1;
570- ; SM90-NEXT: cvt.f32.bf16 %f4, %rs1 ;
571- ; SM90-NEXT: cvt.f32.bf16 %f5, %rs3 ;
570+ ; SM90-NEXT: cvt.f32.bf16 %f4, %rs2 ;
571+ ; SM90-NEXT: cvt.f32.bf16 %f5, %rs4 ;
572572; SM90-NEXT: div.rn.f32 %f6, %f5, %f4;
573573; SM90-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
574574; SM90-NEXT: st.param.b32 [func_retval0], %r3;
0 commit comments