@@ -1351,28 +1351,30 @@ define bfloat @test_roundeven(bfloat %a) {
13511351define bfloat @test_maximum (bfloat %a , bfloat %b ) {
13521352; SM70-LABEL: test_maximum(
13531353; SM70: {
1354- ; SM70-NEXT: .reg .pred %p<5 >;
1355- ; SM70-NEXT: .reg .b16 %rs<7 >;
1354+ ; SM70-NEXT: .reg .pred %p<6 >;
1355+ ; SM70-NEXT: .reg .b16 %rs<8 >;
13561356; SM70-NEXT: .reg .b32 %r<7>;
13571357; SM70-EMPTY:
13581358; SM70-NEXT: // %bb.0:
13591359; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0];
1360- ; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0;
13611360; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1];
1362- ; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
13631361; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
13641362; SM70-NEXT: shl.b32 %r2, %r1, 16;
13651363; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
13661364; SM70-NEXT: shl.b32 %r4, %r3, 16;
1367- ; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2;
1368- ; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
1369- ; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2;
1370- ; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3;
1371- ; SM70-NEXT: cvt.u32.u16 %r5, %rs5;
1365+ ; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2;
1366+ ; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
1367+ ; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
1368+ ; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
1369+ ; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
1370+ ; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
1371+ ; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1372+ ; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
1373+ ; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
13721374; SM70-NEXT: shl.b32 %r6, %r5, 16;
1373- ; SM70-NEXT: setp.eq.f32 %p4 , %r6, 0f00000000;
1374- ; SM70-NEXT: selp.b16 %rs6 , %rs3 , %rs5 , %p4 ;
1375- ; SM70-NEXT: st.param.b16 [func_retval0], %rs6 ;
1375+ ; SM70-NEXT: setp.eq.f32 %p5 , %r6, 0f00000000;
1376+ ; SM70-NEXT: selp.b16 %rs7 , %rs6 , %rs4 , %p5 ;
1377+ ; SM70-NEXT: st.param.b16 [func_retval0], %rs7 ;
13761378; SM70-NEXT: ret;
13771379;
13781380; SM80-LABEL: test_maximum(
@@ -1473,44 +1475,48 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
14731475define <2 x bfloat> @test_maximum_v2 (<2 x bfloat> %a , <2 x bfloat> %b ) {
14741476; SM70-LABEL: test_maximum_v2(
14751477; SM70: {
1476- ; SM70-NEXT: .reg .pred %p<9 >;
1477- ; SM70-NEXT: .reg .b16 %rs<15 >;
1478+ ; SM70-NEXT: .reg .pred %p<11 >;
1479+ ; SM70-NEXT: .reg .b16 %rs<19 >;
14781480; SM70-NEXT: .reg .b32 %r<16>;
14791481; SM70-EMPTY:
14801482; SM70-NEXT: // %bb.0:
14811483; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0];
14821484; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1];
14831485; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2;
1484- ; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
1485- ; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0;
1486- ; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
14871486; SM70-NEXT: cvt.u32.u16 %r3, %rs2;
14881487; SM70-NEXT: shl.b32 %r4, %r3, 16;
1488+ ; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
14891489; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
14901490; SM70-NEXT: shl.b32 %r6, %r5, 16;
1491- ; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4;
1492- ; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
1493- ; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4;
1494- ; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3;
1495- ; SM70-NEXT: cvt.u32.u16 %r7, %rs9;
1491+ ; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4;
1492+ ; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1493+ ; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4;
1494+ ; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
1495+ ; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0;
1496+ ; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
1497+ ; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1498+ ; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
1499+ ; SM70-NEXT: cvt.u32.u16 %r7, %rs6;
14961500; SM70-NEXT: shl.b32 %r8, %r7, 16;
1497- ; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000;
1498- ; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
1499- ; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0;
1500- ; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
1501+ ; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000;
1502+ ; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
15011503; SM70-NEXT: cvt.u32.u16 %r9, %rs1;
15021504; SM70-NEXT: shl.b32 %r10, %r9, 16;
15031505; SM70-NEXT: cvt.u32.u16 %r11, %rs3;
15041506; SM70-NEXT: shl.b32 %r12, %r11, 16;
15051507; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10;
1506- ; SM70-NEXT: selp.b16 %rs12 , %rs3, %rs1, %p6;
1508+ ; SM70-NEXT: selp.b16 %rs14 , %rs3, %rs1, %p6;
15071509; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10;
1508- ; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7;
1509- ; SM70-NEXT: cvt.u32.u16 %r13, %rs13;
1510+ ; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7;
1511+ ; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0;
1512+ ; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
1513+ ; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0;
1514+ ; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
1515+ ; SM70-NEXT: cvt.u32.u16 %r13, %rs15;
15101516; SM70-NEXT: shl.b32 %r14, %r13, 16;
1511- ; SM70-NEXT: setp.eq.f32 %p8 , %r14, 0f00000000;
1512- ; SM70-NEXT: selp.b16 %rs14 , %rs11 , %rs13 , %p8 ;
1513- ; SM70-NEXT: mov.b32 %r15, {%rs14 , %rs10 };
1517+ ; SM70-NEXT: setp.eq.f32 %p10 , %r14, 0f00000000;
1518+ ; SM70-NEXT: selp.b16 %rs18 , %rs17 , %rs15 , %p10 ;
1519+ ; SM70-NEXT: mov.b32 %r15, {%rs18 , %rs13 };
15141520; SM70-NEXT: st.param.b32 [func_retval0], %r15;
15151521; SM70-NEXT: ret;
15161522;
0 commit comments