@@ -1372,11 +1372,6 @@ def BREV64 :
13721372// restriction in PTX?
13731373//
13741374// dest and src may be int32 or int64, but start and end are always int32.
1375- def SDTBFE :
1376- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
1377- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
1378- def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;
1379-
13801375def SDTBFI :
13811376 SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
13821377 SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
@@ -1387,22 +1382,13 @@ def SDTPRMT :
13871382 SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
13881383def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
13891384
1390- multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
1385+ multiclass BFE<string Instr, RegisterClass RC> { // Instruction definitions only: no selection pattern is attached to any variant below.
13911386 def rrr // $b and $c are both B32 register operands
1392- : BasicNVPTXInst<(outs RC:$d),
1393- (ins RC:$a, B32:$b, B32:$c),
1394- Instr,
1395- [(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>;
1387+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, B32:$c), Instr>;
13961388 def rri // $b is a B32 register operand, $c is an i32imm immediate
1397- : BasicNVPTXInst<(outs RC:$d),
1398- (ins RC:$a, B32:$b, i32imm:$c),
1399- Instr,
1400- [(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>;
1389+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, i32imm:$c), Instr>;
14011390 def rii // $b and $c are both i32imm immediates
1402- : BasicNVPTXInst<(outs RC:$d),
1403- (ins RC:$a, i32imm:$b, i32imm:$c),
1404- Instr,
1405- [(set T:$d, (bfe T:$a, imm:$b, imm:$c))]>;
1391+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), Instr>;
14071393}
14071393
14081394multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
@@ -1447,10 +1433,10 @@ let hasSideEffects = false in {
14471433 // the same patterns, so the first one wins. Having unsigned byte extraction
14481434 // has the benefit of always having zero in unused bits, which makes some
14491435 // optimizations easier (e.g. no need to mask them).
1450- defm BFE_U32 : BFE<"bfe.u32", i32, B32>;
1451- defm BFE_S32 : BFE<"bfe.s32", i32, B32>;
1452- defm BFE_U64 : BFE<"bfe.u64", i64, B64>;
1453- defm BFE_S64 : BFE<"bfe.s64", i64, B64>;
1436+ defm BFE_U32 : BFE<"bfe.u32", B32>;
1437+ defm BFE_S32 : BFE<"bfe.s32", B32>;
1438+ defm BFE_U64 : BFE<"bfe.u64", B64>;
1439+ defm BFE_S64 : BFE<"bfe.s64", B64>;
14541440
14551441 defm BFI_B32 : BFI<"bfi.b32", i32, B32, i32imm>;
14561442 defm BFI_B64 : BFI<"bfi.b64", i64, B64, i64imm>;
@@ -1487,19 +1473,26 @@ def : Pat<(fshr i32:$hi, i32:$lo, (shl i32:$amt, (i32 3))),
14871473 (PRMT_B32rrr $lo, $hi, $amt, PrmtF4E)>;
14881474
14891475
1476+ def byte_extract_prmt : ImmLeaf<i32, [{ // PRMT selectors of the form 0x777N with N in 0..3
1477+ return (Imm & ~int64_t(3)) == 0x7770; // clearing the two low bits accepts exactly 0x7770, 0x7771, 0x7772, 0x7773
1478+ }]>;
1479+
1480+ def to_sign_extend_selector : SDNodeXForm<imm, [{
1481+ // Build a 16-bit selector whose low nibble is the input's low nibble and whose three upper nibbles are that nibble with bit 3 set.
1482+ const uint64_t ByteSel = N->getZExtValue() & 0xF;
1483+ const uint64_t SignSel = ByteSel | 0x8;
1484+ const uint64_t Sel = (SignSel << 12) | (SignSel << 8) | (SignSel << 4) | ByteSel;
1485+ return CurDAG->getTargetConstant(Sel, SDLoc(N), MVT::i32);
1486+ }]>;
1487+
1488+
14901489// byte extraction + signed extension to i32: the sext_inreg is folded into the PRMT by rewriting its selector with to_sign_extend_selector.
1491- def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
1492- (BFE_S32rri $s, $o, 8)>;
1493- def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)),
1494- (BFE_S32rii $s, imm:$o, 8)>;
1495- def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)),
1496- (BFE_U32rri $s, $o, 8)>;
1497- def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)),
1498- (BFE_U32rii $s, imm:$o, 8)>;
1490+ def : Pat<(i32 (sext_inreg (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE), i8)),
1491+ (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE)>;
14991492
15001493// byte extraction + signed extension to i16: PRMT with the to_sign_extend_selector'd selector produces the i32, then CVT_u16_u32 converts it to i16 (NOTE(review): presumably a plain truncation -- confirm).
1501- def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8 )), i8)),
1502- (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8 ), CvtNONE)>;
1494+ def : Pat<(i16 (sext_inreg (trunc (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE )), i8)),
1495+ (CVT_u16_u32 (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE ), CvtNONE)>;
15031496
15041497
15051498// Byte extraction via shift/trunc/sext
@@ -1709,28 +1702,36 @@ def cond_not_signed : PatLeaf<(cond), [{
17091702 return !isSignedIntSetCC(N->get());
17101703}]>;
17111704
1712- // comparisons of i8 extracted with BFE as i32
1713- // It's faster to do comparison directly on i32 extracted by BFE ,
1705+ // comparisons of i8 extracted with PRMT as i32
1706+ // It's faster to do comparison directly on i32 extracted by PRMT ,
17141707// instead of the long conversion and sign extending.
1715- def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32 :$a, B32:$oa, 8 ))), i8)),
1716- (i16 (sext_inreg (i16 (trunc (bfe B32 :$b, B32:$ob, 8 ))), i8)),
1708+ def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE ))), i8)),
1709+ (i16 (sext_inreg (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE ))), i8)),
17171710 cond_signed:$cc), // signed compare of sext_inreg'd bytes: the emitted PRMTs must sign-extend too, hence to_sign_extend_selector below
1718- (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>;
1711+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1712+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1713+ (cond2cc $cc))>;
17191714
1720- def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32 :$a, imm:$oa, 8 )), i8)),
1721- (i16 (sext_inreg (trunc (bfe B32 :$b, imm:$ob, 8 )), i8)),
1715+ def: Pat<(setcc (i16 (sext_inreg (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE )), i8)),
1716+ (i16 (sext_inreg (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE )), i8)),
17221717 cond_signed:$cc), // same shape without the explicit i16 on trunc; needs the same sign-extending selector
1723- (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1718+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1719+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1720+ (cond2cc $cc))>;
17241721
1725- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, B32:$oa, 8)), 255 )),
1726- (i16 (and ( trunc (bfe B32 :$b, B32:$ob, 8)), 255 )),
1722+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1723+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
17271724 cond_signed:$cc), // NOTE(review): no sext_inreg here, so the selector is reused unchanged; assumes a 0x777N PRMT with a zero second source yields a value in 0..255, which a signed i32 compare orders the same as the i16 one -- confirm against PTX prmt semantics
1728- (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>;
1725+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1726+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1727+ (cond2cc $cc))>;
17291728
1730- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, imm:$oa, 8)), 255 )),
1731- (i16 (and ( trunc (bfe B32 :$b, imm:$ob, 8)), 255 )),
1729+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1730+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
17321731 cond_not_signed:$cc), // conditions for which isSignedIntSetCC is false; the compare is done on the i32 PRMT results with the selector reused unchanged
1733- (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1732+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1733+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1734+ (cond2cc $cc))>;
17341735
17351736def SDTDeclareArrayParam :
17361737 SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
0 commit comments