@@ -1359,11 +1359,6 @@ def BREV64 :
13591359// restriction in PTX?
13601360//
13611361// dest and src may be int32 or int64, but start and end are always int32.
1362- def SDTBFE :
1363- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
1364- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
1365- def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;
1366-
13671362def SDTBFI :
13681363 SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
13691364 SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
@@ -1374,22 +1369,13 @@ def SDTPRMT :
13741369 SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
13751370def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
13761371
1377- multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
1372+ multiclass BFE<string Instr, RegisterClass RC> {
13781373 def rrr
1379- : BasicNVPTXInst<(outs RC:$d),
1380- (ins RC:$a, B32:$b, B32:$c),
1381- Instr,
1382- [(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>;
1374+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, B32:$c), Instr>;
13831375 def rri
1384- : BasicNVPTXInst<(outs RC:$d),
1385- (ins RC:$a, B32:$b, i32imm:$c),
1386- Instr,
1387- [(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>;
1376+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, i32imm:$c), Instr>;
13881377 def rii
1389- : BasicNVPTXInst<(outs RC:$d),
1390- (ins RC:$a, i32imm:$b, i32imm:$c),
1391- Instr,
1392- [(set T:$d, (bfe T:$a, imm:$b, imm:$c))]>;
1378+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), Instr>;
13931379}
13941380
13951381multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
@@ -1434,10 +1420,10 @@ let hasSideEffects = false in {
14341420 // the same patterns, so the first one wins. Having unsigned byte extraction
14351421 // has the benefit of always having zero in unused bits, which makes some
14361422 // optimizations easier (e.g. no need to mask them).
1437- defm BFE_U32 : BFE<"bfe.u32", i32, B32>;
1438- defm BFE_S32 : BFE<"bfe.s32", i32, B32>;
1439- defm BFE_U64 : BFE<"bfe.u64", i64, B64>;
1440- defm BFE_S64 : BFE<"bfe.s64", i64, B64>;
1423+ defm BFE_U32 : BFE<"bfe.u32", B32>;
1424+ defm BFE_S32 : BFE<"bfe.s32", B32>;
1425+ defm BFE_U64 : BFE<"bfe.u64", B64>;
1426+ defm BFE_S64 : BFE<"bfe.s64", B64>;
14411427
14421428 defm BFI_B32 : BFI<"bfi.b32", i32, B32, i32imm>;
14431429 defm BFI_B64 : BFI<"bfi.b64", i64, B64, i64imm>;
@@ -1474,19 +1460,26 @@ def : Pat<(fshr i32:$hi, i32:$lo, (shl i32:$amt, (i32 3))),
14741460 (PRMT_B32rrr $lo, $hi, $amt, PrmtF4E)>;
14751461
14761462
1463+ def byte_extract_prmt : ImmLeaf<i32, [{ // accepts only the selector immediates 0x7770..0x7773
1464+ return (Imm == 0x7770) || (Imm == 0x7771) || (Imm == 0x7772) || (Imm == 0x7773);
1465+ }]>;
1466+
1467+ def to_sign_extend_selector : SDNodeXForm<imm, [{ // rewrites selector 0x...N into nibbles (N|8, N|8, N|8, N)
1468+ const APInt &V = N->getAPIntValue();
1469+ const APInt B = V.trunc(4); // lowest selector nibble
1470+ const APInt BSext = B | 8; // same nibble with bit 3 set (per the PTX prmt spec: replicate that byte's sign bit)
1471+ const APInt R = BSext.concat(BSext).concat(BSext).concat(B).zext(32); // 16-bit selector, high nibble first: BSext BSext BSext B
1472+ return CurDAG->getTargetConstant(R, SDLoc(N), MVT::i32);
1473+ }]>;
1474+
1475+
14771476// byte extraction + sign extension to i32 (a 0x777N PRMT selector with a zero second source already zero-extends).
1478- def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
1479- (BFE_S32rri $s, $o, 8)>;
1480- def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)),
1481- (BFE_S32rii $s, imm:$o, 8)>;
1482- def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)),
1483- (BFE_U32rri $s, $o, 8)>;
1484- def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)),
1485- (BFE_U32rii $s, imm:$o, 8)>;
1477+ def : Pat<(i32 (sext_inreg (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE), i8)),
1478+ (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE)>;
14861479
14871480// byte extraction + signed extension to i16
1488- def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8 )), i8)),
1489- (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8 ), CvtNONE)>;
1481+ def : Pat<(i16 (sext_inreg (trunc (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE )), i8)),
1482+ (CVT_u16_u32 (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE ), CvtNONE)>;
14901483
14911484
14921485// Byte extraction via shift/trunc/sext
@@ -1699,25 +1692,33 @@ def cond_not_signed : PatLeaf<(cond), [{
16991692// comparisons of i8 extracted with PRMT as i32
17001693// It's faster to do comparison directly on i32 extracted by PRMT,
17011694// instead of the long conversion and sign extending.
1702- def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32 :$a, B32:$oa, 8 ))), i8)),
1703- (i16 (sext_inreg (i16 (trunc (bfe B32 :$b, B32:$ob, 8 ))), i8)),
1695+ def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE ))), i8)),
1696+ (i16 (sext_inreg (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE ))), i8)),
17041697 cond_signed:$cc),
1705- (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>;
1698+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1699+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1700+ (cond2cc $cc))>; // signed compare on i32 needs sign-extended bytes, hence to_sign_extend_selector on both selectors
17061701
1707- def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32 :$a, imm:$oa, 8 )), i8)),
1708- (i16 (sext_inreg (trunc (bfe B32 :$b, imm:$ob, 8 )), i8)),
1702+ def: Pat<(setcc (i16 (sext_inreg (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE )), i8)),
1703+ (i16 (sext_inreg (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE )), i8)),
17091704 cond_signed:$cc),
1710- (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1705+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1706+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1707+ (cond2cc $cc))>; // signed compare on i32 needs sign-extended bytes, hence to_sign_extend_selector on both selectors
17111708
1712- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, B32:$oa, 8)), 255 )),
1713- (i16 (and ( trunc (bfe B32 :$b, B32:$ob, 8)), 255 )),
1709+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1710+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
17141711 cond_signed:$cc),
1715- (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>;
1712+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1713+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1714+ (cond2cc $cc))>; // selectors are reused unchanged: both operands are zero-extended bytes, so the i32 compare agrees with the i16 one
17161715
1717- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, imm:$oa, 8)), 255 )),
1718- (i16 (and ( trunc (bfe B32 :$b, imm:$ob, 8)), 255 )),
1716+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1717+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
17191718 cond_not_signed:$cc),
1720- (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1719+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1720+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1721+ (cond2cc $cc))>; // selectors are reused unchanged: both operands are zero-extended bytes, so the i32 compare agrees with the i16 one
17211722
17221723def SDTDeclareArrayParam :
17231724 SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
0 commit comments