@@ -1301,9 +1301,14 @@ def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>;
1301
1301
def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>;
1302
1302
def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>;
1303
1303
1304
- def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>;
1305
- def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>;
1306
- def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>;
1304
+ // For dags like (or (and (not _), _), (shl _, _)) where the "or" with
1305
+ // one argument matches the patterns below, and with the other argument
1306
+ // matches S2_asl_r_r_or, etc., prefer the patterns below.
1307
+ let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. Keep it greater, so the patterns in this block win when both could match.
1308
+ def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>; // NOTE(review): Not2<And> presumably negates the second "and" operand - confirm.
1309
+ def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>;
1310
+ def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>;
1311
+ } // end of AddedComplexity = 110
1307
1312
1308
1313
// S4_addaddi and S4_subaddi don't have tied operands, so give them
1309
1314
// a bit of preference.
@@ -1418,10 +1423,18 @@ def : Pat <(mulhs I64:$Rss, I64:$Rtt),
1418
1423
(A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
1419
1424
(A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
1420
1425
1421
- def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6),
1422
- (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
1423
- def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6),
1424
- (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
1426
+ // Prefer these instructions over M2_macsip/M2_macsin: the macsi* instructions
1427
+ // will put the immediate addend into a register, while these instructions will
1428
+ // use it directly. Such a construct does not appear in the middle of a gep,
1429
+ // where M2_macsip would be preferable.
1430
+ let AddedComplexity = 20 in { // Bias selection toward these patterns over M2_macsip/M2_macsin.
1431
+ def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), // Matches (Rs * U6) + u6.
1432
+ (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; // Both U6 and u6 are emitted as immediates.
1433
+ def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6), // Matches (Rs * Rt) + u6.
1434
+ (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; // Only the addend u6 is emitted as an immediate.
1435
+ } // end of AddedComplexity = 20
1436
+
1437
+ // Keep these instructions less preferable to M2_macsip/M2_macsin.
1425
1438
def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)),
1426
1439
(M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>;
1427
1440
def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)),
@@ -1599,6 +1612,16 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
1599
1612
def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
1600
1613
(C4_nbitsset I32:$Rs, I32:$Rt)>;
1601
1614
1615
+ // Special patterns to address certain cases where the "top-down" matching
1616
+ // algorithm would cause suboptimal selection.
1617
+
1618
+ let AddedComplexity = 100 in { // Raised so these whole-dag patterns are preferred over the ones named below.
1619
+ // Avoid A4_rcmp[n]eqi in these cases:
1620
+ def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), // zext(((1 << Rt) & Rs) != 0): bit Rt of Rs is set.
1621
+ (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
1622
+ def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), // zext(((1 << Rt) & Rs) == 0): bit Rt of Rs is clear.
1623
+ (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
1624
+ } // end of AddedComplexity = 100
1602
1625
1603
1626
// --(11) Load -----------------------------------------------------------
1604
1627
//
0 commit comments