Skip to content

Commit 4e23101

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (10/11) (#116836)
SVE2.2 introduces predicated instruction forms that zero the inactive lanes. In some cases this makes it possible to avoid a `movprfx` or a `mov` instruction when emitting code for the `_x` or `_z` variants of intrinsics. This patch adds support for emitting the zeroing forms of certain `RBIT`, `REVB`, `REVH`, `REVW`, and `REVD` instructions.
1 parent 2625510 commit 4e23101

File tree

3 files changed

+1542
-8
lines changed

3 files changed

+1542
-8
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4339,11 +4339,11 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
43394339
defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">;
43404340

43414341
// SVE reverse within elements, zeroing predicate
4342-
defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit">;
4343-
defm REVB_ZPzZ : sve_int_perm_rev_revb_z<"revb">;
4344-
defm REVH_ZPzZ : sve_int_perm_rev_revh_z<"revh">;
4345-
def REVW_ZPzZ : sve_int_perm_rev_z<0b11, 0b0110, "revw", ZPR64>;
4346-
def REVD_ZPzZ : sve_int_perm_rev_z<0b00, 0b1110, "revd", ZPR128>;
4342+
defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit", AArch64rbit_mt>;
4343+
defm REVB_ZPzZ : sve_int_perm_rev_revb_z<"revb", AArch64revb_mt>;
4344+
defm REVH_ZPzZ : sve_int_perm_rev_revh_z<"revh", AArch64revh_mt>;
4345+
defm REVW_ZPzZ : sve_int_perm_rev_revw_z<"revw", AArch64revw_mt>;
4346+
defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>;
43474347
} // End HasSVE2p2_or_SME2p2
43484348

43494349
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7657,22 +7657,54 @@ class sve_int_perm_rev_z<bits<2> sz, bits<4> opc, string asm,
76577657
let hasSideEffects = 0;
76587658
}
76597659

7660-
multiclass sve_int_perm_rev_rbit_z<string asm> {
7660+
multiclass sve_int_perm_rev_rbit_z<string asm, SDPatternOperator op> {
76617661
def _B : sve_int_perm_rev_z<0b00, 0b0111, asm, ZPR8>;
76627662
def _H : sve_int_perm_rev_z<0b01, 0b0111, asm, ZPR16>;
76637663
def _S : sve_int_perm_rev_z<0b10, 0b0111, asm, ZPR32>;
76647664
def _D : sve_int_perm_rev_z<0b11, 0b0111, asm, ZPR64>;
7665+
7666+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
7667+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
7668+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
7669+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
76657670
}
76667671

7667-
multiclass sve_int_perm_rev_revb_z<string asm> {
7672+
multiclass sve_int_perm_rev_revb_z<string asm, SDPatternOperator op> {
76687673
def _H : sve_int_perm_rev_z<0b01, 0b0100, asm, ZPR16>;
76697674
def _S : sve_int_perm_rev_z<0b10, 0b0100, asm, ZPR32>;
76707675
def _D : sve_int_perm_rev_z<0b11, 0b0100, asm, ZPR64>;
7676+
7677+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
7678+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
7679+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
76717680
}
76727681

7673-
multiclass sve_int_perm_rev_revh_z<string asm> {
7682+
multiclass sve_int_perm_rev_revh_z<string asm, SDPatternOperator op> {
76747683
def _S : sve_int_perm_rev_z<0b10, 0b0101, asm, ZPR32>;
76757684
def _D : sve_int_perm_rev_z<0b11, 0b0101, asm, ZPR64>;
7685+
7686+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
7687+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
7688+
}
7689+
7690+
multiclass sve_int_perm_rev_revw_z<string asm, SDPatternOperator op> {
7691+
def _D : sve_int_perm_rev_z<0b11, 0b0110, asm, ZPR64>;
7692+
7693+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
7694+
}
7695+
7696+
multiclass sve_int_perm_rev_revd_z<string asm, SDPatternOperator op> {
7697+
def NAME : sve_int_perm_rev_z<0b00, 0b1110, asm, ZPR128>;
7698+
7699+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
7700+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
7701+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
7702+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
7703+
7704+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
7705+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME)>;
7706+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
7707+
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME)>;
76767708
}
76777709

76787710
class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,

0 commit comments

Comments
 (0)