@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
66306630// CodeGen patterns for addhn and subhn instructions, which can actually be
66316631// written in LLVM IR without too much difficulty.
66326632
6633- // Raise the complexity of the ADDHN and SUBHN patterns below so they are prioritized over UZP2.
6634- let AddedComplexity = 10 in {
6635-
6636- // ADDHN: (trunc (lshr (add Rn, Rm), half-width)), alone and as the second operand of concat_vectors.
6637- def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
6638- (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6639- def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640- (i32 16))))),
6641- (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6642- def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6643- (i32 32))))),
6644- (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6645- def : Pat<(concat_vectors (v8i8 V64:$Rd),
6646- (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6647- (i32 8))))),
6648- (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6649- V128:$Rn, V128:$Rm)>;
6650- def : Pat<(concat_vectors (v4i16 V64:$Rd),
6651- (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6652- (i32 16))))),
6653- (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6654- V128:$Rn, V128:$Rm)>;
6655- def : Pat<(concat_vectors (v2i32 V64:$Rd),
6656- (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6657- (i32 32))))),
6658- (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6659- V128:$Rn, V128:$Rm)>;
6660-
6661- // SUBHN: (trunc (lshr (sub Rn, Rm), half-width)), alone and as the second operand of concat_vectors.
6662- def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
6663- (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6664- def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6665- (i32 16))))),
6666- (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6667- def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6668- (i32 32))))),
6669- (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6670- def : Pat<(concat_vectors (v8i8 V64:$Rd),
6671- (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6672- (i32 8))))),
6673- (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6674- V128:$Rn, V128:$Rm)>;
6675- def : Pat<(concat_vectors (v4i16 V64:$Rd),
6676- (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6677- (i32 16))))),
6678- (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6679- V128:$Rn, V128:$Rm)>;
6680- def : Pat<(concat_vectors (v2i32 V64:$Rd),
6681- (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6682- (i32 32))))),
6683- (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6684- V128:$Rn, V128:$Rm)>;
6685-
6686- } // AddedComplexity = 10
// AddSubHNPatterns: selection patterns for one element width of the
// narrowing "add/sub, keep high half" instructions.
//
//   ADDHN / SUBHN   - instructions selected for the plain pattern
//                     (VT64 (trunc (VT128 (lshr (op Rn, Rm), Shift)))).
//   ADDHN2 / SUBHN2 - instructions selected when that same value is the
//                     second operand of a concat_vectors whose first operand
//                     is the 64-bit $Rd; $Rd is widened to a 128-bit register
//                     operand with SUBREG_TO_REG ... dsub.
//   VT64            - narrow (64-bit) result vector type.
//   VT128           - wide (128-bit) source vector type.
//   Shift           - logical right shift amount applied before truncation.
//
// Only the concat_vectors forms are wrapped in `let AddedComplexity = 10`.
// NOTE(review): presumably this lets them win over competing UZP2 patterns
// for the same concat/trunc/shift shape - confirm.
6633+ multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
6634+ Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> {
// add: plain form, then the concat_vectors form with raised complexity.
6635+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
6636+ (ADDHN V128:$Rn, V128:$Rm)>;
6637+ let AddedComplexity = 10 in
6638+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6639+ (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640+ (i32 Shift))))),
6641+ (ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6642+
// sub: same two shapes as add, selecting SUBHN / SUBHN2.
6643+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
6644+ (SUBHN V128:$Rn, V128:$Rm)>;
6645+ let AddedComplexity = 10 in
6646+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6647+ (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6648+ (i32 Shift))))),
6649+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6650+
6651+ // xor with all-ones equals (-1 - Rn), so it is also selected as SUBHN; note the swapped
// operand order in the output: the matched all-ones vector ($Rm) is the
// minuend and $Rn is the subtrahend.
6652+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
6653+ (SUBHN V128:$Rm, V128:$Rn)>;
6654+ let AddedComplexity = 10 in
6655+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6656+ (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
6657+ (i32 Shift))))),
6658+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
6659+ }
6660+
// Instantiate the ADDHN/SUBHN patterns for each source element width.
// In every case the shift amount is half the source element width:
// v8i16 -> v8i8 uses 8, v4i32 -> v4i16 uses 16, v2i64 -> v2i32 uses 32.
6661+ defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
6662+ SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
6663+ v8i8, v8i16, 8>;
6664+ defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
6665+ SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
6666+ v4i16, v4i32, 16>;
6667+ defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
6668+ SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
6669+ v2i32, v2i64, 32>;
66876670
66886671//----------------------------------------------------------------------------
66896672// AdvSIMD bitwise extract from vector instruction.
0 commit comments