From e8cccef6f82db09ebfa1a9482c2ba93b610d7a49 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Thu, 6 Feb 2025 17:29:57 +0000
Subject: [PATCH] [AArch64] Add SUBHN patterns for xor variant

`xor x, -1` can be treated as `sub -1, x`, so add patterns for generating
subhn as opposed to a not.

Fixes #123999
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 91 +++++++++------------
 llvm/test/CodeGen/AArch64/arm64-vadd.ll     | 18 ++--
 2 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ce0c260b78410..658ac7490eb33 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns;
 // CodeGen patterns for addhn and subhn instructions, which can actually be
 // written in LLVM IR without too much difficulty.
 
-// Prioritize ADDHN and SUBHN over UZP2.
-let AddedComplexity = 10 in {
-
-// ADDHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
-          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
-                                             (i32 16))))),
-          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
-                                             (i32 32))))),
-          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
-                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
-                                                      (i32 8))))),
-          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
-                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
-                                                      (i32 16))))),
-          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
-                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
-                                                      (i32 32))))),
-          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-
-// SUBHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
-          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
-                                             (i32 16))))),
-          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
-                                             (i32 32))))),
-          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
-                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
-                                                      (i32 8))))),
-          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
-                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
-                                                      (i32 16))))),
-          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
-                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
-                                                      (i32 32))))),
-          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
-                            V128:$Rn, V128:$Rm)>;
-
-} // AddedComplexity = 10
+multiclass AddSubHNPatterns<ValueType VT64, ValueType VT128,
+                            Instruction ADDHN, Instruction ADDHN2,
+                            Instruction SUBHN, Instruction SUBHN2,
+                            int Shift> {
+  def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                              (i32 Shift))))),
+            (ADDHN V128:$Rn, V128:$Rm)>;
+  let AddedComplexity = 10 in
+  def : Pat<(concat_vectors (VT64 V64:$Rd),
+                            (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                                        (i32 Shift))))),
+            (ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+  def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                              (i32 Shift))))),
+            (SUBHN V128:$Rn, V128:$Rm)>;
+  let AddedComplexity = 10 in
+  def : Pat<(concat_vectors (VT64 V64:$Rd),
+                            (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                                        (i32 Shift))))),
+            (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+  // An xor with -1 can also be treated as (sub -1, x).
+  def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
+                                              (i32 Shift))))),
+            (SUBHN V128:$Rm, V128:$Rn)>;
+  let AddedComplexity = 10 in
+  def : Pat<(concat_vectors (VT64 V64:$Rd),
+                            (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
+                                                        (i32 Shift))))),
+            (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
+}
+
+defm : AddSubHNPatterns<v8i8, v8i16, ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
+                        SUBHNv8i16_v8i8, SUBHNv8i16_v16i8, 8>;
+defm : AddSubHNPatterns<v4i16, v4i32, ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
+                        SUBHNv4i32_v4i16, SUBHNv4i32_v8i16, 16>;
+defm : AddSubHNPatterns<v2i32, v2i64, ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
+                        SUBHNv2i64_v2i32, SUBHNv2i64_v4i32, 32>;
 
 //----------------------------------------------------------------------------
 // AdvSIMD bitwise extract from vector instruction.
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index c893138cf7a8c..d982dbbb1f69b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1521,9 +1521,9 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
 ; CHECK-SD-LABEL: neg_narrow_i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
-; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
-; CHECK-SD-NEXT:    uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    subhn v0.8b, v2.8h, v0.8h
+; CHECK-SD-NEXT:    subhn2 v0.16b, v2.8h, v1.8h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: neg_narrow_i8:
@@ -1542,9 +1542,9 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
 define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
 ; CHECK-SD-LABEL: neg_narrow_i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
-; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
-; CHECK-SD-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    subhn v0.4h, v2.4s, v0.4s
+; CHECK-SD-NEXT:    subhn2 v0.8h, v2.4s, v1.4s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: neg_narrow_i16:
@@ -1563,9 +1563,9 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
 define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
 ; CHECK-SD-LABEL: neg_narrow_i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
-; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
-; CHECK-SD-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    subhn v0.2s, v2.2d, v0.2d
+; CHECK-SD-NEXT:    subhn2 v0.4s, v2.2d, v1.2d
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: neg_narrow_i32:
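
---

For reference, a minimal sketch of the IR shape the new xor patterns match,
modelled on the neg_narrow_* tests above; the function and value names here
are illustrative, not copied verbatim from arm64-vadd.ll:

  define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
    ; xor with an all-ones splat, i.e. (sub -1, %a) per element
    %not = xor <16 x i16> %a, splat (i16 -1)
    ; shift the high half of each lane down...
    %hi = lshr <16 x i16> %not, splat (i16 8)
    ; ...and narrow; the new patterns select this to subhn/subhn2
    %res = trunc <16 x i16> %hi to <16 x i8>
    ret <16 x i8> %res
  }

Per the updated CHECK-SD lines, SelectionDAG now materialises the all-ones
vector once (movi v2.2d, #0xffffffffffffffff) and emits subhn/subhn2 against
it, instead of two mvns feeding a uzp2.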