From 73137e420704b09fb45ed4f0cb8206539f0a6b33 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 6 Feb 2025 11:22:54 +0000 Subject: [PATCH 1/2] [AArch64] Add test cases for IsUndefDeInterleave. NFC --- llvm/test/CodeGen/AArch64/zext-shuffle.ll | 142 ++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll index 2965996ddcb02..b4346c7132be6 100644 --- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll @@ -543,3 +543,145 @@ define <8 x double> @uitofp_load_fadd(ptr %p) { ret <8 x double> %c } +define <4 x i32> @isUndefDeInterleave_b0(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: isUndefDeInterleave_b0: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_b1(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_b1: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_b2(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_b2: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_b3(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_b3: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_t0(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: isUndefDeInterleave_t0: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_t1(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_t1: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_t2(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_t2: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_t3: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: isUndefDeInterleave_b0_bad: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) { +; CHECK-LABEL: isUndefDeInterleave_t1_bad: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> + %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> + %3 = zext <4 x i16> %s2 to <4 x i32> + ret <4 x i32> %3 +} + +define i16 @undeftop(<8 x i16> %0) { +; CHECK-LABEL: undeftop: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: ret + %2 = shufflevector <8 x i16> %0, <8 x i16> zeroinitializer, <8 x i32> + %3 = zext <8 x i16> %2 to <8 x i64> + %new0 = add <8 x i64> %3, %3 + %last = trunc <8 x i64> %new0 to <8 x i16> + %4 = extractelement <8 x i16> %last, i32 0 + ret i16 %4 +} From da719496d639b008c7da3bb7ba32511281cd5a27 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 6 Feb 2025 14:00:55 +0000 Subject: [PATCH 2/2] [AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine Given a zext from an extract vector, with a shuffle mask like <4, 0, 0, 4> we would previously recognize the top half as a deinterleave. In order to convert into a uzp we should have been checking that the bottom half is also undef. Fixes #125989 --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 +++ llvm/test/CodeGen/AArch64/zext-shuffle.ll | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8617377ffc55b..b3584ddf5b6ca 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22332,6 +22332,9 @@ static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N, if (!IsDeInterleave) IsUndefDeInterleave = Shuffle->getOperand(1).isUndef() && + all_of( + Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements() / 2), + [](int M) { return M < 0; }) && ShuffleVectorInst::isDeInterleaveMaskOfFactor( Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2, VT.getVectorNumElements() / 2), diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll index b4346c7132be6..20d2071d7fe54 100644 --- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll @@ -646,9 +646,10 @@ define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) { define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: isUndefDeInterleave_b0_bad: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: adrp x8, .LCPI40_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI40_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ret %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> @@ -659,8 +660,10 @@ define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) { ; CHECK-LABEL: isUndefDeInterleave_t1_bad: ; CHECK: // %bb.0: -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-NEXT: adrp x8, .LCPI41_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI41_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-NEXT: ret %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> @@ -671,10 +674,8 @@ define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) { define i16 @undeftop(<8 x i16> %0) { ; CHECK-LABEL: undeftop: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-NEXT: dup v0.8h, v0.h[4] +; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: ret