Conversation

@RKSimon (Collaborator) commented on May 19, 2025

Very similar to what we do in lowerShuffleAsVALIGN

I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or an undef element in the test mask) - it can't be used to match zero elements via MaskedVectorIsZero.

Noticed while working on #140516
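
For illustration, a minimal standalone sketch of the shift-mask construction (plain C++, not the LLVM sources; the element count and zero counts are made-up examples, and SM_SentinelZero mirrors the X86 shuffle sentinel value):

```cpp
// Standalone sketch (not LLVM code): building the expected shift masks.
#include <cstdio>
#include <numeric>
#include <vector>

constexpr int SM_SentinelZero = -2; // "element must be zero" marker

int main() {
  constexpr int NumMaskElts = 8; // e.g. a v8i64 shuffle (made-up example)
  constexpr int ZeroLo = 1;      // low lanes known zero  -> VSHLDQ-like
  constexpr int ZeroHi = 2;      // high lanes known zero -> VSRLDQ-like

  // Left-shift pattern: [Z, 0, 1, 2, 3, 4, 5, 6].
  std::vector<int> LoMask(NumMaskElts, SM_SentinelZero);
  std::iota(LoMask.begin() + ZeroLo, LoMask.end(), 0);

  // Right-shift pattern: [2, 3, 4, 5, 6, 7, Z, Z].
  std::vector<int> HiMask(NumMaskElts, SM_SentinelZero);
  std::iota(HiMask.begin(), HiMask.begin() + (NumMaskElts - ZeroHi), ZeroHi);

  // The rotation amounts that become VALIGN's immediate.
  printf("left-shift imm = %d, right-shift imm = %d\n", NumMaskElts - ZeroLo,
         ZeroHi);
}
```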

@RKSimon requested a review from @phoebewang on May 19, 2025 12:54
@llvmbot (Member) commented on May 19, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Very similar to what we do in lowerShuffleAsVALIGN

I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or an undef element in the test mask) - it can't be used to match zero elements via MaskedVectorIsZero.

Noticed while working on #140516
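
As an illustration of that matching rule, a condensed sketch in plain C++ (matchesExpected is a hypothetical stand-in for the per-element logic, not the real isTargetShuffleEquivalent):

```cpp
// Standalone sketch (hypothetical helper, not the real function): an
// expected SM_SentinelZero only matches an identical sentinel or an
// undef test element; it is never proven zero via MaskedVectorIsZero.
#include <cassert>

constexpr int SM_SentinelUndef = -1;
constexpr int SM_SentinelZero = -2;

bool matchesExpected(int MaskIdx, int ExpectedIdx) {
  if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
    return true; // exact match, or the test element is undef
  if (ExpectedIdx < 0)
    return false; // failed to match an expected SM_SentinelZero
  // (The real code goes on to handle SM_SentinelZero in the test mask,
  // known-bits zero checks, commuted operands, etc.)
  return false;
}

int main() {
  assert(matchesExpected(SM_SentinelZero, SM_SentinelZero));
  assert(matchesExpected(SM_SentinelUndef, SM_SentinelZero));
  assert(!matchesExpected(3, SM_SentinelZero)); // real index never matches
}
```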


Full diff: https://github.com/llvm/llvm-project/pull/140538.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+37-5)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+2-5)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b6b0d7b86c9c..6f58910e55add 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10096,7 +10096,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
   if (Size != (int)ExpectedMask.size())
     return false;
   assert(llvm::all_of(ExpectedMask,
-                      [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
+                      [Size](int M) {
+                        return M == SM_SentinelZero || isInRange(M, 0, 2 * Size);
+                      }) &&
          "Illegal target shuffle mask");
 
   // Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10121,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
     int ExpectedIdx = ExpectedMask[i];
     if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
       continue;
+    // If we failed to match an expected SM_SentinelZero then early out.
+    if (ExpectedIdx < 0)
+      return false;
     if (MaskIdx == SM_SentinelZero) {
       // If we need this expected index to be a zero element, then update the
       // relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39599,45 @@ static bool matchBinaryPermuteShuffle(
       ((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
        (MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
        (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
+    MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
+                                   MaskVT.getSizeInBits() / EltSizeInBits);
     if (!isAnyZero(Mask)) {
       int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
       if (0 < Rotation) {
         Shuffle = X86ISD::VALIGN;
-        if (EltSizeInBits == 64)
-          ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
-        else
-          ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
+        ShuffleVT = AlignVT;
         PermuteImm = Rotation;
         return true;
       }
     }
+    // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
+    unsigned ZeroLo = Zeroable.countr_one();
+    unsigned ZeroHi = Zeroable.countl_one();
+    assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
+    if (ZeroLo) {
+      SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+      std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
+      if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+        V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+        Shuffle = X86ISD::VALIGN;
+        ShuffleVT = AlignVT;
+        PermuteImm = NumMaskElts - ZeroLo;
+        return true;
+      }
+    }
+    if (ZeroHi) {
+      SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+      std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
+                ZeroHi);
+      if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+        V2 = V1;
+        V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+        Shuffle = X86ISD::VALIGN;
+        ShuffleVT = AlignVT;
+        PermuteImm = ZeroHi;
+        return true;
+      }
+    }
   }
 
   // Attempt to match against PALIGNR byte rotate.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index b3b90b5f51501..68967c2ce6536 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -812,10 +812,8 @@ define <8 x i64> @combine_vpermt2var_8i64_as_valignq(<8 x i64> %x0, <8 x i64> %x
 define <8 x i64> @combine_vpermt2var_8i64_as_valignq_zero(<8 x i64> %x0) {
 ; CHECK-LABEL: combine_vpermt2var_8i64_as_valignq_zero:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT:    valignq {{.*#+}} zmm0 = zmm0[7],zmm1[0,1,2,3,4,5,6]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> zeroinitializer, <8 x i64> %x0, i8 -1)
   ret <8 x i64> %res0
@@ -825,8 +823,7 @@ define <8 x i64> @combine_vpermt2var_8i64_as_zero_valignq(<8 x i64> %x0) {
 ; CHECK-LABEL: combine_vpermt2var_8i64_as_zero_valignq:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
-; CHECK-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    valignq {{.*#+}} zmm0 = zmm1[7],zmm0[0,1,2,3,4,5,6]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> %x0, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res0
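
As a sanity check on the operand order above, a standalone sketch (not LLVM code) assuming VALIGN's concat-and-shift decoding, in which the second operand supplies the low half of the concatenation:

```cpp
// Standalone sketch: simulate VALIGN's element selection,
//   result[i] = concat(HiOp:LoOp)[i + Imm]
// to show why the zero vector is the low operand for the left shift
// and the high operand for the right shift.
#include <array>
#include <cstdio>

constexpr int N = 8;

std::array<int, N> valign(const std::array<int, N> &HiOp,
                          const std::array<int, N> &LoOp, int Imm) {
  std::array<int, N> R{};
  for (int i = 0; i != N; ++i) {
    int Idx = i + Imm;
    R[i] = Idx < N ? LoOp[Idx] : HiOp[Idx - N];
  }
  return R;
}

int main() {
  std::array<int, N> Src{}, Zero{};
  for (int i = 0; i != N; ++i)
    Src[i] = 100 + i; // recognizable source lanes

  // ZeroLo = 1 case: VALIGN(V1 = Src, V2 = zero, Imm = N - ZeroLo).
  // Expect [0, 100, 101, 102, 103, 104, 105, 106].
  for (int V : valign(Src, Zero, N - 1))
    printf("%d ", V);
  printf("\n");

  // ZeroHi = 1 case: VALIGN(V1 = zero, V2 = Src, Imm = ZeroHi).
  // Expect [101, 102, 103, 104, 105, 106, 107, 0].
  for (int V : valign(Zero, Src, 1))
    printf("%d ", V);
  printf("\n");
}
```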

@github-actions bot commented on May 19, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RKSimon force-pushed the x86-avx512f-element-shift branch from 8676e30 to 0e1f9fb on May 19, 2025 12:58
…DQ patterns using VALIGN

Very similar to what we do in lowerShuffleAsVALIGN

I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or an undef element in the test mask) - it can't be used to match zero elements via MaskedVectorIsZero.

Noticed while working on llvm#140516
@RKSimon force-pushed the x86-avx512f-element-shift branch from 0e1f9fb to 9a82207 on May 19, 2025 16:46
@phoebewang (Contributor) left a comment

LGTM.

@RKSimon merged commit 09fd8f0 into llvm:main on May 20, 2025
10 of 11 checks passed
@RKSimon deleted the x86-avx512f-element-shift branch on May 20, 2025 15:08
@kazutakahirata (Contributor) commented

@RKSimon I've landed ad80f73 to fix a warning from this PR. Thanks!
