Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (#153138)" #153403

PeddleSpam · 2025-08-13T12:33:42Z

This reverts commit 9115bef.

Commit message: Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (llvm#153138)". This reverts commit 9115bef.

llvmbot · 2025-08-13T12:34:16Z

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-clang

Author: Leon Clark (PeddleSpam)

Changes

This reverts commit 9115bef.

Patch is 39.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153403.diff

7 Files Affected:

(modified) clang/test/CodeGenOpenCL/preserve_vec3.cl (+11-11)
(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (-135)
(modified) llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll (+4-4)
(modified) llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll (+4-2)
(modified) llvm/test/Transforms/VectorCombine/X86/load-widening.ll (+2-2)
(modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll (+15-9)
(removed) llvm/test/Transforms/VectorCombine/load-shufflevector.ll (-404)

diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index e73657e30d884..49ebae6fc7013 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4)));
 // CHECK-LABEL: define dso_local spir_kernel void @foo(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -47,9 +47,9 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 void kernel float3_to_double2(global float3 *a, global double2 *b) {
@@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index c45005db6016b..c43505432367c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,6 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -30,16 +29,13 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <numeric>
-#include <optional>
 #include <queue>
 #include <set>
-#include <tuple>
 
 #define DEBUG_TYPE "vector-combine"
 #include "llvm/Transforms/Utils/InstructionWorklist.h"
@@ -141,7 +137,6 @@ class VectorCombine {
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
   bool foldInterleaveIntrinsics(Instruction &I);
   bool shrinkType(Instruction &I);
-  bool shrinkLoadForShuffles(Instruction &I);
   bool shrinkPhiOfShuffles(Instruction &I);
 
   void replaceValue(Value &Old, Value &New) {
@@ -3868,133 +3863,6 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
   return true;
 }
 
-// Attempt to shrink loads that are only used by shufflevector instructions.
-bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
-  auto *OldLoad = dyn_cast<LoadInst>(&I);
-  if (!OldLoad || !OldLoad->isSimple())
-    return false;
-
-  auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
-  if (!OldLoadTy)
-    return false;
-
-  unsigned const OldNumElements = OldLoadTy->getNumElements();
-
-  // Search all uses of load. If all uses are shufflevector instructions, and
-  // the second operands are all poison values, find the minimum and maximum
-  // indices of the vector elements referenced by all shuffle masks.
-  // Otherwise return `std::nullopt`.
-  using IndexRange = std::pair<int, int>;
-  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
-    IndexRange OutputRange = IndexRange(OldNumElements, -1);
-    for (llvm::Use &Use : I.uses()) {
-      // Ensure all uses match the required pattern.
-      User *Shuffle = Use.getUser();
-      ArrayRef<int> Mask;
-
-      if (!match(Shuffle,
-                 m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask))))
-        return std::nullopt;
-
-      // Ignore shufflevector instructions that have no uses.
-      if (Shuffle->use_empty())
-        continue;
-
-      // Find the min and max indices used by the shufflevector instruction.
-      for (int Index : Mask) {
-        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
-          OutputRange.first = std::min(Index, OutputRange.first);
-          OutputRange.second = std::max(Index, OutputRange.second);
-        }
-      }
-    }
-
-    if (OutputRange.second < OutputRange.first)
-      return std::nullopt;
-
-    return OutputRange;
-  };
-
-  // Get the range of vector elements used by shufflevector instructions.
-  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
-    unsigned const NewNumElements = Indices->second + 1u;
-
-    // If the range of vector elements is smaller than the full load, attempt
-    // to create a smaller load.
-    if (NewNumElements < OldNumElements) {
-      IRBuilder Builder(&I);
-      Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-      // Calculate costs of old and new ops.
-      Type *ElemTy = OldLoadTy->getElementType();
-      FixedVectorType *NewLoadTy = FixedVectorType::get(ElemTy, NewNumElements);
-      Value *PtrOp = OldLoad->getPointerOperand();
-
-      InstructionCost OldCost = TTI.getMemoryOpCost(
-          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
-          OldLoad->getPointerAddressSpace(), CostKind);
-      InstructionCost NewCost =
-          TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(),
-                              OldLoad->getPointerAddressSpace(), CostKind);
-
-      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
-      SmallVector<UseEntry, 4u> NewUses;
-      unsigned const MaxIndex = NewNumElements * 2u;
-
-      for (llvm::Use &Use : I.uses()) {
-        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
-        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
-
-        // Create entry for new use.
-        NewUses.push_back({Shuffle, OldMask});
-
-        // Validate mask indices.
-        for (int Index : OldMask) {
-          if (Index >= static_cast<int>(MaxIndex))
-            return false;
-        }
-
-        // Update costs.
-        OldCost +=
-            TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
-                               OldLoadTy, OldMask, CostKind);
-        NewCost +=
-            TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
-                               NewLoadTy, OldMask, CostKind);
-      }
-
-      LLVM_DEBUG(
-          dbgs() << "Found a load used only by shufflevector instructions: "
-                 << I << "\n  OldCost: " << OldCost
-                 << " vs NewCost: " << NewCost << "\n");
-
-      if (OldCost < NewCost || !NewCost.isValid())
-        return false;
-
-      // Create new load of smaller vector.
-      auto *NewLoad = cast<LoadInst>(
-          Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign()));
-      NewLoad->copyMetadata(I);
-
-      // Replace all uses.
-      for (UseEntry &Use : NewUses) {
-        ShuffleVectorInst *Shuffle = Use.first;
-        std::vector<int> &NewMask = Use.second;
-
-        Builder.SetInsertPoint(Shuffle);
-        Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
-        Value *NewShuffle = Builder.CreateShuffleVector(
-            NewLoad, PoisonValue::get(NewLoadTy), NewMask);
-
-        replaceValue(*Shuffle, *NewShuffle);
-      }
-
-      return true;
-    }
-  }
-  return false;
-}
-
 // Attempt to narrow a phi of shufflevector instructions where the two incoming
 // values have the same operands but different masks. If the two shuffle masks
 // are offsets of one another we can use one branch to rotate the incoming
@@ -4166,9 +4034,6 @@ bool VectorCombine::run() {
         MadeChange |= foldSelectShuffle(I);
         MadeChange |= foldShuffleToIdentity(I);
         break;
-      case Instruction::Load:
-        MadeChange |= shrinkLoadForShuffles(I);
-        break;
       case Instruction::BitCast:
         MadeChange |= foldBitcastShuffle(I);
         break;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
index 86bdd125e5e57..f6e8fcd5d1d8c 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -11,13 +11,13 @@ $getAt = comdat any
 
 define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
 ; SSE-LABEL: @ConvertVectors_ByRef(
-; SSE-NEXT:    [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
-; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
 ; SSE-NEXT:    ret <4 x float> [[TMP3]]
 ;
 ; AVX-LABEL: @ConvertVectors_ByRef(
-; AVX-NEXT:    [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
-; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
+; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
 ; AVX-NEXT:    ret <4 x float> [[TMP3]]
 ;
   %2 = alloca ptr, align 8
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 0c2346e616e36..977da754ec5a7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -252,7 +252,8 @@ define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(ptr align 16 dereferenc
 define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
-; CHECK-NEXT:    [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -340,7 +341,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
 define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT:    [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll
index eacc40bfa9b53..30a089818074e 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll
@@ -443,8 +443,8 @@ define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize
 
 define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address {
 ; CHECK-LABEL: @load_v2i32_v4i32_asan(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 1
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %l = load <2 x i32>, ptr %p, align 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
index eddfc57a7d256..b30dc9ffdc596 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -47,12 +47,21 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
 ; broadcast loads are free on AVX (and blends are much cheap than general 2-operand shuffles)
 
 define  <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1)  {
-; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64(
-; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 32
-; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 32
-; CHECK-NEXT:    [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x double> [[BLEND]]
+; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; SSE-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; SSE-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; SSE-NEXT:    ret <4 x double> [[BLEND]]
+;
+; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; AVX-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; AVX-NEXT:    [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
+; AVX-NEXT:    [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
+; AVX-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; AVX-NEXT:    ret <4 x double> [[BLEND]]
 ;
   %ld0 = load <4 x double>, ptr %p0, align 32
   %ld1 = load <4 x double>, ptr %p1, align 32
@@ -72,6 +81,3 @@ define <2 x float> @PR86068(<2 x float> %a0, <2 x float> %a1) {
   %s2 = shufflevector <2 x float> %s1, <2 x float> %a0, <2 x i32> <i32 0, i32 3>
   ret <2 x float> %s2
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/load-shufflevector.ll b/llvm/test/Transforms/VectorCombine/load-shufflevector.ll
deleted file mode 100644
index 7d9393ab77f20..0000000000000
--- a/llvm/test/Transforms/VectorCombine/load-shufflevector.ll
+++ /dev/null
@@ -1,404 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=vector-combine -S < %s | FileCheck %s
-
-define <8 x half> @shuffle_v4_v8f16_r0_1_volatile(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
-; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0...
[truncated]

github-actions · 2025-08-13T12:36:09Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll llvm/test/Transforms/VectorCombine/X86/load-widening.ll llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

The following files introduce new uses of undef:

llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

Commit 4ad75f7: Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (llvm#153138)"

This reverts commit 9115bef.

llvmbot added the following labels on Aug 13, 2025: clang ("Clang issues not falling into any other category"), vectorizers, llvm:transforms, llvm:vectorcombine

PeddleSpam closed this Aug 13, 2025

PeddleSpam deleted the revert_153138 branch August 13, 2025 13:56

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (#153138)" #153403

Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (#153138)" #153403

Uh oh!

PeddleSpam commented Aug 13, 2025

Uh oh!

llvmbot commented Aug 13, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Aug 13, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (#153138)" #153403

Revert "[VectorCombine] Shrink loads used in shufflevector rebroadcasts. (#153138)" #153403

Uh oh!

Conversation

PeddleSpam commented Aug 13, 2025

Uh oh!

llvmbot commented Aug 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Aug 13, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

llvmbot commented Aug 13, 2025 •

edited

Loading