diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index e73657e30d884..49ebae6fc7013 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local spir_kernel void @foo( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: ret void // @@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -47,9 +47,9 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // void kernel float3_to_double2(global float3 *a, global double2 *b) { @@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) { // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index fef0934010df4..6345b18b809a6 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -30,16 +29,13 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include -#include #include #include -#include #define DEBUG_TYPE "vector-combine" #include "llvm/Transforms/Utils/InstructionWorklist.h" @@ -141,7 +137,6 @@ class VectorCombine { bool foldSelectShuffle(Instruction &I, bool FromReduction = false); bool foldInterleaveIntrinsics(Instruction &I); bool shrinkType(Instruction &I); - bool shrinkLoadForShuffles(Instruction &I); void replaceValue(Value &Old, Value &New) { LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n'); @@ -3866,126 +3861,6 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) { return true; } -// Attempt to shrink loads that are only used by shufflevector instructions. -bool VectorCombine::shrinkLoadForShuffles(Instruction &I) { - auto *OldLoad = dyn_cast(&I); - if (!OldLoad || !OldLoad->isSimple()) - return false; - - auto *OldLoadTy = dyn_cast(OldLoad->getType()); - if (!OldLoadTy) - return false; - - unsigned const OldNumElements = OldLoadTy->getNumElements(); - - // Search all uses of load. If all uses are shufflevector instructions, and - // the second operands are all poison values, find the minimum and maximum - // indices of the vector elements referenced by all shuffle masks. - // Otherwise return `std::nullopt`. - using IndexRange = std::pair; - auto GetIndexRangeInShuffles = [&]() -> std::optional { - IndexRange OutputRange = IndexRange(OldNumElements, -1); - for (llvm::Use &Use : I.uses()) { - // Ensure all uses match the required pattern. - User *Shuffle = Use.getUser(); - ArrayRef Mask; - - if (!match(Shuffle, - m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask)))) - return std::nullopt; - - // Ignore shufflevector instructions that have no uses. - if (Shuffle->use_empty()) - continue; - - // Find the min and max indices used by the shufflevector instruction. - for (int Index : Mask) { - if (Index >= 0 && Index < static_cast(OldNumElements)) { - OutputRange.first = std::min(Index, OutputRange.first); - OutputRange.second = std::max(Index, OutputRange.second); - } - } - } - - if (OutputRange.second < OutputRange.first) - return std::nullopt; - - return OutputRange; - }; - - // Get the range of vector elements used by shufflevector instructions. - if (std::optional Indices = GetIndexRangeInShuffles()) { - unsigned const NewNumElements = Indices->second + 1u; - - // If the range of vector elements is smaller than the full load, attempt - // to create a smaller load. - if (NewNumElements < OldNumElements) { - IRBuilder Builder(&I); - Builder.SetCurrentDebugLocation(I.getDebugLoc()); - - // Calculate costs of old and new ops. - Type *ElemTy = OldLoadTy->getElementType(); - FixedVectorType *NewLoadTy = FixedVectorType::get(ElemTy, NewNumElements); - Value *PtrOp = OldLoad->getPointerOperand(); - - InstructionCost OldCost = TTI.getMemoryOpCost( - Instruction::Load, OldLoad->getType(), OldLoad->getAlign(), - OldLoad->getPointerAddressSpace(), CostKind); - InstructionCost NewCost = - TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(), - OldLoad->getPointerAddressSpace(), CostKind); - - using UseEntry = std::pair>; - SmallVector NewUses; - - for (llvm::Use &Use : I.uses()) { - auto *Shuffle = cast(Use.getUser()); - ArrayRef OldMask = Shuffle->getShuffleMask(); - - // Create entry for new use. - NewUses.push_back({Shuffle, OldMask}); - - // Update costs. - OldCost += - TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(), - OldLoadTy, OldMask, CostKind); - NewCost += - TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(), - NewLoadTy, OldMask, CostKind); - } - - LLVM_DEBUG( - dbgs() << "Found a load used only by shufflevector instructions: " - << I << "\n OldCost: " << OldCost - << " vs NewCost: " << NewCost << "\n"); - - if (OldCost < NewCost || !NewCost.isValid()) - return false; - - // Create new load of smaller vector. - auto *NewLoad = cast( - Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign())); - NewLoad->copyMetadata(I); - - // Replace all uses. - for (UseEntry &Use : NewUses) { - ShuffleVectorInst *Shuffle = Use.first; - std::vector &NewMask = Use.second; - - Builder.SetInsertPoint(Shuffle); - Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc()); - Value *NewShuffle = Builder.CreateShuffleVector( - NewLoad, PoisonValue::get(NewLoadTy), NewMask); - - replaceValue(*Shuffle, *NewShuffle); - } - - return true; - } - } - return false; -} - /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. bool VectorCombine::run() { @@ -4062,9 +3937,6 @@ bool VectorCombine::run() { MadeChange |= foldSelectShuffle(I); MadeChange |= foldShuffleToIdentity(I); break; - case Instruction::Load: - MadeChange |= shrinkLoadForShuffles(I); - break; case Instruction::BitCast: MadeChange |= foldBitcastShuffle(I); break; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll index 9218cc2d019f8..85f6fceb5bdbe 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll @@ -11,13 +11,13 @@ $getAt = comdat any define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 { ; SSE-LABEL: @ConvertVectors_ByRef( -; SSE-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16 -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; SSE-NEXT: ret <4 x float> [[TMP3]] ; ; AVX-LABEL: @ConvertVectors_ByRef( -; AVX-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16 -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> +; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> ; AVX-NEXT: ret <4 x float> [[TMP3]] ; %2 = alloca ptr, align 8 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index 0c2346e616e36..977da754ec5a7 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -252,7 +252,8 @@ define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(ptr align 16 dereferenc define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync { ; CHECK-LABEL: @gep01_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2 +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -340,7 +341,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll index eacc40bfa9b53..30a089818074e 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll @@ -443,8 +443,8 @@ define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address { ; CHECK-LABEL: @load_v2i32_v4i32_asan( -; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <4 x i32> +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %l = load <2 x i32>, ptr %p, align 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll index eddfc57a7d256..b30dc9ffdc596 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll @@ -47,12 +47,21 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) { ; broadcast loads are free on AVX (and blends are much cheap than general 2-operand shuffles) define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) { -; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64( -; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 32 -; CHECK-NEXT: [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 32 -; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> -; CHECK-NEXT: ret <4 x double> [[BLEND]] +; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64( +; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32 +; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32 +; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[BLEND]] +; +; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64( +; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32 +; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32 +; AVX-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> +; AVX-NEXT: ret <4 x double> [[BLEND]] ; %ld0 = load <4 x double>, ptr %p0, align 32 %ld1 = load <4 x double>, ptr %p1, align 32 @@ -72,6 +81,3 @@ define <2 x float> @PR86068(<2 x float> %a0, <2 x float> %a1) { %s2 = shufflevector <2 x float> %s1, <2 x float> %a0, <2 x i32> ret <2 x float> %s2 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX: {{.*}} -; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/load-shufflevector.ll b/llvm/test/Transforms/VectorCombine/load-shufflevector.ll deleted file mode 100644 index 467c20c5da0c2..0000000000000 --- a/llvm/test/Transforms/VectorCombine/load-shufflevector.ll +++ /dev/null @@ -1,392 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=vector-combine -S < %s | FileCheck %s - -define <8 x half> @shuffle_v4_v8f16_r0_1_volatile(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1_volatile( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load volatile <4 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <8 x i32> -; CHECK-NEXT: ret <8 x half> [[TMP1]] -; -entry: - %val0 = load volatile <4 x half>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - ret <8 x half> %val1 -} - -define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> -; CHECK-NEXT: ret <8 x half> [[TMP1]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - ret <8 x half> %val1 -} - -define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> -; CHECK-NEXT: ret <8 x half> [[TMP1]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - ret <8 x half> %val1 -} - -define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> -; CHECK-NEXT: ret <4 x half> [[TMP1]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> - ret <4 x half> %val1 -} - -define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> -; CHECK-NEXT: ret <8 x half> [[TMP1]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - ret <8 x half> %val1 -} - -define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x half> [[VAL3]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ] - ret <8 x half> %val3 -} - -define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <4 x half> [[VAL3]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> - br label %finally - -finally: - %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ] - ret <4 x half> %val3 -} - -define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x half> [[VAL3]] -; -entry: - %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ] - ret <8 x half> %val3 -} - -define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - ret <8 x i32> %val1 -} - -define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - ret <8 x i32> %val1 -} - -define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[TMP1]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> - ret <4 x i32> %val1 -} - -define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - ret <8 x i32> %val1 -} - -define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x i32> [[VAL3]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] - ret <8 x i32> %val3 -} - -define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x i32> [[VAL3]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] - ret <8 x i32> %val3 -} - -define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <4 x i32> [[VAL3]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> - br label %finally - -finally: - %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ] - ret <4 x i32> %val3 -} - -define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x i32> [[VAL3]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] - ret <8 x i32> %val3 -} - -define <8 x i32> @shuffle_v4_v8i32_cond_r1_4(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { -; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_4( -; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: br label %[[FINALLY]] -; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] -; CHECK-NEXT: ret <8 x i32> [[VAL3]] -; -entry: - %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 - br i1 %cond, label %then, label %else - -then: - %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -else: - %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> - br label %finally - -finally: - %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] - ret <8 x i32> %val3 -}