From 1c73391d0a52b28a0f226baaf22739943852313a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 Oct 2025 19:03:17 -0700 Subject: [PATCH] [Clang] Freeze padded vectors before storing. (#164821) Currently Clang usually leaves padding bits uninitialized, which means they are undef at the moment. When expanding stores of vector types to include padding, the padding lanes will be poison, hence the padding bits will be poison. This interacts badly with coercion of arguments and return values, where 3 x float vectors will be loaded as i128 integer; poisoning the padding bits will make the whole value poison. Not sure if there's a better way, but I think we have a number of places that currently rely on the padding being undef, not poison. PR: https://github.com/llvm/llvm-project/pull/164821 rdar://162655662 (cherry-picked from 53785846aa11bdecbbb683e1ebec9461539f1e97) --- clang/lib/CodeGen/CGExpr.cpp | 8 ++- .../CodeGen/AArch64/ext-vector-coercion.c | 42 ++++++++++++++++ .../test/CodeGenCXX/matrix-vector-bit-int.cpp | 8 +-- clang/test/CodeGenOpenCL/preserve_vec3.cl | 49 ++++++++++--------- 4 files changed, 79 insertions(+), 28 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/ext-vector-coercion.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 38150571ea797..200f57e37d7f0 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2716,9 +2716,13 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts()); if (!ClangVecTy->isPackedVectorBoolType(getContext()) && VecTy != NewVecTy) { - SmallVector Mask(NewVecTy->getNumElements(), -1); + SmallVector Mask(NewVecTy->getNumElements(), + VecTy->getNumElements()); std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0); - Value = Builder.CreateShuffleVector(Value, Mask, "extractVec"); + // Use undef instead of poison for the padding lanes, to make sure no + // padding bits are poisoned, which may break coercion. + Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), + Mask, "extractVec"); SrcTy = NewVecTy; } if (Addr.getElementType() != SrcTy) diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c new file mode 100644 index 0000000000000..354980afe06d7 --- /dev/null +++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +typedef float float3 __attribute__((ext_vector_type(3))); +struct Vec3 { + union { + struct { + float x; + float y; + float z; + }; + float vec __attribute__((ext_vector_type(3))); + }; +}; + +// CHECK-LABEL: define i128 @add( +// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16 +// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 +// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16 +// CHECK-NEXT: ret i128 [[TMP3]] +// +struct Vec3 add(struct Vec3 a) { + struct Vec3 res; + res.vec = a.vec + a.vec; + return res; +} + diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp index 2e7531b334ecb..4be1cb3067c2f 100644 --- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp +++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp @@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3))); // CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4 // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4 // CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> // CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> @@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) { // CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16 -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> // CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16 // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 // CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> @@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256 // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256 // CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> // CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256 // CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256 // CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> @@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) { // CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4 // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4 // CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> // CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index 49ebae6fc7013..904d8cb57ea2a 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -11,9 +11,10 @@ typedef float float4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local spir_kernel void @foo( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[EXTRACTVEC_I]], <3 x float> , <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: ret void // void kernel foo(global float3 *a, global float3 *b) { @@ -24,8 +25,9 @@ void kernel foo(global float3 *a, global float3 *b) { // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[ASTYPE_I]], <3 x float> , <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // void kernel float4_to_float3(global float3 *a, global float4 *b) { @@ -35,9 +37,9 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // void kernel float3_to_float4(global float3 *a, global float4 *b) { @@ -47,8 +49,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -60,8 +62,9 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) { // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[ASTYPE_I]], <3 x i16> , <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // void kernel char8_to_short3(global short3 *a, global char8 *b) { @@ -69,10 +72,10 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) { } // CHECK-LABEL: define dso_local spir_func void @from_char3( -// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> +// CHECK-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]] // CHECK-NEXT: ret void // void from_char3(char3 a, global int *out) { @@ -80,10 +83,10 @@ void from_char3(char3 a, global int *out) { } // CHECK-LABEL: define dso_local spir_func void @from_short3( -// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]] // CHECK-NEXT: ret void // void from_short3(short3 a, global long *out) { @@ -91,10 +94,11 @@ void from_short3(short3 a, global long *out) { } // CHECK-LABEL: define dso_local spir_func void @scalar_to_char3( -// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8> -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> +// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> , <4 x i32> // CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -103,10 +107,11 @@ void scalar_to_char3(int a, global char3 *out) { } // CHECK-LABEL: define dso_local spir_func void @scalar_to_short3( -// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16> -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> , <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: ret void //