Skip to content

Commit 1c73391

Browse files
committed
[Clang] Freeze padded vectors before storing. (llvm#164821)
Currently Clang usually leaves padding bits uninitialized, which means they are undef at the moment. When expanding stores of vector types to include padding, the padding lanes will be poison, hence the padding bits will be poison. This interacts badly with coercion of arguments and return values, where 3 x float vectors will be loaded as i128 integer; poisoning the padding bits will make the whole value poison. Not sure if there's a better way, but I think we have a number of places that currently rely on the padding being undef, not poison. PR: llvm#164821 rdar://162655662 (cherry-picked from 5378584)
1 parent f5781a0 commit 1c73391

File tree

4 files changed

+79
-28
lines changed

4 files changed

+79
-28
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2716,9 +2716,13 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
27162716
CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts());
27172717
if (!ClangVecTy->isPackedVectorBoolType(getContext()) &&
27182718
VecTy != NewVecTy) {
2719-
SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
2719+
SmallVector<int, 16> Mask(NewVecTy->getNumElements(),
2720+
VecTy->getNumElements());
27202721
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
2721-
Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
2722+
// Use undef instead of poison for the padding lanes, to make sure no
2723+
// padding bits are poisoned, which may break coercion.
2724+
Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
2725+
Mask, "extractVec");
27222726
SrcTy = NewVecTy;
27232727
}
27242728
if (Addr.getElementType() != SrcTy)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
2+
// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
3+
4+
typedef float float3 __attribute__((ext_vector_type(3)));
5+
struct Vec3 {
6+
union {
7+
struct {
8+
float x;
9+
float y;
10+
float z;
11+
};
12+
float vec __attribute__((ext_vector_type(3)));
13+
};
14+
};
15+
16+
// CHECK-LABEL: define i128 @add(
17+
// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
18+
// CHECK-NEXT: [[ENTRY:.*:]]
19+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16
20+
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16
21+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
22+
// CHECK-NEXT: store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
23+
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
24+
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
25+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
26+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
27+
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
28+
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
29+
// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
30+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
31+
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
32+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
33+
// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
34+
// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
35+
// CHECK-NEXT: ret i128 [[TMP3]]
36+
//
37+
struct Vec3 add(struct Vec3 a) {
38+
struct Vec3 res;
39+
res.vec = a.vec + a.vec;
40+
return res;
41+
}
42+

clang/test/CodeGenCXX/matrix-vector-bit-int.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
1919
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
2020
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
2121
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
22-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
22+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2323
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
2424
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
2525
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) {
3838
// CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
3939
// CHECK-NEXT: [[ENTRY:.*:]]
4040
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
41-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
41+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4242
// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
4343
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
4444
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) {
5757
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
5858
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
5959
// CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
60-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
60+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6161
// CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
6262
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
6363
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) {
8080
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
8181
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
8282
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
83-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
83+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8484
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
8585
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
8686
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>

clang/test/CodeGenOpenCL/preserve_vec3.cl

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ typedef float float4 __attribute__((ext_vector_type(4)));
1111
// CHECK-LABEL: define dso_local spir_kernel void @foo(
1212
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
1313
// CHECK-NEXT: [[ENTRY:.*:]]
14-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15-
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
16-
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
14+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
16+
// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[EXTRACTVEC_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
1718
// CHECK-NEXT: ret void
1819
//
1920
void kernel foo(global float3 *a, global float3 *b) {
@@ -24,8 +25,9 @@ void kernel foo(global float3 *a, global float3 *b) {
2425
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
2526
// CHECK-NEXT: [[ENTRY:.*:]]
2627
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
27-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
28-
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
28+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
29+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[ASTYPE_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
30+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
2931
// CHECK-NEXT: ret void
3032
//
3133
void kernel float4_to_float3(global float3 *a, global float4 *b) {
@@ -35,9 +37,9 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
3537
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
3638
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
3739
// CHECK-NEXT: [[ENTRY:.*:]]
38-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
39-
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
40-
// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
40+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
41+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
42+
// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
4143
// CHECK-NEXT: ret void
4244
//
4345
void kernel float3_to_float4(global float3 *a, global float4 *b) {
@@ -47,8 +49,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
4749
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
4850
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
4951
// CHECK-NEXT: [[ENTRY:.*:]]
50-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
51-
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
52+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
53+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5254
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
5355
// CHECK-NEXT: ret void
5456
//
@@ -60,41 +62,43 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
6062
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
6163
// CHECK-NEXT: [[ENTRY:.*:]]
6264
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
63-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
64-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
65+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
66+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[ASTYPE_I]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
67+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
6568
// CHECK-NEXT: ret void
6669
//
6770
void kernel char8_to_short3(global short3 *a, global char8 *b) {
6871
*a = __builtin_astype(*b, short3);
6972
}
7073

7174
// CHECK-LABEL: define dso_local spir_func void @from_char3(
72-
// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
75+
// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
7376
// CHECK-NEXT: [[ENTRY:.*:]]
74-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
75-
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
77+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
78+
// CHECK-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
7679
// CHECK-NEXT: ret void
7780
//
7881
void from_char3(char3 a, global int *out) {
7982
*out = __builtin_astype(a, int);
8083
}
8184

8285
// CHECK-LABEL: define dso_local spir_func void @from_short3(
83-
// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
86+
// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
8487
// CHECK-NEXT: [[ENTRY:.*:]]
85-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
86-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
88+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
89+
// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
8790
// CHECK-NEXT: ret void
8891
//
8992
void from_short3(short3 a, global long *out) {
9093
*out = __builtin_astype(a, long);
9194
}
9295

9396
// CHECK-LABEL: define dso_local spir_func void @scalar_to_char3(
94-
// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
97+
// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
9598
// CHECK-NEXT: [[ENTRY:.*:]]
9699
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
97-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
100+
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
101+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> <i8 undef, i8 poison, i8 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
98102
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
99103
// CHECK-NEXT: ret void
100104
//
@@ -103,10 +107,11 @@ void scalar_to_char3(int a, global char3 *out) {
103107
}
104108

105109
// CHECK-LABEL: define dso_local spir_func void @scalar_to_short3(
106-
// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
110+
// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
107111
// CHECK-NEXT: [[ENTRY:.*:]]
108112
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
109-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
113+
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
114+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
110115
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
111116
// CHECK-NEXT: ret void
112117
//

0 commit comments

Comments
 (0)