@@ -11,9 +11,10 @@ typedef float float4 __attribute__((ext_vector_type(4)));
1111// CHECK-LABEL: define dso_local spir_kernel void @foo(
1212// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
1313// CHECK-NEXT: [[ENTRY:.*:]]
14- // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15- // CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
16- // CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
14+ // CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15+ // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
16+ // CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[EXTRACTVEC_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17+ // CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
1718// CHECK-NEXT: ret void
1819//
1920void kernel foo (global float3 * a , global float3 * b ) {
@@ -24,8 +25,9 @@ void kernel foo(global float3 *a, global float3 *b) {
2425// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
2526// CHECK-NEXT: [[ENTRY:.*:]]
2627// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
27- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
28- // CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
28+ // CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
29+ // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[ASTYPE_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
30+ // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
2931// CHECK-NEXT: ret void
3032//
3133void kernel float4_to_float3 (global float3 * a , global float4 * b ) {
@@ -35,9 +37,9 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
3537// CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
3638// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
3739// CHECK-NEXT: [[ENTRY:.*:]]
38- // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
39- // CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
40- // CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
40+ // CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
41+ // CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
42+ // CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
4143// CHECK-NEXT: ret void
4244//
4345void kernel float3_to_float4 (global float3 * a , global float4 * b ) {
@@ -47,8 +49,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
4749// CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
4850// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
4951// CHECK-NEXT: [[ENTRY:.*:]]
50- // CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
51- // CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
52+ // CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
53+ // CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5254// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
5355// CHECK-NEXT: ret void
5456//
@@ -60,41 +62,43 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
6062// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
6163// CHECK-NEXT: [[ENTRY:.*:]]
6264// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
63- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
64- // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
65+ // CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
66+ // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[ASTYPE_I]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
67+ // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
6568// CHECK-NEXT: ret void
6669//
// Test input: applies __builtin_astype to the char8 read through `b` with
// target type short3 and stores the result through `a`. The CHECK lines that
// precede this definition expect a <4 x i16> load from `b` and a <4 x i16>
// store to `a`, both with align 8.
6770void kernel char8_to_short3 (global short3 * a , global char8 * b ) {
6871 * a = __builtin_astype (* b , short3 );
6972}
7073
7174// CHECK-LABEL: define dso_local spir_func void @from_char3(
72- // CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
75+ // CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
7376// CHECK-NEXT: [[ENTRY:.*:]]
74- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
75- // CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
77+ // CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
78+ // CHECK-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
7679// CHECK-NEXT: ret void
7780//
// Test input: applies __builtin_astype to the char3 argument `a` with target
// type int and stores the result through `out`. The CHECK lines that precede
// this definition expect `a` to be widened by a shufflevector to <4 x i8>
// (fourth lane poison) and stored to `out` with align 4.
7881void from_char3 (char3 a , global int * out ) {
7982 * out = __builtin_astype (a , int );
8083}
8184
8285// CHECK-LABEL: define dso_local spir_func void @from_short3(
83- // CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
86+ // CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
8487// CHECK-NEXT: [[ENTRY:.*:]]
85- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
86- // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
88+ // CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
89+ // CHECK-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
8790// CHECK-NEXT: ret void
8891//
// Test input: applies __builtin_astype to the short3 argument `a` with target
// type long and stores the result through `out`. The CHECK lines that precede
// this definition expect `a` to be widened by a shufflevector to <4 x i16>
// (fourth lane poison) and stored to `out` with align 8.
8992void from_short3 (short3 a , global long * out ) {
9093 * out = __builtin_astype (a , long );
9194}
9295
9396// CHECK-LABEL: define dso_local spir_func void @scalar_to_char3(
94- // CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
97+ // CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
9598// CHECK-NEXT: [[ENTRY:.*:]]
9699// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
97- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
100+ // CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
101+ // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> <i8 undef, i8 poison, i8 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
98102// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
99103// CHECK-NEXT: ret void
100104//
@@ -103,10 +107,11 @@ void scalar_to_char3(int a, global char3 *out) {
103107}
104108
105109// CHECK-LABEL: define dso_local spir_func void @scalar_to_short3(
106- // CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
110+ // CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
107111// CHECK-NEXT: [[ENTRY:.*:]]
108112// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
109- // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
113+ // CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
114+ // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
110115// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
111116// CHECK-NEXT: ret void
112117//
0 commit comments