@@ -7,17 +7,19 @@ define amdgpu_kernel void @test_overwrite(i64 %val, i1 %cond) {
77; CHECK-LABEL: define amdgpu_kernel void @test_overwrite
88; CHECK-SAME: (i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
99; CHECK-NEXT: entry:
10+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <3 x i64> poison
11+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <3 x i64> [[STACK]], i64 43, i32 0
1012; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
1113; CHECK: loop:
12- ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2 :%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef> , [[ENTRY:%.*]] ]
13- ; CHECK-NEXT: [[TMP0 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
14- ; CHECK-NEXT: [[TMP1 :%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0
15- ; CHECK-NEXT: [[TMP2 ]] = insertelement <3 x i64> [[TMP1 ]], i64 32, i32 0
16- ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0 ]], 68
14+ ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP3 :%.*]], [[LOOP]] ], [ [[TMP0]] , [[ENTRY:%.*]] ]
15+ ; CHECK-NEXT: [[TMP1 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
16+ ; CHECK-NEXT: [[TMP2 :%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0
17+ ; CHECK-NEXT: [[TMP3 ]] = insertelement <3 x i64> [[TMP2 ]], i64 32, i32 0
18+ ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP1 ]], 68
1719; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
1820; CHECK: end:
19- ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2 ]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef> , [[ENTRY]] ]
20- ; CHECK-NEXT: [[TMP3 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
21+ ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP3 ]], [[LOOP]] ], [ [[TMP0]] , [[ENTRY]] ]
22+ ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
2123; CHECK-NEXT: ret void
2224;
2325entry:
@@ -42,8 +44,9 @@ define <4 x i64> @test_fullvec_out_of_bounds(<4 x i64> %arg) {
4244; CHECK-LABEL: define <4 x i64> @test_fullvec_out_of_bounds
4345; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) {
4446; CHECK-NEXT: entry:
47+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i64> poison
4548; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG]], i64 0
46- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef , i64 [[TMP0]], i32 3
49+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[STACK]] , i64 [[TMP0]], i32 3
4750; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[ARG]], i64 1
4851; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
4952; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[ARG]], i64 3
@@ -62,17 +65,19 @@ define amdgpu_kernel void @test_no_overwrite(i64 %val, i1 %cond) {
6265; CHECK-LABEL: define amdgpu_kernel void @test_no_overwrite
6366; CHECK-SAME: (i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
6467; CHECK-NEXT: entry:
68+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <3 x i64> poison
69+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <3 x i64> [[STACK]], i64 43, i32 0
6570; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
6671; CHECK: loop:
67- ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1 :%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef> , [[ENTRY:%.*]] ]
68- ; CHECK-NEXT: [[TMP0 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
69- ; CHECK-NEXT: [[TMP1 ]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1
70- ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0 ]], 32
72+ ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2 :%.*]], [[LOOP]] ], [ [[TMP0]] , [[ENTRY:%.*]] ]
73+ ; CHECK-NEXT: [[TMP1 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
74+ ; CHECK-NEXT: [[TMP2 ]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1
75+ ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP1 ]], 32
7176; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
7277; CHECK: end:
73- ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1 ]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef> , [[ENTRY]] ]
74- ; CHECK-NEXT: [[TMP2 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
75- ; CHECK-NEXT: [[TMP3 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1
78+ ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2 ]], [[LOOP]] ], [ [[TMP0]] , [[ENTRY]] ]
79+ ; CHECK-NEXT: [[TMP3 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
80+ ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1
7681; CHECK-NEXT: ret void
7782;
7883entry:
@@ -97,6 +102,7 @@ define ptr @alloca_load_store_ptr64_full_ivec(ptr %arg) {
97102; CHECK-LABEL: define ptr @alloca_load_store_ptr64_full_ivec
98103; CHECK-SAME: (ptr [[ARG:%.*]]) {
99104; CHECK-NEXT: entry:
105+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x i8> poison
100106; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
101107; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <8 x i8>
102108; CHECK-NEXT: ret ptr [[ARG]]
@@ -112,6 +118,7 @@ define ptr addrspace(3) @alloca_load_store_ptr32_full_ivec(ptr addrspace(3) %arg
112118; CHECK-LABEL: define ptr addrspace(3) @alloca_load_store_ptr32_full_ivec
113119; CHECK-SAME: (ptr addrspace(3) [[ARG:%.*]]) {
114120; CHECK-NEXT: entry:
121+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <4 x i8> poison
115122; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[ARG]] to i32
116123; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[TMP0]] to <4 x i8>
117124; CHECK-NEXT: ret ptr addrspace(3) [[ARG]]
@@ -127,6 +134,7 @@ define <4 x ptr addrspace(3)> @alloca_load_store_ptr_mixed_full_ptrvec(<2 x ptr>
127134; CHECK-LABEL: define <4 x ptr addrspace(3)> @alloca_load_store_ptr_mixed_full_ptrvec
128135; CHECK-SAME: (<2 x ptr> [[ARG:%.*]]) {
129136; CHECK-NEXT: entry:
137+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <4 x i32> poison
130138; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr> [[ARG]] to <2 x i64>
131139; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
132140; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <4 x i32> [[TMP1]] to <4 x ptr addrspace(3)>
@@ -143,6 +151,7 @@ define <8 x i16> @ptralloca_load_store_ints_full(<2 x i64> %arg) {
143151; CHECK-LABEL: define <8 x i16> @ptralloca_load_store_ints_full
144152; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) {
145153; CHECK-NEXT: entry:
154+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x ptr addrspace(5)> poison
146155; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[ARG]] to <4 x i32>
147156; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <4 x i32> [[TMP0]] to <4 x ptr addrspace(5)>
148157; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
@@ -159,19 +168,22 @@ define void @alloca_load_store_ptr_mixed_ptrvec(<2 x ptr addrspace(3)> %arg) {
159168; CHECK-LABEL: define void @alloca_load_store_ptr_mixed_ptrvec
160169; CHECK-SAME: (<2 x ptr addrspace(3)> [[ARG:%.*]]) {
161170; CHECK-NEXT: entry:
171+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x i32> poison
162172; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[ARG]] to <2 x i32>
163173; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
164- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef , i32 [[TMP1]], i32 0
174+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[ALLOCA]] , i32 [[TMP1]], i32 0
165175; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
166176; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i32 1
167177; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
168178; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP3]], i64 1
169179; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr addrspace(3)>
170180; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
171181; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3]], i64 1
172- ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 undef, i64 2
173- ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 undef, i64 3
174- ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x ptr addrspace(3)>
182+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
183+ ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i64 2
184+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
185+ ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 3
186+ ; CHECK-NEXT: [[TMP14:%.*]] = inttoptr <4 x i32> [[TMP13]] to <4 x ptr addrspace(3)>
175187; CHECK-NEXT: ret void
176188;
177189entry:
0 commit comments