@@ -158,8 +158,8 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
158158
159159define <4 x float > @load_f32_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
160160; CHECK-LABEL: @load_f32_insert_v4f32(
161- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
162- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
161+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
162+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
163163; CHECK-NEXT: ret <4 x float> [[R]]
164164;
165165 %s = load float , ptr %p , align 4
@@ -169,8 +169,8 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
169169
170170define <4 x float > @casted_load_f32_insert_v4f32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
171171; CHECK-LABEL: @casted_load_f32_insert_v4f32(
172- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 4
173- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
172+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
173+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
174174; CHECK-NEXT: ret <4 x float> [[R]]
175175;
176176 %s = load float , ptr %p , align 4
@@ -182,8 +182,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
182182
183183define <4 x i32 > @load_i32_insert_v4i32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
184184; CHECK-LABEL: @load_i32_insert_v4i32(
185- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 16
186- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1 ]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
185+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
186+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S ]], i32 0
187187; CHECK-NEXT: ret <4 x i32> [[R]]
188188;
189189 %s = load i32 , ptr %p , align 4
@@ -195,8 +195,8 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
195195
196196define <4 x i32 > @casted_load_i32_insert_v4i32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
197197; CHECK-LABEL: @casted_load_i32_insert_v4i32(
198- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 4
199- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1 ]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
198+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
199+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S ]], i32 0
200200; CHECK-NEXT: ret <4 x i32> [[R]]
201201;
202202 %s = load i32 , ptr %p , align 4
@@ -208,8 +208,8 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
208208
209209define <4 x float > @gep00_load_f32_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
210210; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
211- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
212- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
211+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 16
212+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i64 0
213213; CHECK-NEXT: ret <4 x float> [[R]]
214214;
215215 %s = load float , ptr %p , align 16
@@ -221,8 +221,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
221221
222222define <4 x float > @gep00_load_f32_insert_v4f32_addrspace (ptr addrspace (44 ) align 16 dereferenceable (16 ) %p ) nofree nosync {
223223; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
224- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr addrspace(44) [[P:%.*]], align 16
225- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
224+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr addrspace(44) [[P:%.*]], align 16
225+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i64 0
226226; CHECK-NEXT: ret <4 x float> [[R]]
227227;
228228 %s = load float , ptr addrspace (44 ) %p , align 16
@@ -235,8 +235,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) alig
235235define <8 x i16 > @gep01_load_i16_insert_v8i16 (ptr align 16 dereferenceable (18 ) %p ) nofree nosync {
236236; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
237237; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
238- ; CHECK-NEXT: [[TMP1 :%.*]] = load <8 x i16> , ptr [[GEP]], align 2
239- ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1 ]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
238+ ; CHECK-NEXT: [[S :%.*]] = load i16, ptr [[GEP]], align 2
239+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S ]], i64 0
240240; CHECK-NEXT: ret <8 x i16> [[R]]
241241;
242242 %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
@@ -248,16 +248,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
248248; Can't safely load the offset vector, but can load+shuffle if it is profitable.
249249
250250define <8 x i16 > @gep01_load_i16_insert_v8i16_deref (ptr align 16 dereferenceable (17 ) %p ) nofree nosync {
251- ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
252- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255- ; SSE2-NEXT: ret <8 x i16> [[R]]
256- ;
257- ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
258- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
259- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
260- ; AVX2-NEXT: ret <8 x i16> [[R]]
251+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
252+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255+ ; CHECK-NEXT: ret <8 x i16> [[R]]
261256;
262257 %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
263258 %s = load i16 , ptr %gep , align 2
@@ -268,16 +263,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
268263; Verify that alignment of the new load is not over-specified.
269264
270265define <8 x i16 > @gep01_load_i16_insert_v8i16_deref_minalign (ptr align 2 dereferenceable (16 ) %p ) nofree nosync {
271- ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
272- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
273- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
274- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
275- ; SSE2-NEXT: ret <8 x i16> [[R]]
276- ;
277- ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
278- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
279- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
280- ; AVX2-NEXT: ret <8 x i16> [[R]]
266+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
267+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
268+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
269+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
270+ ; CHECK-NEXT: ret <8 x i16> [[R]]
281271;
282272 %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
283273 %s = load i16 , ptr %gep , align 8
@@ -304,8 +294,9 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
304294
305295define <4 x i32 > @gep012_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (20 ) %p ) nofree nosync {
306296; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
308- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
297+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 12
298+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
299+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
309300; CHECK-NEXT: ret <4 x i32> [[R]]
310301;
311302 %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 12
@@ -336,8 +327,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
336327define <8 x i16 > @gep10_load_i16_insert_v8i16 (ptr align 16 dereferenceable (32 ) %p ) nofree nosync {
337328; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
338329; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
339- ; CHECK-NEXT: [[TMP1 :%.*]] = load <8 x i16> , ptr [[GEP]], align 16
340- ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1 ]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
330+ ; CHECK-NEXT: [[S :%.*]] = load i16, ptr [[GEP]], align 16
331+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S ]], i64 0
341332; CHECK-NEXT: ret <8 x i16> [[R]]
342333;
343334 %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 1 , i64 0
@@ -439,8 +430,8 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
439430
440431define <4 x float > @load_f32_insert_v4f32_align (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
441432; CHECK-LABEL: @load_f32_insert_v4f32_align(
442- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 4
443- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
433+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
434+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
444435; CHECK-NEXT: ret <4 x float> [[R]]
445436;
446437 %s = load float , ptr %p , align 4
@@ -463,8 +454,8 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
463454
464455define <8 x i32 > @load_i32_insert_v8i32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
465456; CHECK-LABEL: @load_i32_insert_v8i32(
466- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 16
467- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, < 8 x i32> <i32 0 , i32 poison , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
457+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
458+ ; CHECK-NEXT: [[R:%.*]] = insertelement < 8 x i32> undef , i32 [[S]] , i32 0
468459; CHECK-NEXT: ret <8 x i32> [[R]]
469460;
470461 %s = load i32 , ptr %p , align 4
@@ -474,8 +465,8 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
474465
475466define <8 x i32 > @casted_load_i32_insert_v8i32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
476467; CHECK-LABEL: @casted_load_i32_insert_v8i32(
477- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 4
478- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, < 8 x i32> <i32 0 , i32 poison , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
468+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
469+ ; CHECK-NEXT: [[R:%.*]] = insertelement < 8 x i32> undef , i32 [[S]] , i32 0
479470; CHECK-NEXT: ret <8 x i32> [[R]]
480471;
481472 %s = load i32 , ptr %p , align 4
@@ -485,8 +476,8 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
485476
486477define <16 x float > @load_f32_insert_v16f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
487478; CHECK-LABEL: @load_f32_insert_v16f32(
488- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
489- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
479+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
480+ ; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S ]], i32 0
490481; CHECK-NEXT: ret <16 x float> [[R]]
491482;
492483 %s = load float , ptr %p , align 4
@@ -496,8 +487,8 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
496487
497488define <2 x float > @load_f32_insert_v2f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
498489; CHECK-LABEL: @load_f32_insert_v2f32(
499- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
500- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
490+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
491+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S ]], i32 0
501492; CHECK-NEXT: ret <2 x float> [[R]]
502493;
503494 %s = load float , ptr %p , align 4
@@ -549,8 +540,9 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
549540
550541define <4 x float > @load_v2f32_extract_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
551542; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
552- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
553- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
543+ ; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 4
544+ ; CHECK-NEXT: [[S:%.*]] = extractelement <2 x float> [[L]], i32 0
545+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
554546; CHECK-NEXT: ret <4 x float> [[R]]
555547;
556548 %l = load <2 x float >, ptr %p , align 4
@@ -560,10 +552,17 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
560552}
561553
562554define <4 x float > @load_v8f32_extract_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
563- ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
564- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
565- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
566- ; CHECK-NEXT: ret <4 x float> [[R]]
555+ ; SSE2-LABEL: @load_v8f32_extract_insert_v4f32(
556+ ; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0
557+ ; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4
558+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
559+ ; SSE2-NEXT: ret <4 x float> [[R]]
560+ ;
561+ ; AVX2-LABEL: @load_v8f32_extract_insert_v4f32(
562+ ; AVX2-NEXT: [[L:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
563+ ; AVX2-NEXT: [[S:%.*]] = extractelement <8 x float> [[L]], i32 0
564+ ; AVX2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
565+ ; AVX2-NEXT: ret <4 x float> [[R]]
567566;
568567 %l = load <8 x float >, ptr %p , align 4
569568 %s = extractelement <8 x float > %l , i32 0
@@ -589,17 +588,12 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
589588; Can't safely load the offset vector, but can load+shuffle if it is profitable.
590589
591590define <8 x i16 > @gep1_load_v2i16_extract_insert_v8i16 (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
592- ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
593- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
594- ; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
595- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
596- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
597- ; SSE2-NEXT: ret <8 x i16> [[R]]
598- ;
599- ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
600- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
601- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
602- ; AVX2-NEXT: ret <8 x i16> [[R]]
591+ ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
592+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
593+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
594+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
595+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
596+ ; CHECK-NEXT: ret <8 x i16> [[R]]
603597;
604598 %gep = getelementptr inbounds <2 x i16 >, ptr %p , i64 1
605599 %l = load <2 x i16 >, ptr %gep , align 8
0 commit comments