@@ -289,8 +289,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
289289; must be a multiple of element size.
290290; TODO: Could bitcast around this limitation.
291291
292- define <4 x i32 > @gep01_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
293- ; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32 (
292+ define <4 x i32 > @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
293+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (
294294; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
295295; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
296296; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
@@ -302,6 +302,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
302302 ret <4 x i32 > %r
303303}
304304
; Test: i64 load through element 1 of a <16 x i8> GEP (byte offset 1, so the
; load covers bytes 1..8 of the 16 dereferenceable bytes), inserted into lane 0
; of an undef <2 x i64>. The CHECK lines match the input IR unchanged, i.e. the
; scalar load + insertelement is expected to be left alone.
; NOTE(review): presumably left alone because byte offset 1 is not a multiple
; of the 8-byte loaded element size -- confirm against the pass.
305+ define <2 x i64 > @gep01_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
306+ ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
307+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
308+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
309+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
310+ ; CHECK-NEXT: ret <2 x i64> [[R]]
311+ ;
312+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
313+ %s = load i64 , ptr %gep , align 1
314+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
315+ ret <2 x i64 > %r
316+ }
317+
; Test: i32 load through element 11 of a <16 x i8> GEP (byte offset 11, so the
; load covers bytes 11..14 of the 16 dereferenceable bytes), inserted into
; lane 0 of an undef <4 x i32>. The CHECK lines match the input IR unchanged.
; NOTE(review): presumably left alone because byte offset 11 is not a multiple
; of the 4-byte loaded element size -- confirm against the pass.
318+ define <4 x i32 > @gep11_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
319+ ; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
320+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
321+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
322+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
323+ ; CHECK-NEXT: ret <4 x i32> [[R]]
324+ ;
325+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 11
326+ %s = load i32 , ptr %gep , align 1
327+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
328+ ret <4 x i32 > %r
329+ }
330+
; Test: i32 load through element 1 of an <8 x i16> GEP (byte offset 2, so the
; load covers bytes 2..5 of the 16 dereferenceable bytes), inserted into lane 0
; of an undef <4 x i32>. The CHECK lines match the input IR unchanged.
; NOTE(review): presumably left alone because byte offset 2 is not a multiple
; of the 4-byte loaded element size -- confirm against the pass.
331+ define <4 x i32 > @gep01_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
332+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
333+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
334+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
335+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
336+ ; CHECK-NEXT: ret <4 x i32> [[R]]
337+ ;
338+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
339+ %s = load i32 , ptr %gep , align 1
340+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
341+ ret <4 x i32 > %r
342+ }
343+
; Test: i64 load through element 1 of an <8 x i16> GEP (byte offset 2, so the
; load covers bytes 2..9 of the 16 dereferenceable bytes), inserted into lane 0
; of an undef <2 x i64>. The CHECK lines match the input IR unchanged.
; NOTE(review): presumably left alone because byte offset 2 is not a multiple
; of the 8-byte loaded element size -- confirm against the pass.
344+ define <2 x i64 > @gep01_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
345+ ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
346+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
347+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
348+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
349+ ; CHECK-NEXT: ret <2 x i64> [[R]]
350+ ;
351+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
352+ %s = load i64 , ptr %gep , align 1
353+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
354+ ret <2 x i64 > %r
355+ }
356+
; Test: i32 load through element 5 of an <8 x i16> GEP (byte offset 10, so the
; load covers bytes 10..13 of the 16 dereferenceable bytes), inserted into
; lane 0 of an undef <4 x i32>. The CHECK lines match the input IR unchanged.
; NOTE(review): presumably left alone because byte offset 10 is not a multiple
; of the 4-byte loaded element size -- confirm against the pass.
357+ define <4 x i32 > @gep05_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
358+ ; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
359+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
360+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
361+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
362+ ; CHECK-NEXT: ret <4 x i32> [[R]]
363+ ;
364+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
365+ %s = load i32 , ptr %gep , align 1
366+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
367+ ret <4 x i32 > %r
368+ }
369+
; Test: i64 load through element 1 of a <4 x i32> GEP (byte offset 4, so the
; load covers bytes 4..11 of the 16 dereferenceable bytes), inserted into
; lane 0 of an undef <2 x i64>. The CHECK lines match the input IR unchanged.
; NOTE(review): the function name says "load_i32", but the body loads i64;
; sibling tests name the loaded type, so "load_i64" looks like the intended
; spelling -- confirm before renaming.
; NOTE(review): presumably left alone because byte offset 4 is not a multiple
; of the 8-byte loaded element size -- confirm against the pass.
370+ define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
371+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
372+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
373+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
374+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
375+ ; CHECK-NEXT: ret <2 x i64> [[R]]
376+ ;
377+ %gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 1
378+ %s = load i64 , ptr %gep , align 1
379+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
380+ ret <2 x i64 > %r
381+ }
382+
305383define <4 x i32 > @gep012_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (20 ) %p ) nofree nosync {
306384; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307385; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
@@ -331,6 +409,58 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
331409 ret <4 x i32 > %r
332410}
333411
; Test: i32 load through element 7 of an <8 x i16> GEP (byte offset 14, so the
; load covers bytes 14..17, which extends past the 16 bytes declared
; dereferenceable on %p), inserted into lane 0 of an undef <4 x i32>.
; The CHECK lines match the input IR unchanged, i.e. no vector load is formed.
412+ define <4 x i32 > @gep07_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
413+ ; CHECK-LABEL: @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(
414+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 7
415+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
416+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
417+ ; CHECK-NEXT: ret <4 x i32> [[R]]
418+ ;
419+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 7
420+ %s = load i32 , ptr %gep , align 1
421+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
422+ ret <4 x i32 > %r
423+ }
424+
; Test: i64 load through element 3 of a <4 x i32> GEP (byte offset 12, so the
; load covers bytes 12..19, which extends past the 16 bytes declared
; dereferenceable on %p), inserted into lane 0 of an undef <2 x i64>.
; The CHECK lines match the input IR unchanged, i.e. no vector load is formed.
; NOTE(review): the function name says "load_i32", but the body loads i64;
; sibling tests name the loaded type, so "load_i64" looks like the intended
; spelling -- confirm before renaming.
425+ define <2 x i64 > @gep03_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
426+ ; CHECK-LABEL: @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(
427+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 3
428+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
429+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
430+ ; CHECK-NEXT: ret <2 x i64> [[R]]
431+ ;
432+ %gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 3
433+ %s = load i64 , ptr %gep , align 1
434+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
435+ ret <2 x i64 > %r
436+ }
437+
; Test: i64 load through element 9 of a <16 x i8> GEP (byte offset 9, so the
; load covers bytes 9..16, one byte past the 16 bytes declared dereferenceable
; on %p), inserted into lane 0 of an undef <2 x i64>.
; The CHECK lines match the input IR unchanged, i.e. no vector load is formed.
; NOTE(review): this is the only function visible here that references
; attribute group #0; its definition is not visible in this chunk -- confirm
; the group exists in the file and that the attribute is intended here.
438+ define <2 x i64 > @gep09_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) #0 {
439+ ; CHECK-LABEL: @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(
440+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 9
441+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
442+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
443+ ; CHECK-NEXT: ret <2 x i64> [[R]]
444+ ;
445+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 9
446+ %s = load i64 , ptr %gep , align 1
447+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
448+ ret <2 x i64 > %r
449+ }
450+
; Test: i64 load through element 5 of an <8 x i16> GEP (byte offset 10, so the
; load covers bytes 10..17, which extends past the 16 bytes declared
; dereferenceable on %p), inserted into lane 0 of an undef <2 x i64>.
; The CHECK lines match the input IR unchanged, i.e. no vector load is formed.
451+ define <2 x i64 > @gep05_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
452+ ; CHECK-LABEL: @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(
453+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
454+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
455+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
456+ ; CHECK-NEXT: ret <2 x i64> [[R]]
457+ ;
458+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
459+ %s = load i64 , ptr %gep , align 1
460+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
461+ ret <2 x i64 > %r
462+ }
463+
334464; If there are enough dereferenceable bytes, we can offset the vector load.
335465
336466define <8 x i16 > @gep10_load_i16_insert_v8i16 (ptr align 16 dereferenceable (32 ) %p ) nofree nosync {
0 commit comments