@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417}
418418
419419define <4 x float > @ins_bo_ext_ext (<4 x float > %a , <4 x float > %b ) {
420- ; CHECK-LABEL: @ins_bo_ext_ext(
421- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425- ; CHECK-NEXT: ret <4 x float> [[V3]]
420+ ; SSE-LABEL: @ins_bo_ext_ext(
421+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+ ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424+ ; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425+ ; SSE-NEXT: ret <4 x float> [[V3]]
426+ ;
427+ ; AVX-LABEL: @ins_bo_ext_ext(
428+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430+ ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431+ ; AVX-NEXT: ret <4 x float> [[V3]]
426432;
427433 %a2 = extractelement <4 x float > %a , i32 2
428434 %a3 = extractelement <4 x float > %a , i32 3
@@ -435,13 +441,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435441; but it is likely that extracting from index 3 is the better option.
436442
437443define <4 x float > @ins_bo_ext_ext_uses (<4 x float > %a , <4 x float > %b ) {
438- ; CHECK-LABEL: @ins_bo_ext_ext_uses(
439- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442- ; CHECK-NEXT: call void @use_f32(float [[A23]])
443- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444- ; CHECK-NEXT: ret <4 x float> [[V3]]
444+ ; SSE-LABEL: @ins_bo_ext_ext_uses(
445+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
446+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
447+ ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
448+ ; SSE-NEXT: call void @use_f32(float [[A23]])
449+ ; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
450+ ; SSE-NEXT: ret <4 x float> [[V3]]
451+ ;
452+ ; AVX-LABEL: @ins_bo_ext_ext_uses(
453+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
454+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
455+ ; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
456+ ; AVX-NEXT: call void @use_f32(float [[A23]])
457+ ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
458+ ; AVX-NEXT: ret <4 x float> [[V3]]
445459;
446460 %a2 = extractelement <4 x float > %a , i32 2
447461 %a3 = extractelement <4 x float > %a , i32 3
@@ -452,22 +466,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452466}
453467
454468define <4 x float > @PR34724 (<4 x float > %a , <4 x float > %b ) {
455- ; CHECK-LABEL: @PR34724(
456- ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457- ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461- ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462- ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463- ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464- ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465- ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466- ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467- ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
468- ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470- ; CHECK-NEXT: ret <4 x float> [[V3]]
469+ ; SSE-LABEL: @PR34724(
470+ ; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
471+ ; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
472+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
473+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
474+ ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
475+ ; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
476+ ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
477+ ; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
478+ ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
479+ ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
480+ ; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
481+ ; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
482+ ; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
483+ ; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
484+ ; SSE-NEXT: ret <4 x float> [[V3]]
485+ ;
486+ ; AVX-LABEL: @PR34724(
487+ ; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
488+ ; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
489+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
490+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
491+ ; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
492+ ; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
493+ ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
494+ ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
495+ ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
496+ ; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
497+ ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
498+ ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
499+ ; AVX-NEXT: ret <4 x float> [[V3]]
471500;
472501 %a0 = extractelement <4 x float > %a , i32 0
473502 %a1 = extractelement <4 x float > %a , i32 1
0 commit comments