@@ -417,18 +417,11 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417}
418418
419419define <4 x float > @ins_bo_ext_ext (<4 x float > %a , <4 x float > %b ) {
420- ; SSE-LABEL: @ins_bo_ext_ext(
421- ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422- ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423- ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424- ; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425- ; SSE-NEXT: ret <4 x float> [[V3]]
426- ;
427- ; AVX-LABEL: @ins_bo_ext_ext(
428- ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429- ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430- ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431- ; AVX-NEXT: ret <4 x float> [[V3]]
420+ ; CHECK-LABEL: @ins_bo_ext_ext(
421+ ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+ ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+ ; CHECK-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
424+ ; CHECK-NEXT: ret <4 x float> [[V3]]
432425;
433426 %a2 = extractelement <4 x float > %a , i32 2
434427 %a3 = extractelement <4 x float > %a , i32 3
@@ -441,13 +434,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
441434; but it is likely that extracting from index 3 is the better option.
442435
443436define <4 x float > @ins_bo_ext_ext_uses (<4 x float > %a , <4 x float > %b ) {
444- ; CHECK-LABEL: @ins_bo_ext_ext_uses(
445- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
446- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
447- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
448- ; CHECK-NEXT: call void @use_f32(float [[A23]])
449- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
450- ; CHECK-NEXT: ret <4 x float> [[V3]]
437+ ; SSE-LABEL: @ins_bo_ext_ext_uses(
438+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
439+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
440+ ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
441+ ; SSE-NEXT: call void @use_f32(float [[A23]])
442+ ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
443+ ; SSE-NEXT: ret <4 x float> [[V3]]
444+ ;
445+ ; AVX-LABEL: @ins_bo_ext_ext_uses(
446+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
447+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
448+ ; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
449+ ; AVX-NEXT: call void @use_f32(float [[A23]])
450+ ; AVX-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
451+ ; AVX-NEXT: ret <4 x float> [[V3]]
451452;
452453 %a2 = extractelement <4 x float > %a , i32 2
453454 %a3 = extractelement <4 x float > %a , i32 3
@@ -463,16 +464,13 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
463464; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
464465; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
465466; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
466- ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
467467; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468468; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469- ; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
470469; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
471470; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
472- ; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
473- ; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
474- ; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
475- ; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
471+ ; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
472+ ; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
473+ ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
476474; SSE-NEXT: ret <4 x float> [[V3]]
477475;
478476; AVX-LABEL: @PR34724(
0 commit comments