llvm · RKSimon · Nov 13, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 6, 2024
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -450,6 +450,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   // TODO: Evaluate whether that always results in lowest cost. Alternatively,
   //       check the cost of creating a broadcast shuffle and shuffling both
   //       operands to element 0.
+  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
+  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
   InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
 
   // Extra uses of the extracts mean that we include those costs in the
@@ -485,8 +487,18 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
     // ShufMask = { poison, poison, 0, poison }
     // TODO: The cost model has an option for a "broadcast" shuffle
     //       (splat-from-element-0), but no option for a more general splat.
-    NewCost +=
-        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+    if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
+      SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
+                                   PoisonMaskElem);
+      ShuffleMask[BestInsIndex] = BestExtIndex;
+      NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                    VecTy, ShuffleMask, CostKind, 0, nullptr,
+                                    {ConvertToShuffle});
+    } else {
+      NewCost +=
+          TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy,
+                             {}, CostKind, 0, nullptr, {ConvertToShuffle});
+    }
   }
 
   // Aggressively form a vector op if the cost is equal because the transform

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE,SSE4
-; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE,SSE4
-; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE4
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE4
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX
 
 define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
 ; SSE2-LABEL: @PR50392(
@@ -30,24 +30,14 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
 ; SSE4-NEXT:    [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP3]], double [[ADD12]], i64 3
 ; SSE4-NEXT:    ret <4 x double> [[SHUFFLE]]
 ;
-; AVX1-LABEL: @PR50392(
-; AVX1-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
-; AVX1-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
-; AVX1-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX1-NEXT:    [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
-; AVX1-NEXT:    [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
-; AVX1-NEXT:    [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
-; AVX1-NEXT:    [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
-; AVX1-NEXT:    ret <4 x double> [[SHUFFLE]]
-;
-; AVX2-LABEL: @PR50392(
-; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
-; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
-; AVX2-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX2-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX2-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
-; AVX2-NEXT:    ret <4 x double> [[SHUFFLE]]
+; AVX-LABEL: @PR50392(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
+; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
+; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
+; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
+; AVX-NEXT:    ret <4 x double> [[SHUFFLE]]
 ;
   %vecext = extractelement <4 x double> %a, i32 0
   %vecext1 = extractelement <4 x double> %a, i32 1
@@ -68,6 +58,3 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
   %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
   ret <4 x double> %shuffle
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -541,18 +541,18 @@ define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) {
 define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) {
 ; CHECK-LABEL: @load_extract_clobber_store_between_limit(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; CHECK-NEXT:    [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; CHECK-NEXT:    [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; CHECK-NEXT:    [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; CHECK-NEXT:    [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; CHECK-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; CHECK-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
 ; CHECK-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
-; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
 ; CHECK-NEXT:    ret i32 [[ADD_4]]
 ;
   %lv = load <4 x i32>, ptr %x
@@ -573,35 +573,35 @@ define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %
 
 define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) {
 ; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit(
-; LIMIT-DEFAULT-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; LIMIT-DEFAULT-NEXT:    [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; LIMIT-DEFAULT-NEXT:    [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; LIMIT-DEFAULT-NEXT:    [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; LIMIT-DEFAULT-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; LIMIT-DEFAULT-NEXT:    [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; LIMIT-DEFAULT-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; LIMIT-DEFAULT-NEXT:    [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; LIMIT-DEFAULT-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; LIMIT-DEFAULT-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; LIMIT-DEFAULT-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]]
+; LIMIT-DEFAULT-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; LIMIT-DEFAULT-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; LIMIT-DEFAULT-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
 ; LIMIT-DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
 ; LIMIT-DEFAULT-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
 ; LIMIT-DEFAULT-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
-; LIMIT-DEFAULT-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; LIMIT-DEFAULT-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
 ; LIMIT-DEFAULT-NEXT:    ret i32 [[ADD_4]]
 ;
 ; LIMIT2-LABEL: @load_extract_clobber_store_after_limit(
 ; LIMIT2-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; LIMIT2-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; LIMIT2-NEXT:    [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; LIMIT2-NEXT:    [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; LIMIT2-NEXT:    [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; LIMIT2-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; LIMIT2-NEXT:    [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; LIMIT2-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; LIMIT2-NEXT:    [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; LIMIT2-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; LIMIT2-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; LIMIT2-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
+; LIMIT2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; LIMIT2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; LIMIT2-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
 ; LIMIT2-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
 ; LIMIT2-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
-; LIMIT2-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; LIMIT2-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
 ; LIMIT2-NEXT:    ret i32 [[ADD_4]]
 ;
   %lv = load <4 x i32>, ptr %x
@@ -671,9 +671,9 @@ define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
 define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
 ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i32 0
-; CHECK-NEXT:    [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i32 1
-; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
+; CHECK-NEXT:    [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %lv = load <4 x i32>, ptr %x
@@ -688,9 +688,9 @@ define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
 define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
 ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
 ; CHECK-NEXT:    [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
-; CHECK-NEXT:    [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
-; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]]
+; CHECK-NEXT:    [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %lv = load <8 x i32>, ptr %x, align 16

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
 }
 
 define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
-; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
-; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
-; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
-; CHECK-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
-; CHECK-NEXT:    ret float [[R]]
+; SSE-LABEL: @ext14_ext15_fmul_v16f32(
+; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
+; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
+; SSE-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
+; SSE-NEXT:    ret float [[R]]
+;
+; AVX-LABEL: @ext14_ext15_fmul_v16f32(
+; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
+; AVX-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
+; AVX-NEXT:    [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; AVX-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <16 x float> %x, i32 14
   %e1 = extractelement <16 x float> %x, i32 15

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
 }
 
 define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
-; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
-; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
-; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
-; CHECK-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
-; CHECK-NEXT:    ret float [[R]]
+; SSE-LABEL: @ext14_ext15_fmul_v16f32(
+; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
+; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
+; SSE-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
+; SSE-NEXT:    ret float [[R]]
+;
+; AVX-LABEL: @ext14_ext15_fmul_v16f32(
+; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
+; AVX-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
+; AVX-NEXT:    [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; AVX-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <16 x float> %x, i32 14
   %e1 = extractelement <16 x float> %x, i32 15