Skip to content

Commit 1878b94

Browse files
authored
[VectorCombine] isExtractExtractCheap - specify the extract/insert shuffle mask to improve shuffle costs (#114780)
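This shuffle mask is very specific (every lane is poison except the single lane that moves the extracted element to the insert index), so the cost model is very likely to be able to determine a specific (lower) cost for it.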
1 parent aba5580 commit 1878b94

File tree

5 files changed

+88
-77
lines changed

5 files changed

+88
-77
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
450450
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
451451
// check the cost of creating a broadcast shuffle and shuffling both
452452
// operands to element 0.
453+
unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
454+
unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
453455
InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
454456

455457
// Extra uses of the extracts mean that we include those costs in the
@@ -485,8 +487,18 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
485487
// ShufMask = { poison, poison, 0, poison }
486488
// TODO: The cost model has an option for a "broadcast" shuffle
487489
// (splat-from-element-0), but no option for a more general splat.
488-
NewCost +=
489-
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
490+
if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
491+
SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
492+
PoisonMaskElem);
493+
ShuffleMask[BestInsIndex] = BestExtIndex;
494+
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
495+
VecTy, ShuffleMask, CostKind, 0, nullptr,
496+
{ConvertToShuffle});
497+
} else {
498+
NewCost +=
499+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy,
500+
{}, CostKind, 0, nullptr, {ConvertToShuffle});
501+
}
490502
}
491503

492504
// Aggressively form a vector op if the cost is equal because the transform
Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
3-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
4-
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
5-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
6-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
7-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
8-
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
9-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
2+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE2
3+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE4
4+
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX
5+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX
6+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE2
7+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE4
8+
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX
9+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX
1010

1111
define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
1212
; SSE2-LABEL: @PR50392(
@@ -30,24 +30,14 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
3030
; SSE4-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP3]], double [[ADD12]], i64 3
3131
; SSE4-NEXT: ret <4 x double> [[SHUFFLE]]
3232
;
33-
; AVX1-LABEL: @PR50392(
34-
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
35-
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
36-
; AVX1-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
37-
; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
38-
; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
39-
; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
40-
; AVX1-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
41-
; AVX1-NEXT: ret <4 x double> [[SHUFFLE]]
42-
;
43-
; AVX2-LABEL: @PR50392(
44-
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
45-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
46-
; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
47-
; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
48-
; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
49-
; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
50-
; AVX2-NEXT: ret <4 x double> [[SHUFFLE]]
33+
; AVX-LABEL: @PR50392(
34+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
35+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
36+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
37+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
38+
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
39+
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
40+
; AVX-NEXT: ret <4 x double> [[SHUFFLE]]
5141
;
5242
%vecext = extractelement <4 x double> %a, i32 0
5343
%vecext1 = extractelement <4 x double> %a, i32 1
@@ -68,6 +58,3 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
6858
%shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
6959
ret <4 x double> %shuffle
7060
}
71-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
72-
; AVX: {{.*}}
73-
; SSE: {{.*}}

llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -541,18 +541,18 @@ define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) {
541541
define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) {
542542
; CHECK-LABEL: @load_extract_clobber_store_between_limit(
543543
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
544-
; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
545-
; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
546-
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
547-
; CHECK-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
548-
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
549-
; CHECK-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
550-
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
551-
; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
552-
; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
544+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
545+
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
546+
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
547+
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
548+
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
549+
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
550+
; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
551+
; CHECK-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
552+
; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
553553
; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1
554554
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
555-
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
555+
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
556556
; CHECK-NEXT: ret i32 [[ADD_4]]
557557
;
558558
%lv = load <4 x i32>, ptr %x
@@ -573,35 +573,35 @@ define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %
573573

574574
define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) {
575575
; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit(
576-
; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
577-
; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
578-
; LIMIT-DEFAULT-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
579-
; LIMIT-DEFAULT-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
580-
; LIMIT-DEFAULT-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
581-
; LIMIT-DEFAULT-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
582-
; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
583-
; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
584-
; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
576+
; LIMIT-DEFAULT-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
577+
; LIMIT-DEFAULT-NEXT: [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
578+
; LIMIT-DEFAULT-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
579+
; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]]
580+
; LIMIT-DEFAULT-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
581+
; LIMIT-DEFAULT-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
582+
; LIMIT-DEFAULT-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
583+
; LIMIT-DEFAULT-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
584+
; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
585585
; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
586586
; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8
587587
; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1
588-
; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
588+
; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
589589
; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]]
590590
;
591591
; LIMIT2-LABEL: @load_extract_clobber_store_after_limit(
592592
; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
593-
; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
594-
; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
595-
; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
596-
; LIMIT2-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
597-
; LIMIT2-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
598-
; LIMIT2-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
599-
; LIMIT2-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
600-
; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
601-
; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
593+
; LIMIT2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
594+
; LIMIT2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
595+
; LIMIT2-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
596+
; LIMIT2-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
597+
; LIMIT2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
598+
; LIMIT2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
599+
; LIMIT2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
600+
; LIMIT2-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
601+
; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
602602
; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
603603
; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1
604-
; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
604+
; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
605605
; LIMIT2-NEXT: ret i32 [[ADD_4]]
606606
;
607607
%lv = load <4 x i32>, ptr %x
@@ -671,9 +671,9 @@ define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
671671
define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
672672
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
673673
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
674-
; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i32 0
675-
; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i32 1
676-
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
674+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
675+
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
676+
; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
677677
; CHECK-NEXT: ret i32 [[RES]]
678678
;
679679
%lv = load <4 x i32>, ptr %x
@@ -688,9 +688,9 @@ define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
688688
define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
689689
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
690690
; CHECK-NEXT: [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16
691-
; CHECK-NEXT: [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
692-
; CHECK-NEXT: [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
693-
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
691+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
692+
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]]
693+
; CHECK-NEXT: [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
694694
; CHECK-NEXT: ret i32 [[RES]]
695695
;
696696
%lv = load <8 x i32>, ptr %x, align 16

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
404404
}
405405

406406
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
407-
; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
408-
; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409-
; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410-
; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411-
; CHECK-NEXT: ret float [[R]]
407+
; SSE-LABEL: @ext14_ext15_fmul_v16f32(
408+
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409+
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410+
; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411+
; SSE-NEXT: ret float [[R]]
412+
;
413+
; AVX-LABEL: @ext14_ext15_fmul_v16f32(
414+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
415+
; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
416+
; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
417+
; AVX-NEXT: ret float [[R]]
412418
;
413419
%e0 = extractelement <16 x float> %x, i32 14
414420
%e1 = extractelement <16 x float> %x, i32 15

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
404404
}
405405

406406
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
407-
; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
408-
; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409-
; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410-
; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411-
; CHECK-NEXT: ret float [[R]]
407+
; SSE-LABEL: @ext14_ext15_fmul_v16f32(
408+
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409+
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410+
; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411+
; SSE-NEXT: ret float [[R]]
412+
;
413+
; AVX-LABEL: @ext14_ext15_fmul_v16f32(
414+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
415+
; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
416+
; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
417+
; AVX-NEXT: ret float [[R]]
412418
;
413419
%e0 = extractelement <16 x float> %x, i32 14
414420
%e1 = extractelement <16 x float> %x, i32 15

0 commit comments

Comments
 (0)