Skip to content

Commit 7b99818

Browse files
committed
[VectorCombine] isExtractExtractCheap - specify the extract/insert shuffle mask to improve shuffle costs
Because this shuffle mask moves only a single element (every other lane is poison), the cost model is very likely to be able to determine a specific (lower) cost for it.
1 parent 4c9cb97 commit 7b99818

File tree

5 files changed

+80
-67
lines changed

5 files changed

+80
-67
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
450450
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
451451
// check the cost of creating a broadcast shuffle and shuffling both
452452
// operands to element 0.
453+
unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
454+
unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
453455
InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
454456

455457
// Extra uses of the extracts mean that we include those costs in the
@@ -485,8 +487,16 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
485487
// ShufMask = { poison, poison, 0, poison }
486488
// TODO: The cost model has an option for a "broadcast" shuffle
487489
// (splat-from-element-0), but no option for a more general splat.
488-
NewCost +=
489-
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
490+
if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
491+
SmallVector<int> ShuffleMask;
492+
ShuffleMask.append(FixedVecTy->getNumElements(), PoisonMaskElem);
493+
ShuffleMask[BestInsIndex] = BestExtIndex;
494+
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
495+
VecTy, ShuffleMask);
496+
} else {
497+
NewCost +=
498+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
499+
}
490500
}
491501

492502
// Aggressively form a vector op if the cost is equal because the transform

llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,14 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
3030
; SSE4-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP3]], double [[ADD12]], i64 3
3131
; SSE4-NEXT: ret <4 x double> [[SHUFFLE]]
3232
;
33-
; AVX1-LABEL: @PR50392(
34-
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
35-
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
36-
; AVX1-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
37-
; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
38-
; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
39-
; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
40-
; AVX1-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
41-
; AVX1-NEXT: ret <4 x double> [[SHUFFLE]]
42-
;
43-
; AVX2-LABEL: @PR50392(
44-
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
45-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
46-
; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
47-
; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
48-
; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
49-
; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
50-
; AVX2-NEXT: ret <4 x double> [[SHUFFLE]]
33+
; AVX-LABEL: @PR50392(
34+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
35+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
36+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
37+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
38+
; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
39+
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
40+
; AVX-NEXT: ret <4 x double> [[SHUFFLE]]
5141
;
5242
%vecext = extractelement <4 x double> %a, i32 0
5343
%vecext1 = extractelement <4 x double> %a, i32 1
@@ -69,5 +59,6 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
6959
ret <4 x double> %shuffle
7060
}
7161
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
72-
; AVX: {{.*}}
62+
; AVX1: {{.*}}
63+
; AVX2: {{.*}}
7364
; SSE: {{.*}}

llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -541,18 +541,18 @@ define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) {
541541
define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) {
542542
; CHECK-LABEL: @load_extract_clobber_store_between_limit(
543543
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
544-
; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
545-
; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
546-
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
547-
; CHECK-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
548-
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
549-
; CHECK-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
550-
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
551-
; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
552-
; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
544+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
545+
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
546+
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
547+
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
548+
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
549+
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
550+
; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
551+
; CHECK-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
552+
; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
553553
; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1
554554
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
555-
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
555+
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
556556
; CHECK-NEXT: ret i32 [[ADD_4]]
557557
;
558558
%lv = load <4 x i32>, ptr %x
@@ -573,35 +573,35 @@ define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %
573573

574574
define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) {
575575
; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit(
576-
; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
577-
; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
578-
; LIMIT-DEFAULT-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
579-
; LIMIT-DEFAULT-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
580-
; LIMIT-DEFAULT-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
581-
; LIMIT-DEFAULT-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
582-
; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
583-
; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
584-
; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
576+
; LIMIT-DEFAULT-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
577+
; LIMIT-DEFAULT-NEXT: [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
578+
; LIMIT-DEFAULT-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
579+
; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]]
580+
; LIMIT-DEFAULT-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
581+
; LIMIT-DEFAULT-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
582+
; LIMIT-DEFAULT-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
583+
; LIMIT-DEFAULT-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
584+
; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
585585
; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
586586
; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8
587587
; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1
588-
; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
588+
; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
589589
; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]]
590590
;
591591
; LIMIT2-LABEL: @load_extract_clobber_store_after_limit(
592592
; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
593-
; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
594-
; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
595-
; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
596-
; LIMIT2-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
597-
; LIMIT2-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
598-
; LIMIT2-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
599-
; LIMIT2-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
600-
; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
601-
; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
593+
; LIMIT2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
594+
; LIMIT2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
595+
; LIMIT2-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
596+
; LIMIT2-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
597+
; LIMIT2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
598+
; LIMIT2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
599+
; LIMIT2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
600+
; LIMIT2-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
601+
; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
602602
; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
603603
; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1
604-
; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
604+
; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
605605
; LIMIT2-NEXT: ret i32 [[ADD_4]]
606606
;
607607
%lv = load <4 x i32>, ptr %x
@@ -671,9 +671,9 @@ define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
671671
define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
672672
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
673673
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
674-
; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i32 0
675-
; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i32 1
676-
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
674+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
675+
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
676+
; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
677677
; CHECK-NEXT: ret i32 [[RES]]
678678
;
679679
%lv = load <4 x i32>, ptr %x
@@ -688,9 +688,9 @@ define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
688688
define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
689689
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
690690
; CHECK-NEXT: [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16
691-
; CHECK-NEXT: [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
692-
; CHECK-NEXT: [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
693-
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
691+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
692+
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]]
693+
; CHECK-NEXT: [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
694694
; CHECK-NEXT: ret i32 [[RES]]
695695
;
696696
%lv = load <8 x i32>, ptr %x, align 16

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
404404
}
405405

406406
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
407-
; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
408-
; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409-
; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410-
; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411-
; CHECK-NEXT: ret float [[R]]
407+
; SSE-LABEL: @ext14_ext15_fmul_v16f32(
408+
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409+
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410+
; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411+
; SSE-NEXT: ret float [[R]]
412+
;
413+
; AVX-LABEL: @ext14_ext15_fmul_v16f32(
414+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
415+
; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
416+
; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
417+
; AVX-NEXT: ret float [[R]]
412418
;
413419
%e0 = extractelement <16 x float> %x, i32 14
414420
%e1 = extractelement <16 x float> %x, i32 15

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
404404
}
405405

406406
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
407-
; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
408-
; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409-
; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410-
; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411-
; CHECK-NEXT: ret float [[R]]
407+
; SSE-LABEL: @ext14_ext15_fmul_v16f32(
408+
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409+
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410+
; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411+
; SSE-NEXT: ret float [[R]]
412+
;
413+
; AVX-LABEL: @ext14_ext15_fmul_v16f32(
414+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
415+
; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
416+
; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
417+
; AVX-NEXT: ret float [[R]]
412418
;
413419
%e0 = extractelement <16 x float> %x, i32 14
414420
%e1 = extractelement <16 x float> %x, i32 15

0 commit comments

Comments
 (0)